From f4661d293eb2d01dfc742982761a36fafe456d46 Mon Sep 17 00:00:00 2001
From: Jacek Tomaka <jacek.tomaka@poczta.fm>
Date: Sat, 25 Aug 2018 11:50:39 +0800
Subject: [PATCH 0001/1890] x86/microcode: Make revision and processor flags
 world-readable

The microcode revision is already readable for non-root users via
/proc/cpuinfo. Thus, there's no reason to keep the same information
readable by root only in /sys/devices/system/cpu/cpuX/microcode/.

Make .../processor_flags world-readable too, while at it.

Reported-by: Tim Burgess <timb@dug.com>
Signed-off-by: Jacek Tomaka <jacek.tomaka@poczta.fm>
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20180825035039.14409-1-jacekt@dugeo.com
---
 arch/x86/kernel/cpu/microcode/core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index b9bc8a1a584e3..2637ff09d6a0d 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -666,8 +666,8 @@ static ssize_t pf_show(struct device *dev,
 }
 
 static DEVICE_ATTR_WO(reload);
-static DEVICE_ATTR(version, 0400, version_show, NULL);
-static DEVICE_ATTR(processor_flags, 0400, pf_show, NULL);
+static DEVICE_ATTR(version, 0444, version_show, NULL);
+static DEVICE_ATTR(processor_flags, 0444, pf_show, NULL);
 
 static struct attribute *mc_default_attrs[] = {
 	&dev_attr_version.attr,
-- 
GitLab


From afaef01c001537fa97a25092d7f54d764dc7d8c1 Mon Sep 17 00:00:00 2001
From: Alexander Popov <alex.popov@linux.com>
Date: Fri, 17 Aug 2018 01:16:58 +0300
Subject: [PATCH 0002/1890] x86/entry: Add STACKLEAK erasing the kernel stack
 at the end of syscalls

The STACKLEAK feature (initially developed by PaX Team) has the following
benefits:

1. Reduces the information that can be revealed through kernel stack leak
   bugs. The idea of erasing the thread stack at the end of syscalls is
   similar to CONFIG_PAGE_POISONING and memzero_explicit() in kernel
   crypto, which all comply with FDP_RIP.2 (Full Residual Information
   Protection) of the Common Criteria standard.

2. Blocks some uninitialized stack variable attacks (e.g. CVE-2017-17712,
   CVE-2010-2963). That kind of bugs should be killed by improving C
   compilers in future, which might take a long time.

This commit introduces the code filling the used part of the kernel
stack with a poison value before returning to userspace. Full
STACKLEAK feature also contains the gcc plugin which comes in a
separate commit.

The STACKLEAK feature is ported from grsecurity/PaX. More information at:
  https://grsecurity.net/
  https://pax.grsecurity.net/

This code is modified from Brad Spengler/PaX Team's code in the last
public patch of grsecurity/PaX based on our understanding of the code.
Changes or omissions from the original code are ours and don't reflect
the original grsecurity/PaX code.

Performance impact:

Hardware: Intel Core i7-4770, 16 GB RAM

Test #1: building the Linux kernel on a single core
        0.91% slowdown

Test #2: hackbench -s 4096 -l 2000 -g 15 -f 25 -P
        4.2% slowdown

So the STACKLEAK description in Kconfig includes: "The tradeoff is the
performance impact: on a single CPU system kernel compilation sees a 1%
slowdown, other systems and workloads may vary and you are advised to
test this feature on your expected workload before deploying it".

Signed-off-by: Alexander Popov <alex.popov@linux.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Dave Hansen <dave.hansen@linux.intel.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 Documentation/x86/x86_64/mm.txt  |  2 ++
 arch/Kconfig                     |  7 ++++
 arch/x86/Kconfig                 |  1 +
 arch/x86/entry/calling.h         | 14 ++++++++
 arch/x86/entry/entry_32.S        |  7 ++++
 arch/x86/entry/entry_64.S        |  3 ++
 arch/x86/entry/entry_64_compat.S |  5 +++
 include/linux/sched.h            |  4 +++
 include/linux/stackleak.h        | 26 ++++++++++++++
 kernel/Makefile                  |  4 +++
 kernel/fork.c                    |  3 ++
 kernel/stackleak.c               | 62 ++++++++++++++++++++++++++++++++
 scripts/gcc-plugins/Kconfig      | 19 ++++++++++
 13 files changed, 157 insertions(+)
 create mode 100644 include/linux/stackleak.h
 create mode 100644 kernel/stackleak.c

diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index 5432a96d31ffd..600bc2afa27d6 100644
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -24,6 +24,7 @@ ffffffffa0000000 - fffffffffeffffff (1520 MB) module mapping space
 [fixmap start]   - ffffffffff5fffff kernel-internal fixmap range
 ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
 ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
+STACKLEAK_POISON value in this last hole: ffffffffffff4111
 
 Virtual memory map with 5 level page tables:
 
@@ -50,6 +51,7 @@ ffffffffa0000000 - fffffffffeffffff (1520 MB) module mapping space
 [fixmap start]   - ffffffffff5fffff kernel-internal fixmap range
 ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
 ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
+STACKLEAK_POISON value in this last hole: ffffffffffff4111
 
 Architecture defines a 64-bit virtual address. Implementations can support
 less. Currently supported are 48- and 57-bit virtual addresses. Bits 63
diff --git a/arch/Kconfig b/arch/Kconfig
index 6801123932a50..ee79ff56faab9 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -419,6 +419,13 @@ config SECCOMP_FILTER
 
 	  See Documentation/userspace-api/seccomp_filter.rst for details.
 
+config HAVE_ARCH_STACKLEAK
+	bool
+	help
+	  An architecture should select this if it has the code which
+	  fills the used part of the kernel stack with the STACKLEAK_POISON
+	  value before returning from system calls.
+
 config HAVE_STACKPROTECTOR
 	bool
 	help
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1a0be022f91d8..662cb2cc9630c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -127,6 +127,7 @@ config X86
 	select HAVE_ARCH_PREL32_RELOCATIONS
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_THREAD_STRUCT_WHITELIST
+	select HAVE_ARCH_STACKLEAK
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 352e70cd33e80..20d0885b00fbe 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -329,8 +329,22 @@ For 32-bit we have the following conventions - kernel is built with
 
 #endif
 
+.macro STACKLEAK_ERASE_NOCLOBBER
+#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
+	PUSH_AND_CLEAR_REGS
+	call stackleak_erase
+	POP_REGS
+#endif
+.endm
+
 #endif /* CONFIG_X86_64 */
 
+.macro STACKLEAK_ERASE
+#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
+	call stackleak_erase
+#endif
+.endm
+
 /*
  * This does 'call enter_from_user_mode' unless we can avoid it based on
  * kernel config or using the static jump infrastructure.
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 2767c625a52cf..dfb975b4c981f 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -46,6 +46,8 @@
 #include <asm/frame.h>
 #include <asm/nospec-branch.h>
 
+#include "calling.h"
+
 	.section .entry.text, "ax"
 
 /*
@@ -711,6 +713,7 @@ ENTRY(ret_from_fork)
 	/* When we fork, we trace the syscall return in the child, too. */
 	movl    %esp, %eax
 	call    syscall_return_slowpath
+	STACKLEAK_ERASE
 	jmp     restore_all
 
 	/* kernel thread */
@@ -885,6 +888,8 @@ ENTRY(entry_SYSENTER_32)
 	ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
 		    "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
 
+	STACKLEAK_ERASE
+
 /* Opportunistic SYSEXIT */
 	TRACE_IRQS_ON			/* User mode traces as IRQs on. */
 
@@ -996,6 +1001,8 @@ ENTRY(entry_INT80_32)
 	call	do_int80_syscall_32
 .Lsyscall_32_done:
 
+	STACKLEAK_ERASE
+
 restore_all:
 	TRACE_IRQS_IRET
 	SWITCH_TO_ENTRY_STACK
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 957dfb693eccd..a5dd280930202 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -329,6 +329,8 @@ syscall_return_via_sysret:
 	 * We are on the trampoline stack.  All regs except RDI are live.
 	 * We can do future final exit work right here.
 	 */
+	STACKLEAK_ERASE_NOCLOBBER
+
 	SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
 
 	popq	%rdi
@@ -688,6 +690,7 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
 	 * We are on the trampoline stack.  All regs except RDI are live.
 	 * We can do future final exit work right here.
 	 */
+	STACKLEAK_ERASE_NOCLOBBER
 
 	SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
 
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 7d0df78db7272..8eaf8952c408c 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -261,6 +261,11 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
 
 	/* Opportunistic SYSRET */
 sysret32_from_system_call:
+	/*
+	 * We are not going to return to userspace from the trampoline
+	 * stack. So let's erase the thread stack right now.
+	 */
+	STACKLEAK_ERASE
 	TRACE_IRQS_ON			/* User mode traces as IRQs on. */
 	movq	RBX(%rsp), %rbx		/* pt_regs->rbx */
 	movq	RBP(%rsp), %rbp		/* pt_regs->rbp */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 977cb57d7bc9e..c1a23acd24e72 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1192,6 +1192,10 @@ struct task_struct {
 	void				*security;
 #endif
 
+#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
+	unsigned long			lowest_stack;
+#endif
+
 	/*
 	 * New fields for task_struct should be added above here, so that
 	 * they are included in the randomized portion of task_struct.
diff --git a/include/linux/stackleak.h b/include/linux/stackleak.h
new file mode 100644
index 0000000000000..628c2b947b892
--- /dev/null
+++ b/include/linux/stackleak.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_STACKLEAK_H
+#define _LINUX_STACKLEAK_H
+
+#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
+
+/*
+ * Check that the poison value points to the unused hole in the
+ * virtual memory map for your platform.
+ */
+#define STACKLEAK_POISON -0xBEEF
+#define STACKLEAK_SEARCH_DEPTH 128
+
+#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
+#include <asm/stacktrace.h>
+
+static inline void stackleak_task_init(struct task_struct *t)
+{
+	t->lowest_stack = (unsigned long)end_of_stack(t) + sizeof(unsigned long);
+}
+#else /* !CONFIG_GCC_PLUGIN_STACKLEAK */
+static inline void stackleak_task_init(struct task_struct *t) { }
+#endif
+
+#endif
diff --git a/kernel/Makefile b/kernel/Makefile
index 7a63d567fdb57..7343b3a9bff07 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -117,6 +117,10 @@ obj-$(CONFIG_HAS_IOMEM) += iomem.o
 obj-$(CONFIG_ZONE_DEVICE) += memremap.o
 obj-$(CONFIG_RSEQ) += rseq.o
 
+obj-$(CONFIG_GCC_PLUGIN_STACKLEAK) += stackleak.o
+KASAN_SANITIZE_stackleak.o := n
+KCOV_INSTRUMENT_stackleak.o := n
+
 $(obj)/configs.o: $(obj)/config_data.h
 
 targets += config_data.gz
diff --git a/kernel/fork.c b/kernel/fork.c
index d896e9ca38b0c..47911e49c2b1d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -91,6 +91,7 @@
 #include <linux/kcov.h>
 #include <linux/livepatch.h>
 #include <linux/thread_info.h>
+#include <linux/stackleak.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -1880,6 +1881,8 @@ static __latent_entropy struct task_struct *copy_process(
 	if (retval)
 		goto bad_fork_cleanup_io;
 
+	stackleak_task_init(p);
+
 	if (pid != &init_struct_pid) {
 		pid = alloc_pid(p->nsproxy->pid_ns_for_children);
 		if (IS_ERR(pid)) {
diff --git a/kernel/stackleak.c b/kernel/stackleak.c
new file mode 100644
index 0000000000000..deba0d8992f96
--- /dev/null
+++ b/kernel/stackleak.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This code fills the used part of the kernel stack with a poison value
+ * before returning to userspace. It's part of the STACKLEAK feature
+ * ported from grsecurity/PaX.
+ *
+ * Author: Alexander Popov <alex.popov@linux.com>
+ *
+ * STACKLEAK reduces the information which kernel stack leak bugs can
+ * reveal and blocks some uninitialized stack variable attacks.
+ */
+
+#include <linux/stackleak.h>
+
+asmlinkage void stackleak_erase(void)
+{
+	/* It would be nice not to have 'kstack_ptr' and 'boundary' on stack */
+	unsigned long kstack_ptr = current->lowest_stack;
+	unsigned long boundary = (unsigned long)end_of_stack(current);
+	unsigned int poison_count = 0;
+	const unsigned int depth = STACKLEAK_SEARCH_DEPTH / sizeof(unsigned long);
+
+	/* Check that 'lowest_stack' value is sane */
+	if (unlikely(kstack_ptr - boundary >= THREAD_SIZE))
+		kstack_ptr = boundary;
+
+	/* Search for the poison value in the kernel stack */
+	while (kstack_ptr > boundary && poison_count <= depth) {
+		if (*(unsigned long *)kstack_ptr == STACKLEAK_POISON)
+			poison_count++;
+		else
+			poison_count = 0;
+
+		kstack_ptr -= sizeof(unsigned long);
+	}
+
+	/*
+	 * One 'long int' at the bottom of the thread stack is reserved and
+	 * should not be poisoned (see CONFIG_SCHED_STACK_END_CHECK=y).
+	 */
+	if (kstack_ptr == boundary)
+		kstack_ptr += sizeof(unsigned long);
+
+	/*
+	 * Now write the poison value to the kernel stack. Start from
+	 * 'kstack_ptr' and move up till the new 'boundary'. We assume that
+	 * the stack pointer doesn't change when we write poison.
+	 */
+	if (on_thread_stack())
+		boundary = current_stack_pointer;
+	else
+		boundary = current_top_of_stack();
+
+	while (kstack_ptr < boundary) {
+		*(unsigned long *)kstack_ptr = STACKLEAK_POISON;
+		kstack_ptr += sizeof(unsigned long);
+	}
+
+	/* Reset the 'lowest_stack' value for the next syscall */
+	current->lowest_stack = current_top_of_stack() - THREAD_SIZE/64;
+}
+
diff --git a/scripts/gcc-plugins/Kconfig b/scripts/gcc-plugins/Kconfig
index cb0c889e13aa0..977b84e697871 100644
--- a/scripts/gcc-plugins/Kconfig
+++ b/scripts/gcc-plugins/Kconfig
@@ -139,4 +139,23 @@ config GCC_PLUGIN_RANDSTRUCT_PERFORMANCE
 	  in structures.  This reduces the performance hit of RANDSTRUCT
 	  at the cost of weakened randomization.
 
+config GCC_PLUGIN_STACKLEAK
+	bool "Erase the kernel stack before returning from syscalls"
+	depends on GCC_PLUGINS
+	depends on HAVE_ARCH_STACKLEAK
+	help
+	  This option makes the kernel erase the kernel stack before
+	  returning from system calls. That reduces the information which
+	  kernel stack leak bugs can reveal and blocks some uninitialized
+	  stack variable attacks.
+
+	  The tradeoff is the performance impact: on a single CPU system kernel
+	  compilation sees a 1% slowdown, other systems and workloads may vary
+	  and you are advised to test this feature on your expected workload
+	  before deploying it.
+
+	  This plugin was ported from grsecurity/PaX. More information at:
+	   * https://grsecurity.net/
+	   * https://pax.grsecurity.net/
+
 endif
-- 
GitLab


From 10e9ae9fabaf96c8e5227c1cd4827d58b3aa406d Mon Sep 17 00:00:00 2001
From: Alexander Popov <alex.popov@linux.com>
Date: Fri, 17 Aug 2018 01:16:59 +0300
Subject: [PATCH 0003/1890] gcc-plugins: Add STACKLEAK plugin for tracking the
 kernel stack

The STACKLEAK feature erases the kernel stack before returning from
syscalls. That reduces the information which kernel stack leak bugs can
reveal and blocks some uninitialized stack variable attacks.

This commit introduces the STACKLEAK gcc plugin. It is needed for
tracking the lowest border of the kernel stack, which is important
for the code erasing the used part of the kernel stack at the end
of syscalls (comes in a separate commit).

The STACKLEAK feature is ported from grsecurity/PaX. More information at:
  https://grsecurity.net/
  https://pax.grsecurity.net/

This code is modified from Brad Spengler/PaX Team's code in the last
public patch of grsecurity/PaX based on our understanding of the code.
Changes or omissions from the original code are ours and don't reflect
the original grsecurity/PaX code.

Signed-off-by: Alexander Popov <alex.popov@linux.com>
Tested-by: Laura Abbott <labbott@redhat.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 kernel/stackleak.c                     |  28 ++
 scripts/Makefile.gcc-plugins           |  10 +
 scripts/gcc-plugins/Kconfig            |  12 +
 scripts/gcc-plugins/stackleak_plugin.c | 427 +++++++++++++++++++++++++
 4 files changed, 477 insertions(+)
 create mode 100644 scripts/gcc-plugins/stackleak_plugin.c

diff --git a/kernel/stackleak.c b/kernel/stackleak.c
index deba0d8992f96..628485db37ba7 100644
--- a/kernel/stackleak.c
+++ b/kernel/stackleak.c
@@ -60,3 +60,31 @@ asmlinkage void stackleak_erase(void)
 	current->lowest_stack = current_top_of_stack() - THREAD_SIZE/64;
 }
 
+void __used stackleak_track_stack(void)
+{
+	/*
+	 * N.B. stackleak_erase() fills the kernel stack with the poison value,
+	 * which has the register width. That code assumes that the value
+	 * of 'lowest_stack' is aligned on the register width boundary.
+	 *
+	 * That is true for x86 and x86_64 because of the kernel stack
+	 * alignment on these platforms (for details, see 'cc_stack_align' in
+	 * arch/x86/Makefile). Take care of that when you port STACKLEAK to
+	 * new platforms.
+	 */
+	unsigned long sp = (unsigned long)&sp;
+
+	/*
+	 * Having CONFIG_STACKLEAK_TRACK_MIN_SIZE larger than
+	 * STACKLEAK_SEARCH_DEPTH makes the poison search in
+	 * stackleak_erase() unreliable. Let's prevent that.
+	 */
+	BUILD_BUG_ON(CONFIG_STACKLEAK_TRACK_MIN_SIZE > STACKLEAK_SEARCH_DEPTH);
+
+	if (sp < current->lowest_stack &&
+	    sp >= (unsigned long)task_stack_page(current) +
+						sizeof(unsigned long)) {
+		current->lowest_stack = sp;
+	}
+}
+EXPORT_SYMBOL(stackleak_track_stack);
diff --git a/scripts/Makefile.gcc-plugins b/scripts/Makefile.gcc-plugins
index 0a482f3415767..46c5c68098065 100644
--- a/scripts/Makefile.gcc-plugins
+++ b/scripts/Makefile.gcc-plugins
@@ -26,6 +26,16 @@ gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_RANDSTRUCT)		\
 gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_RANDSTRUCT_PERFORMANCE)	\
 		+= -fplugin-arg-randomize_layout_plugin-performance-mode
 
+gcc-plugin-$(CONFIG_GCC_PLUGIN_STACKLEAK)	+= stackleak_plugin.so
+gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_STACKLEAK)		\
+		+= -DSTACKLEAK_PLUGIN
+gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_STACKLEAK)		\
+		+= -fplugin-arg-stackleak_plugin-track-min-size=$(CONFIG_STACKLEAK_TRACK_MIN_SIZE)
+ifdef CONFIG_GCC_PLUGIN_STACKLEAK
+    DISABLE_STACKLEAK_PLUGIN += -fplugin-arg-stackleak_plugin-disable
+endif
+export DISABLE_STACKLEAK_PLUGIN
+
 # All the plugin CFLAGS are collected here in case a build target needs to
 # filter them out of the KBUILD_CFLAGS.
 GCC_PLUGINS_CFLAGS := $(strip $(addprefix -fplugin=$(objtree)/scripts/gcc-plugins/, $(gcc-plugin-y)) $(gcc-plugin-cflags-y))
diff --git a/scripts/gcc-plugins/Kconfig b/scripts/gcc-plugins/Kconfig
index 977b84e697871..c65fdd823591b 100644
--- a/scripts/gcc-plugins/Kconfig
+++ b/scripts/gcc-plugins/Kconfig
@@ -158,4 +158,16 @@ config GCC_PLUGIN_STACKLEAK
 	   * https://grsecurity.net/
 	   * https://pax.grsecurity.net/
 
+config STACKLEAK_TRACK_MIN_SIZE
+	int "Minimum stack frame size of functions tracked by STACKLEAK"
+	default 100
+	range 0 4096
+	depends on GCC_PLUGIN_STACKLEAK
+	help
+	  The STACKLEAK gcc plugin instruments the kernel code for tracking
+	  the lowest border of the kernel stack (and for some other purposes).
+	  It inserts the stackleak_track_stack() call for the functions with
+	  a stack frame size greater than or equal to this parameter.
+	  If unsure, leave the default value 100.
+
 endif
diff --git a/scripts/gcc-plugins/stackleak_plugin.c b/scripts/gcc-plugins/stackleak_plugin.c
new file mode 100644
index 0000000000000..2f48da98b5d42
--- /dev/null
+++ b/scripts/gcc-plugins/stackleak_plugin.c
@@ -0,0 +1,427 @@
+/*
+ * Copyright 2011-2017 by the PaX Team <pageexec@freemail.hu>
+ * Modified by Alexander Popov <alex.popov@linux.com>
+ * Licensed under the GPL v2
+ *
+ * Note: the choice of the license means that the compilation process is
+ * NOT 'eligible' as defined by gcc's library exception to the GPL v3,
+ * but for the kernel it doesn't matter since it doesn't link against
+ * any of the gcc libraries
+ *
+ * This gcc plugin is needed for tracking the lowest border of the kernel stack.
+ * It instruments the kernel code inserting stackleak_track_stack() calls:
+ *  - after alloca();
+ *  - for the functions with a stack frame size greater than or equal
+ *     to the "track-min-size" plugin parameter.
+ *
+ * This plugin is ported from grsecurity/PaX. For more information see:
+ *   https://grsecurity.net/
+ *   https://pax.grsecurity.net/
+ *
+ * Debugging:
+ *  - use fprintf() to stderr, debug_generic_expr(), debug_gimple_stmt(),
+ *     print_rtl() and print_simple_rtl();
+ *  - add "-fdump-tree-all -fdump-rtl-all" to the plugin CFLAGS in
+ *     Makefile.gcc-plugins to see the verbose dumps of the gcc passes;
+ *  - use gcc -E to understand the preprocessing shenanigans;
+ *  - use gcc with enabled CFG/GIMPLE/SSA verification (--enable-checking).
+ */
+
+#include "gcc-common.h"
+
+__visible int plugin_is_GPL_compatible;
+
+static int track_frame_size = -1;
+static const char track_function[] = "stackleak_track_stack";
+
+/*
+ * Mark these global variables (roots) for gcc garbage collector since
+ * they point to the garbage-collected memory.
+ */
+static GTY(()) tree track_function_decl;
+
+static struct plugin_info stackleak_plugin_info = {
+	.version = "201707101337",
+	.help = "track-min-size=nn\ttrack stack for functions with a stack frame size >= nn bytes\n"
+		"disable\t\tdo not activate the plugin\n"
+};
+
+static void stackleak_add_track_stack(gimple_stmt_iterator *gsi, bool after)
+{
+	gimple stmt;
+	gcall *stackleak_track_stack;
+	cgraph_node_ptr node;
+	int frequency;
+	basic_block bb;
+
+	/* Insert call to void stackleak_track_stack(void) */
+	stmt = gimple_build_call(track_function_decl, 0);
+	stackleak_track_stack = as_a_gcall(stmt);
+	if (after) {
+		gsi_insert_after(gsi, stackleak_track_stack,
+						GSI_CONTINUE_LINKING);
+	} else {
+		gsi_insert_before(gsi, stackleak_track_stack, GSI_SAME_STMT);
+	}
+
+	/* Update the cgraph */
+	bb = gimple_bb(stackleak_track_stack);
+	node = cgraph_get_create_node(track_function_decl);
+	gcc_assert(node);
+	frequency = compute_call_stmt_bb_frequency(current_function_decl, bb);
+	cgraph_create_edge(cgraph_get_node(current_function_decl), node,
+			stackleak_track_stack, bb->count, frequency);
+}
+
+static bool is_alloca(gimple stmt)
+{
+	if (gimple_call_builtin_p(stmt, BUILT_IN_ALLOCA))
+		return true;
+
+#if BUILDING_GCC_VERSION >= 4007
+	if (gimple_call_builtin_p(stmt, BUILT_IN_ALLOCA_WITH_ALIGN))
+		return true;
+#endif
+
+	return false;
+}
+
+/*
+ * Work with the GIMPLE representation of the code. Insert the
+ * stackleak_track_stack() call after alloca() and into the beginning
+ * of the function if it is not instrumented.
+ */
+static unsigned int stackleak_instrument_execute(void)
+{
+	basic_block bb, entry_bb;
+	bool prologue_instrumented = false, is_leaf = true;
+	gimple_stmt_iterator gsi;
+
+	/*
+	 * ENTRY_BLOCK_PTR is a basic block which represents possible entry
+	 * point of a function. This block does not contain any code and
+	 * has a CFG edge to its successor.
+	 */
+	gcc_assert(single_succ_p(ENTRY_BLOCK_PTR_FOR_FN(cfun)));
+	entry_bb = single_succ(ENTRY_BLOCK_PTR_FOR_FN(cfun));
+
+	/*
+	 * Loop through the GIMPLE statements in each of cfun basic blocks.
+	 * cfun is a global variable which represents the function that is
+	 * currently processed.
+	 */
+	FOR_EACH_BB_FN(bb, cfun) {
+		for (gsi = gsi_start_bb(bb); !gsi_end_p(gsi); gsi_next(&gsi)) {
+			gimple stmt;
+
+			stmt = gsi_stmt(gsi);
+
+			/* Leaf function is a function which makes no calls */
+			if (is_gimple_call(stmt))
+				is_leaf = false;
+
+			if (!is_alloca(stmt))
+				continue;
+
+			/* Insert stackleak_track_stack() call after alloca() */
+			stackleak_add_track_stack(&gsi, true);
+			if (bb == entry_bb)
+				prologue_instrumented = true;
+		}
+	}
+
+	if (prologue_instrumented)
+		return 0;
+
+	/*
+	 * Special cases to skip the instrumentation.
+	 *
+	 * Taking the address of static inline functions materializes them,
+	 * but we mustn't instrument some of them as the resulting stack
+	 * alignment required by the function call ABI will break other
+	 * assumptions regarding the expected (but not otherwise enforced)
+	 * register clobbering ABI.
+	 *
+	 * Case in point: native_save_fl on amd64 when optimized for size
+	 * clobbers rdx if it were instrumented here.
+	 *
+	 * TODO: any more special cases?
+	 */
+	if (is_leaf &&
+	    !TREE_PUBLIC(current_function_decl) &&
+	    DECL_DECLARED_INLINE_P(current_function_decl)) {
+		return 0;
+	}
+
+	if (is_leaf &&
+	    !strncmp(IDENTIFIER_POINTER(DECL_NAME(current_function_decl)),
+		     "_paravirt_", 10)) {
+		return 0;
+	}
+
+	/* Insert stackleak_track_stack() call at the function beginning */
+	bb = entry_bb;
+	if (!single_pred_p(bb)) {
+		/* gcc_assert(bb_loop_depth(bb) ||
+				(bb->flags & BB_IRREDUCIBLE_LOOP)); */
+		split_edge(single_succ_edge(ENTRY_BLOCK_PTR_FOR_FN(cfun)));
+		gcc_assert(single_succ_p(ENTRY_BLOCK_PTR_FOR_FN(cfun)));
+		bb = single_succ(ENTRY_BLOCK_PTR_FOR_FN(cfun));
+	}
+	gsi = gsi_after_labels(bb);
+	stackleak_add_track_stack(&gsi, false);
+
+	return 0;
+}
+
+static bool large_stack_frame(void)
+{
+#if BUILDING_GCC_VERSION >= 8000
+	return maybe_ge(get_frame_size(), track_frame_size);
+#else
+	return (get_frame_size() >= track_frame_size);
+#endif
+}
+
+/*
+ * Work with the RTL representation of the code.
+ * Remove the unneeded stackleak_track_stack() calls from the functions
+ * which don't call alloca() and don't have a large enough stack frame size.
+ */
+static unsigned int stackleak_cleanup_execute(void)
+{
+	rtx_insn *insn, *next;
+
+	if (cfun->calls_alloca)
+		return 0;
+
+	if (large_stack_frame())
+		return 0;
+
+	/*
+	 * Find stackleak_track_stack() calls. Loop through the chain of insns,
+	 * which is an RTL representation of the code for a function.
+	 *
+	 * The example of a matching insn:
+	 *  (call_insn 8 4 10 2 (call (mem (symbol_ref ("stackleak_track_stack")
+	 *  [flags 0x41] <function_decl 0x7f7cd3302a80 stackleak_track_stack>)
+	 *  [0 stackleak_track_stack S1 A8]) (0)) 675 {*call} (expr_list
+	 *  (symbol_ref ("stackleak_track_stack") [flags 0x41] <function_decl
+	 *  0x7f7cd3302a80 stackleak_track_stack>) (expr_list (0) (nil))) (nil))
+	 */
+	for (insn = get_insns(); insn; insn = next) {
+		rtx body;
+
+		next = NEXT_INSN(insn);
+
+		/* Check the expression code of the insn */
+		if (!CALL_P(insn))
+			continue;
+
+		/*
+		 * Check the expression code of the insn body, which is an RTL
+		 * Expression (RTX) describing the side effect performed by
+		 * that insn.
+		 */
+		body = PATTERN(insn);
+
+		if (GET_CODE(body) == PARALLEL)
+			body = XVECEXP(body, 0, 0);
+
+		if (GET_CODE(body) != CALL)
+			continue;
+
+		/*
+		 * Check the first operand of the call expression. It should
+		 * be a mem RTX describing the needed subroutine with a
+		 * symbol_ref RTX.
+		 */
+		body = XEXP(body, 0);
+		if (GET_CODE(body) != MEM)
+			continue;
+
+		body = XEXP(body, 0);
+		if (GET_CODE(body) != SYMBOL_REF)
+			continue;
+
+		if (SYMBOL_REF_DECL(body) != track_function_decl)
+			continue;
+
+		/* Delete the stackleak_track_stack() call */
+		delete_insn_and_edges(insn);
+#if BUILDING_GCC_VERSION >= 4007 && BUILDING_GCC_VERSION < 8000
+		if (GET_CODE(next) == NOTE &&
+		    NOTE_KIND(next) == NOTE_INSN_CALL_ARG_LOCATION) {
+			insn = next;
+			next = NEXT_INSN(insn);
+			delete_insn_and_edges(insn);
+		}
+#endif
+	}
+
+	return 0;
+}
+
+static bool stackleak_gate(void)
+{
+	tree section;
+
+	section = lookup_attribute("section",
+				   DECL_ATTRIBUTES(current_function_decl));
+	if (section && TREE_VALUE(section)) {
+		section = TREE_VALUE(TREE_VALUE(section));
+
+		if (!strncmp(TREE_STRING_POINTER(section), ".init.text", 10))
+			return false;
+		if (!strncmp(TREE_STRING_POINTER(section), ".devinit.text", 13))
+			return false;
+		if (!strncmp(TREE_STRING_POINTER(section), ".cpuinit.text", 13))
+			return false;
+		if (!strncmp(TREE_STRING_POINTER(section), ".meminit.text", 13))
+			return false;
+	}
+
+	return track_frame_size >= 0;
+}
+
+/* Build the function declaration for stackleak_track_stack() */
+static void stackleak_start_unit(void *gcc_data __unused,
+				 void *user_data __unused)
+{
+	tree fntype;
+
+	/* void stackleak_track_stack(void) */
+	fntype = build_function_type_list(void_type_node, NULL_TREE);
+	track_function_decl = build_fn_decl(track_function, fntype);
+	DECL_ASSEMBLER_NAME(track_function_decl); /* for LTO */
+	TREE_PUBLIC(track_function_decl) = 1;
+	TREE_USED(track_function_decl) = 1;
+	DECL_EXTERNAL(track_function_decl) = 1;
+	DECL_ARTIFICIAL(track_function_decl) = 1;
+	DECL_PRESERVE_P(track_function_decl) = 1;
+}
+
+/*
+ * Pass gate function is a predicate function that gets executed before the
+ * corresponding pass. If the return value is 'true' the pass gets executed,
+ * otherwise, it is skipped.
+ */
+static bool stackleak_instrument_gate(void)
+{
+	return stackleak_gate();
+}
+
+#define PASS_NAME stackleak_instrument
+#define PROPERTIES_REQUIRED PROP_gimple_leh | PROP_cfg
+#define TODO_FLAGS_START TODO_verify_ssa | TODO_verify_flow | TODO_verify_stmts
+#define TODO_FLAGS_FINISH TODO_verify_ssa | TODO_verify_stmts | TODO_dump_func \
+			| TODO_update_ssa | TODO_rebuild_cgraph_edges
+#include "gcc-generate-gimple-pass.h"
+
+static bool stackleak_cleanup_gate(void)
+{
+	return stackleak_gate();
+}
+
+#define PASS_NAME stackleak_cleanup
+#define TODO_FLAGS_FINISH TODO_dump_func
+#include "gcc-generate-rtl-pass.h"
+
+/*
+ * Every gcc plugin exports a plugin_init() function that is called right
+ * after the plugin is loaded. This function is responsible for registering
+ * the plugin callbacks and doing other required initialization.
+ */
+__visible int plugin_init(struct plugin_name_args *plugin_info,
+			  struct plugin_gcc_version *version)
+{
+	const char * const plugin_name = plugin_info->base_name;
+	const int argc = plugin_info->argc;
+	const struct plugin_argument * const argv = plugin_info->argv;
+	int i = 0;
+
+	/* Extra GGC root tables describing our GTY-ed data */
+	static const struct ggc_root_tab gt_ggc_r_gt_stackleak[] = {
+		{
+			.base = &track_function_decl,
+			.nelt = 1,
+			.stride = sizeof(track_function_decl),
+			.cb = &gt_ggc_mx_tree_node,
+			.pchw = &gt_pch_nx_tree_node
+		},
+		LAST_GGC_ROOT_TAB
+	};
+
+	/*
+	 * The stackleak_instrument pass should be executed before the
+	 * "optimized" pass, which is the control flow graph cleanup that is
+	 * performed just before expanding gcc trees to the RTL. In former
+	 * versions of the plugin this new pass was inserted before the
+	 * "tree_profile" pass, which is currently called "profile".
+	 */
+	PASS_INFO(stackleak_instrument, "optimized", 1,
+						PASS_POS_INSERT_BEFORE);
+
+	/*
+	 * The stackleak_cleanup pass should be executed after the
+	 * "reload" pass, when the stack frame size is final.
+	 */
+	PASS_INFO(stackleak_cleanup, "reload", 1, PASS_POS_INSERT_AFTER);
+
+	if (!plugin_default_version_check(version, &gcc_version)) {
+		error(G_("incompatible gcc/plugin versions"));
+		return 1;
+	}
+
+	/* Parse the plugin arguments */
+	for (i = 0; i < argc; i++) {
+		if (!strcmp(argv[i].key, "disable"))
+			return 0;
+
+		if (!strcmp(argv[i].key, "track-min-size")) {
+			if (!argv[i].value) {
+				error(G_("no value supplied for option '-fplugin-arg-%s-%s'"),
+					plugin_name, argv[i].key);
+				return 1;
+			}
+
+			track_frame_size = atoi(argv[i].value);
+			if (track_frame_size < 0) {
+				error(G_("invalid option argument '-fplugin-arg-%s-%s=%s'"),
+					plugin_name, argv[i].key, argv[i].value);
+				return 1;
+			}
+		} else {
+			error(G_("unknown option '-fplugin-arg-%s-%s'"),
+					plugin_name, argv[i].key);
+			return 1;
+		}
+	}
+
+	/* Give the information about the plugin */
+	register_callback(plugin_name, PLUGIN_INFO, NULL,
+						&stackleak_plugin_info);
+
+	/* Register to be called before processing a translation unit */
+	register_callback(plugin_name, PLUGIN_START_UNIT,
+					&stackleak_start_unit, NULL);
+
+	/* Register an extra GCC garbage collector (GGC) root table */
+	register_callback(plugin_name, PLUGIN_REGISTER_GGC_ROOTS, NULL,
+					(void *)&gt_ggc_r_gt_stackleak);
+
+	/*
+	 * Hook into the Pass Manager to register new gcc passes.
+	 *
+	 * The stack frame size info is available only at the last RTL pass,
+	 * when it's too late to insert complex code like a function call.
+	 * So we register two gcc passes to instrument every function at first
+	 * and remove the unneeded instrumentation later.
+	 */
+	register_callback(plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL,
+					&stackleak_instrument_pass_info);
+	register_callback(plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL,
+					&stackleak_cleanup_pass_info);
+
+	return 0;
+}
-- 
GitLab


From f90d1e0c7804b52e12fea501aa46a12c1ff6a567 Mon Sep 17 00:00:00 2001
From: Alexander Popov <alex.popov@linux.com>
Date: Fri, 17 Aug 2018 01:17:00 +0300
Subject: [PATCH 0004/1890] lkdtm: Add a test for STACKLEAK

Introduce an lkdtm test for the STACKLEAK feature: check that the
current task stack is properly erased (filled with STACKLEAK_POISON).

Signed-off-by: Alexander Popov <alex.popov@linux.com>
Signed-off-by: Tycho Andersen <tycho@tycho.ws>
Tested-by: Laura Abbott <labbott@redhat.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 drivers/misc/lkdtm/Makefile    |  2 +
 drivers/misc/lkdtm/core.c      |  1 +
 drivers/misc/lkdtm/lkdtm.h     |  3 ++
 drivers/misc/lkdtm/stackleak.c | 73 ++++++++++++++++++++++++++++++++++
 4 files changed, 79 insertions(+)
 create mode 100644 drivers/misc/lkdtm/stackleak.c

diff --git a/drivers/misc/lkdtm/Makefile b/drivers/misc/lkdtm/Makefile
index 3370a4138e942..951c984de61ae 100644
--- a/drivers/misc/lkdtm/Makefile
+++ b/drivers/misc/lkdtm/Makefile
@@ -8,7 +8,9 @@ lkdtm-$(CONFIG_LKDTM)		+= perms.o
 lkdtm-$(CONFIG_LKDTM)		+= refcount.o
 lkdtm-$(CONFIG_LKDTM)		+= rodata_objcopy.o
 lkdtm-$(CONFIG_LKDTM)		+= usercopy.o
+lkdtm-$(CONFIG_LKDTM)		+= stackleak.o
 
+KASAN_SANITIZE_stackleak.o	:= n
 KCOV_INSTRUMENT_rodata.o	:= n
 
 OBJCOPYFLAGS :=
diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c
index 2154d1bfd18b6..aca26d81e9b8d 100644
--- a/drivers/misc/lkdtm/core.c
+++ b/drivers/misc/lkdtm/core.c
@@ -183,6 +183,7 @@ static const struct crashtype crashtypes[] = {
 	CRASHTYPE(USERCOPY_STACK_FRAME_FROM),
 	CRASHTYPE(USERCOPY_STACK_BEYOND),
 	CRASHTYPE(USERCOPY_KERNEL),
+	CRASHTYPE(STACKLEAK_ERASING),
 };
 
 
diff --git a/drivers/misc/lkdtm/lkdtm.h b/drivers/misc/lkdtm/lkdtm.h
index 9e513dcfd8093..b611b157c84fa 100644
--- a/drivers/misc/lkdtm/lkdtm.h
+++ b/drivers/misc/lkdtm/lkdtm.h
@@ -83,4 +83,7 @@ void lkdtm_USERCOPY_STACK_FRAME_FROM(void);
 void lkdtm_USERCOPY_STACK_BEYOND(void);
 void lkdtm_USERCOPY_KERNEL(void);
 
+/* lkdtm_stackleak.c */
+void lkdtm_STACKLEAK_ERASING(void);
+
 #endif
diff --git a/drivers/misc/lkdtm/stackleak.c b/drivers/misc/lkdtm/stackleak.c
new file mode 100644
index 0000000000000..d5a084475abc5
--- /dev/null
+++ b/drivers/misc/lkdtm/stackleak.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This code tests that the current task stack is properly erased (filled
+ * with STACKLEAK_POISON).
+ *
+ * Authors:
+ *   Alexander Popov <alex.popov@linux.com>
+ *   Tycho Andersen <tycho@tycho.ws>
+ */
+
+#include "lkdtm.h"
+#include <linux/stackleak.h>
+
+void lkdtm_STACKLEAK_ERASING(void)
+{
+	unsigned long *sp, left, found, i;
+	const unsigned long check_depth =
+			STACKLEAK_SEARCH_DEPTH / sizeof(unsigned long);
+
+	/*
+	 * For the details about the alignment of the poison values, see
+	 * the comment in stackleak_track_stack().
+	 */
+	sp = PTR_ALIGN(&i, sizeof(unsigned long));
+
+	left = ((unsigned long)sp & (THREAD_SIZE - 1)) / sizeof(unsigned long);
+	sp--;
+
+	/*
+	 * One 'long int' at the bottom of the thread stack is reserved
+	 * and not poisoned.
+	 */
+	if (left > 1) {
+		left--;
+	} else {
+		pr_err("FAIL: not enough stack space for the test\n");
+		return;
+	}
+
+	pr_info("checking unused part of the thread stack (%lu bytes)...\n",
+					left * sizeof(unsigned long));
+
+	/*
+	 * Search for 'check_depth' poison values in a row (just like
+	 * stackleak_erase() does).
+	 */
+	for (i = 0, found = 0; i < left && found <= check_depth; i++) {
+		if (*(sp - i) == STACKLEAK_POISON)
+			found++;
+		else
+			found = 0;
+	}
+
+	if (found <= check_depth) {
+		pr_err("FAIL: thread stack is not erased (checked %lu bytes)\n",
+						i * sizeof(unsigned long));
+		return;
+	}
+
+	pr_info("first %lu bytes are unpoisoned\n",
+				(i - found) * sizeof(unsigned long));
+
+	/* The rest of thread stack should be erased */
+	for (; i < left; i++) {
+		if (*(sp - i) != STACKLEAK_POISON) {
+			pr_err("FAIL: thread stack is NOT properly erased\n");
+			return;
+		}
+	}
+
+	pr_info("OK: the rest of the thread stack is properly erased\n");
+	return;
+}
-- 
GitLab


From c8d126275a5fa59394fe17109bdb9812fed296b8 Mon Sep 17 00:00:00 2001
From: Alexander Popov <alex.popov@linux.com>
Date: Fri, 17 Aug 2018 01:17:01 +0300
Subject: [PATCH 0005/1890] fs/proc: Show STACKLEAK metrics in the /proc file
 system

Introduce CONFIG_STACKLEAK_METRICS providing STACKLEAK information about
tasks via the /proc file system. In particular, /proc/<pid>/stack_depth
shows the maximum kernel stack consumption for the current and previous
syscalls. Although this information is not precise, it can be useful for
estimating the STACKLEAK performance impact for your workloads.

Suggested-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Alexander Popov <alex.popov@linux.com>
Tested-by: Laura Abbott <labbott@redhat.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 fs/proc/base.c              | 18 ++++++++++++++++++
 include/linux/sched.h       |  1 +
 include/linux/stackleak.h   |  3 +++
 kernel/stackleak.c          |  4 ++++
 scripts/gcc-plugins/Kconfig | 12 ++++++++++++
 5 files changed, 38 insertions(+)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index ccf86f16d9f01..2a238d68610ef 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2891,6 +2891,21 @@ static int proc_pid_patch_state(struct seq_file *m, struct pid_namespace *ns,
 }
 #endif /* CONFIG_LIVEPATCH */
 
+#ifdef CONFIG_STACKLEAK_METRICS
+static int proc_stack_depth(struct seq_file *m, struct pid_namespace *ns,
+				struct pid *pid, struct task_struct *task)
+{
+	unsigned long prev_depth = THREAD_SIZE -
+				(task->prev_lowest_stack & (THREAD_SIZE - 1));
+	unsigned long depth = THREAD_SIZE -
+				(task->lowest_stack & (THREAD_SIZE - 1));
+
+	seq_printf(m, "previous stack depth: %lu\nstack depth: %lu\n",
+							prev_depth, depth);
+	return 0;
+}
+#endif /* CONFIG_STACKLEAK_METRICS */
+
 /*
  * Thread groups
  */
@@ -2992,6 +3007,9 @@ static const struct pid_entry tgid_base_stuff[] = {
 #ifdef CONFIG_LIVEPATCH
 	ONE("patch_state",  S_IRUSR, proc_pid_patch_state),
 #endif
+#ifdef CONFIG_STACKLEAK_METRICS
+	ONE("stack_depth", S_IRUGO, proc_stack_depth),
+#endif
 };
 
 static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c1a23acd24e72..ae9d10e14b82a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1194,6 +1194,7 @@ struct task_struct {
 
 #ifdef CONFIG_GCC_PLUGIN_STACKLEAK
 	unsigned long			lowest_stack;
+	unsigned long			prev_lowest_stack;
 #endif
 
 	/*
diff --git a/include/linux/stackleak.h b/include/linux/stackleak.h
index 628c2b947b892..b911b973d328b 100644
--- a/include/linux/stackleak.h
+++ b/include/linux/stackleak.h
@@ -18,6 +18,9 @@
 static inline void stackleak_task_init(struct task_struct *t)
 {
 	t->lowest_stack = (unsigned long)end_of_stack(t) + sizeof(unsigned long);
+# ifdef CONFIG_STACKLEAK_METRICS
+	t->prev_lowest_stack = t->lowest_stack;
+# endif
 }
 #else /* !CONFIG_GCC_PLUGIN_STACKLEAK */
 static inline void stackleak_task_init(struct task_struct *t) { }
diff --git a/kernel/stackleak.c b/kernel/stackleak.c
index 628485db37ba7..f66239572c898 100644
--- a/kernel/stackleak.c
+++ b/kernel/stackleak.c
@@ -41,6 +41,10 @@ asmlinkage void stackleak_erase(void)
 	if (kstack_ptr == boundary)
 		kstack_ptr += sizeof(unsigned long);
 
+#ifdef CONFIG_STACKLEAK_METRICS
+	current->prev_lowest_stack = kstack_ptr;
+#endif
+
 	/*
 	 * Now write the poison value to the kernel stack. Start from
 	 * 'kstack_ptr' and move up till the new 'boundary'. We assume that
diff --git a/scripts/gcc-plugins/Kconfig b/scripts/gcc-plugins/Kconfig
index c65fdd823591b..b0a015ef52685 100644
--- a/scripts/gcc-plugins/Kconfig
+++ b/scripts/gcc-plugins/Kconfig
@@ -170,4 +170,16 @@ config STACKLEAK_TRACK_MIN_SIZE
 	  a stack frame size greater than or equal to this parameter.
 	  If unsure, leave the default value 100.
 
+config STACKLEAK_METRICS
+	bool "Show STACKLEAK metrics in the /proc file system"
+	depends on GCC_PLUGIN_STACKLEAK
+	depends on PROC_FS
+	help
+	  If this is set, STACKLEAK metrics for every task are available in
+	  the /proc file system. In particular, /proc/<pid>/stack_depth
+	  shows the maximum kernel stack consumption for the current and
+	  previous syscalls. Although this information is not precise, it
+	  can be useful for estimating the STACKLEAK performance impact for
+	  your workloads.
+
 endif
-- 
GitLab


From ed535a2dae1836d15c71e250475952881265d244 Mon Sep 17 00:00:00 2001
From: Alexander Popov <alex.popov@linux.com>
Date: Fri, 17 Aug 2018 01:17:02 +0300
Subject: [PATCH 0006/1890] doc: self-protection: Add information about
 STACKLEAK feature

Add information about STACKLEAK feature to the "Memory poisoning"
section of self-protection.rst.

Signed-off-by: Alexander Popov <alex.popov@linux.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 Documentation/security/self-protection.rst | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/Documentation/security/self-protection.rst b/Documentation/security/self-protection.rst
index e1ca698e00063..f584fb74b4ff2 100644
--- a/Documentation/security/self-protection.rst
+++ b/Documentation/security/self-protection.rst
@@ -302,11 +302,11 @@ sure structure holes are cleared.
 Memory poisoning
 ----------------
 
-When releasing memory, it is best to poison the contents (clear stack on
-syscall return, wipe heap memory on a free), to avoid reuse attacks that
-rely on the old contents of memory. This frustrates many uninitialized
-variable attacks, stack content exposures, heap content exposures, and
-use-after-free attacks.
+When releasing memory, it is best to poison the contents, to avoid reuse
+attacks that rely on the old contents of memory. E.g., clear stack on a
+syscall return (``CONFIG_GCC_PLUGIN_STACKLEAK``), wipe heap memory on a
+free. This frustrates many uninitialized variable attacks, stack content
+exposures, heap content exposures, and use-after-free attacks.
 
 Destination tracking
 --------------------
-- 
GitLab


From 964c9dff0091893a9a74a88edf984c6da0b779f7 Mon Sep 17 00:00:00 2001
From: Alexander Popov <alex.popov@linux.com>
Date: Fri, 17 Aug 2018 01:17:03 +0300
Subject: [PATCH 0007/1890] stackleak: Allow runtime disabling of kernel stack
 erasing

Introduce CONFIG_STACKLEAK_RUNTIME_DISABLE option, which provides
'stack_erasing' sysctl. It can be used in runtime to control kernel
stack erasing for kernels built with CONFIG_GCC_PLUGIN_STACKLEAK.

Suggested-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Alexander Popov <alex.popov@linux.com>
Tested-by: Laura Abbott <labbott@redhat.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 Documentation/sysctl/kernel.txt | 18 ++++++++++++++++
 include/linux/stackleak.h       |  6 ++++++
 kernel/stackleak.c              | 38 +++++++++++++++++++++++++++++++++
 kernel/sysctl.c                 | 15 ++++++++++++-
 scripts/gcc-plugins/Kconfig     |  8 +++++++
 5 files changed, 84 insertions(+), 1 deletion(-)

diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 37a679501ddc6..1b8775298cf7a 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -89,6 +89,7 @@ show up in /proc/sys/kernel:
 - shmmni
 - softlockup_all_cpu_backtrace
 - soft_watchdog
+- stack_erasing
 - stop-a                      [ SPARC only ]
 - sysrq                       ==> Documentation/admin-guide/sysrq.rst
 - sysctl_writes_strict
@@ -987,6 +988,23 @@ detect a hard lockup condition.
 
 ==============================================================
 
+stack_erasing
+
+This parameter can be used to control kernel stack erasing at the end
+of syscalls for kernels built with CONFIG_GCC_PLUGIN_STACKLEAK.
+
+That erasing reduces the information which kernel stack leak bugs
+can reveal and blocks some uninitialized stack variable attacks.
+The tradeoff is the performance impact: on a single CPU system kernel
+compilation sees a 1% slowdown, other systems and workloads may vary.
+
+  0: kernel stack erasing is disabled, STACKLEAK_METRICS are not updated.
+
+  1: kernel stack erasing is enabled (default), it is performed before
+     returning to the userspace at the end of syscalls.
+
+==============================================================
+
 tainted:
 
 Non-zero if the kernel has been tainted. Numeric values, which can be
diff --git a/include/linux/stackleak.h b/include/linux/stackleak.h
index b911b973d328b..3d5c3271a9a8c 100644
--- a/include/linux/stackleak.h
+++ b/include/linux/stackleak.h
@@ -22,6 +22,12 @@ static inline void stackleak_task_init(struct task_struct *t)
 	t->prev_lowest_stack = t->lowest_stack;
 # endif
 }
+
+#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
+int stack_erasing_sysctl(struct ctl_table *table, int write,
+			void __user *buffer, size_t *lenp, loff_t *ppos);
+#endif
+
 #else /* !CONFIG_GCC_PLUGIN_STACKLEAK */
 static inline void stackleak_task_init(struct task_struct *t) { }
 #endif
diff --git a/kernel/stackleak.c b/kernel/stackleak.c
index f66239572c898..e42892926244d 100644
--- a/kernel/stackleak.c
+++ b/kernel/stackleak.c
@@ -12,6 +12,41 @@
 
 #include <linux/stackleak.h>
 
+#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
+#include <linux/jump_label.h>
+#include <linux/sysctl.h>
+
+static DEFINE_STATIC_KEY_FALSE(stack_erasing_bypass);
+
+int stack_erasing_sysctl(struct ctl_table *table, int write,
+			void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int ret = 0;
+	int state = !static_branch_unlikely(&stack_erasing_bypass);
+	int prev_state = state;
+
+	table->data = &state;
+	table->maxlen = sizeof(int);
+	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+	state = !!state;
+	if (ret || !write || state == prev_state)
+		return ret;
+
+	if (state)
+		static_branch_disable(&stack_erasing_bypass);
+	else
+		static_branch_enable(&stack_erasing_bypass);
+
+	pr_warn("stackleak: kernel stack erasing is %s\n",
+					state ? "enabled" : "disabled");
+	return ret;
+}
+
+#define skip_erasing()	static_branch_unlikely(&stack_erasing_bypass)
+#else
+#define skip_erasing()	false
+#endif /* CONFIG_STACKLEAK_RUNTIME_DISABLE */
+
 asmlinkage void stackleak_erase(void)
 {
 	/* It would be nice not to have 'kstack_ptr' and 'boundary' on stack */
@@ -20,6 +55,9 @@ asmlinkage void stackleak_erase(void)
 	unsigned int poison_count = 0;
 	const unsigned int depth = STACKLEAK_SEARCH_DEPTH / sizeof(unsigned long);
 
+	if (skip_erasing())
+		return;
+
 	/* Check that 'lowest_stack' value is sane */
 	if (unlikely(kstack_ptr - boundary >= THREAD_SIZE))
 		kstack_ptr = boundary;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index cc02050fd0c49..3ae223f7b5dfa 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -91,7 +91,9 @@
 #ifdef CONFIG_CHR_DEV_SG
 #include <scsi/sg.h>
 #endif
-
+#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
+#include <linux/stackleak.h>
+#endif
 #ifdef CONFIG_LOCKUP_DETECTOR
 #include <linux/nmi.h>
 #endif
@@ -1232,6 +1234,17 @@ static struct ctl_table kern_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
+#endif
+#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
+	{
+		.procname	= "stack_erasing",
+		.data		= NULL,
+		.maxlen		= sizeof(int),
+		.mode		= 0600,
+		.proc_handler	= stack_erasing_sysctl,
+		.extra1		= &zero,
+		.extra2		= &one,
+	},
 #endif
 	{ }
 };
diff --git a/scripts/gcc-plugins/Kconfig b/scripts/gcc-plugins/Kconfig
index b0a015ef52685..0d5c799688f0a 100644
--- a/scripts/gcc-plugins/Kconfig
+++ b/scripts/gcc-plugins/Kconfig
@@ -182,4 +182,12 @@ config STACKLEAK_METRICS
 	  can be useful for estimating the STACKLEAK performance impact for
 	  your workloads.
 
+config STACKLEAK_RUNTIME_DISABLE
+	bool "Allow runtime disabling of kernel stack erasing"
+	depends on GCC_PLUGIN_STACKLEAK
+	help
+	  This option provides 'stack_erasing' sysctl, which can be used in
+	  runtime to control kernel stack erasing for kernels built with
+	  CONFIG_GCC_PLUGIN_STACKLEAK.
+
 endif
-- 
GitLab


From 6fcde90466738b84a073e4f4d18c50015ee29fb2 Mon Sep 17 00:00:00 2001
From: Alexander Popov <alex.popov@linux.com>
Date: Fri, 17 Aug 2018 01:17:04 +0300
Subject: [PATCH 0008/1890] arm64: Drop unneeded stackleak_check_alloca()

Drop stackleak_check_alloca() for arm64 since the STACKLEAK gcc plugin now
doesn't track stack depth overflow caused by alloca().

Signed-off-by: Alexander Popov <alex.popov@linux.com>
Tested-by: Laura Abbott <labbott@redhat.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 arch/arm64/kernel/process.c | 22 ----------------------
 1 file changed, 22 deletions(-)

diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 7f1628effe6d7..740b31f77adee 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -493,25 +493,3 @@ void arch_setup_new_exec(void)
 {
 	current->mm->context.flags = is_compat_task() ? MMCF_AARCH32 : 0;
 }
-
-#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
-void __used stackleak_check_alloca(unsigned long size)
-{
-	unsigned long stack_left;
-	unsigned long current_sp = current_stack_pointer;
-	struct stack_info info;
-
-	BUG_ON(!on_accessible_stack(current, current_sp, &info));
-
-	stack_left = current_sp - info.low;
-
-	/*
-	 * There's a good chance we're almost out of stack space if this
-	 * is true. Using panic() over BUG() is more likely to give
-	 * reliable debugging output.
-	 */
-	if (size >= stack_left)
-		panic("alloca() over the kernel stack boundary\n");
-}
-EXPORT_SYMBOL(stackleak_check_alloca);
-#endif
-- 
GitLab


From 5f997580e8b12b9f585e34cc16304925d26ce49e Mon Sep 17 00:00:00 2001
From: Tony Jones <tonyj@suse.de>
Date: Thu, 6 Sep 2018 21:33:57 -0700
Subject: [PATCH 0009/1890] apparmor: Fix network performance issue in
 aa_label_sk_perm

The netperf benchmark shows a 5.73% reduction in throughput for
small (64 byte) transfers by unconfined tasks.

DEFINE_AUDIT_SK() in aa_label_sk_perm() should not be performed
unconditionally, rather only when the label is confined.

netperf-tcp
                            56974a6fc^              56974a6fc
Min       64         563.48 (   0.00%)      531.17 (  -5.73%)
Min       128       1056.92 (   0.00%)      999.44 (  -5.44%)
Min       256       1945.95 (   0.00%)     1867.97 (  -4.01%)
Min       1024      6761.40 (   0.00%)     6364.23 (  -5.87%)
Min       2048     11110.53 (   0.00%)    10606.20 (  -4.54%)
Min       3312     13692.67 (   0.00%)    13158.41 (  -3.90%)
Min       4096     14926.29 (   0.00%)    14457.46 (  -3.14%)
Min       8192     18399.34 (   0.00%)    18091.65 (  -1.67%)
Min       16384    21384.13 (   0.00%)    21158.05 (  -1.06%)
Hmean     64         564.96 (   0.00%)      534.38 (  -5.41%)
Hmean     128       1064.42 (   0.00%)     1010.12 (  -5.10%)
Hmean     256       1965.85 (   0.00%)     1879.16 (  -4.41%)
Hmean     1024      6839.77 (   0.00%)     6478.70 (  -5.28%)
Hmean     2048     11154.80 (   0.00%)    10671.13 (  -4.34%)
Hmean     3312     13838.12 (   0.00%)    13249.01 (  -4.26%)
Hmean     4096     15009.99 (   0.00%)    14561.36 (  -2.99%)
Hmean     8192     18975.57 (   0.00%)    18326.54 (  -3.42%)
Hmean     16384    21440.44 (   0.00%)    21324.59 (  -0.54%)
Stddev    64           1.24 (   0.00%)        2.85 (-130.64%)
Stddev    128          4.51 (   0.00%)        6.53 ( -44.84%)
Stddev    256         11.67 (   0.00%)        8.50 (  27.16%)
Stddev    1024        48.33 (   0.00%)       75.07 ( -55.34%)
Stddev    2048        54.82 (   0.00%)       65.16 ( -18.86%)
Stddev    3312       153.57 (   0.00%)       56.29 (  63.35%)
Stddev    4096       100.25 (   0.00%)       88.50 (  11.72%)
Stddev    8192       358.13 (   0.00%)      169.99 (  52.54%)
Stddev    16384       43.99 (   0.00%)      141.82 (-222.39%)

Signed-off-by: Tony Jones <tonyj@suse.de>
Fixes: 56974a6fcfef ("apparmor: add base infastructure for socket
mediation")
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/net.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/security/apparmor/net.c b/security/apparmor/net.c
index bb24cfa0a164c..d5d72dd1ca1f9 100644
--- a/security/apparmor/net.c
+++ b/security/apparmor/net.c
@@ -146,17 +146,20 @@ int aa_af_perm(struct aa_label *label, const char *op, u32 request, u16 family,
 static int aa_label_sk_perm(struct aa_label *label, const char *op, u32 request,
 			    struct sock *sk)
 {
-	struct aa_profile *profile;
-	DEFINE_AUDIT_SK(sa, op, sk);
+	int error = 0;
 
 	AA_BUG(!label);
 	AA_BUG(!sk);
 
-	if (unconfined(label))
-		return 0;
+	if (!unconfined(label)) {
+		struct aa_profile *profile;
+		DEFINE_AUDIT_SK(sa, op, sk);
 
-	return fn_for_each_confined(label, profile,
-			aa_profile_af_sk_perm(profile, &sa, request, sk));
+		error = fn_for_each_confined(label, profile,
+			    aa_profile_af_sk_perm(profile, &sa, request, sk));
+	}
+
+	return error;
 }
 
 int aa_sk_perm(const char *op, u32 request, struct sock *sk)
-- 
GitLab


From 4a60aa05a0634241ce17f957bf9fb5ac1eed6576 Mon Sep 17 00:00:00 2001
From: Allan Xavier <allan.x.xavier@oracle.com>
Date: Fri, 7 Sep 2018 08:12:01 -0500
Subject: [PATCH 0010/1890] objtool: Support per-function rodata sections

Add support for processing switch jump tables in objects with multiple
.rodata sections, such as those created by '-ffunction-sections' and
'-fdata-sections'.  Currently, objtool always looks in .rodata for jump
table information, which results in many "sibling call from callable
instruction with modified stack frame" warnings with objects compiled
using those flags.

The fix is comprised of three parts:

1. Flagging all .rodata sections when importing ELF information for
   easier checking later.

2. Keeping a reference to the section each relocation is from in order
   to get the list_head for the other relocations in that section.

3. Finding jump tables by following relocations to .rodata sections,
   rather than always referencing a single global .rodata section.

The patch has been tested without data sections enabled and no
differences in the resulting orc unwind information were seen.

Note that as objtool adds terminators to end of each .text section the
unwind information generated between a function+data sections build and
a normal build aren't directly comparable. Manual inspection suggests
that objtool is now generating the correct information, or at least
making more of an effort to do so than it did previously.

Signed-off-by: Allan Xavier <allan.x.xavier@oracle.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lkml.kernel.org/r/099bdc375195c490dda04db777ee0b95d566ded1.1536325914.git.jpoimboe@redhat.com
---
 tools/objtool/check.c | 38 ++++++++++++++++++++++++++++++++------
 tools/objtool/check.h |  4 ++--
 tools/objtool/elf.c   |  1 +
 tools/objtool/elf.h   |  3 ++-
 4 files changed, 37 insertions(+), 9 deletions(-)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 2928939b98ec2..0414a0d522621 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -836,7 +836,7 @@ static int add_switch_table(struct objtool_file *file, struct instruction *insn,
 	struct symbol *pfunc = insn->func->pfunc;
 	unsigned int prev_offset = 0;
 
-	list_for_each_entry_from(rela, &file->rodata->rela->rela_list, list) {
+	list_for_each_entry_from(rela, &table->rela_sec->rela_list, list) {
 		if (rela == next_table)
 			break;
 
@@ -926,6 +926,7 @@ static struct rela *find_switch_table(struct objtool_file *file,
 {
 	struct rela *text_rela, *rodata_rela;
 	struct instruction *orig_insn = insn;
+	struct section *rodata_sec;
 	unsigned long table_offset;
 
 	/*
@@ -953,10 +954,13 @@ static struct rela *find_switch_table(struct objtool_file *file,
 		/* look for a relocation which references .rodata */
 		text_rela = find_rela_by_dest_range(insn->sec, insn->offset,
 						    insn->len);
-		if (!text_rela || text_rela->sym != file->rodata->sym)
+		if (!text_rela || text_rela->sym->type != STT_SECTION ||
+		    !text_rela->sym->sec->rodata)
 			continue;
 
 		table_offset = text_rela->addend;
+		rodata_sec = text_rela->sym->sec;
+
 		if (text_rela->type == R_X86_64_PC32)
 			table_offset += 4;
 
@@ -964,10 +968,10 @@ static struct rela *find_switch_table(struct objtool_file *file,
 		 * Make sure the .rodata address isn't associated with a
 		 * symbol.  gcc jump tables are anonymous data.
 		 */
-		if (find_symbol_containing(file->rodata, table_offset))
+		if (find_symbol_containing(rodata_sec, table_offset))
 			continue;
 
-		rodata_rela = find_rela_by_dest(file->rodata, table_offset);
+		rodata_rela = find_rela_by_dest(rodata_sec, table_offset);
 		if (rodata_rela) {
 			/*
 			 * Use of RIP-relative switch jumps is quite rare, and
@@ -1052,7 +1056,7 @@ static int add_switch_table_alts(struct objtool_file *file)
 	struct symbol *func;
 	int ret;
 
-	if (!file->rodata || !file->rodata->rela)
+	if (!file->rodata)
 		return 0;
 
 	for_each_sec(file, sec) {
@@ -1198,10 +1202,33 @@ static int read_retpoline_hints(struct objtool_file *file)
 	return 0;
 }
 
+static void mark_rodata(struct objtool_file *file)
+{
+	struct section *sec;
+	bool found = false;
+
+	/*
+	 * This searches for the .rodata section or multiple .rodata.func_name
+	 * sections if -fdata-sections is being used. The .str.1.1 and .str.1.8
+	 * rodata sections are ignored as they don't contain jump tables.
+	 */
+	for_each_sec(file, sec) {
+		if (!strncmp(sec->name, ".rodata", 7) &&
+		    !strstr(sec->name, ".str1.")) {
+			sec->rodata = true;
+			found = true;
+		}
+	}
+
+	file->rodata = found;
+}
+
 static int decode_sections(struct objtool_file *file)
 {
 	int ret;
 
+	mark_rodata(file);
+
 	ret = decode_instructions(file);
 	if (ret)
 		return ret;
@@ -2171,7 +2198,6 @@ int check(const char *_objname, bool orc)
 	INIT_LIST_HEAD(&file.insn_list);
 	hash_init(file.insn_hash);
 	file.whitelist = find_section_by_name(file.elf, ".discard.func_stack_frame_non_standard");
-	file.rodata = find_section_by_name(file.elf, ".rodata");
 	file.c_file = find_section_by_name(file.elf, ".comment");
 	file.ignore_unreachables = no_unreachable;
 	file.hints = false;
diff --git a/tools/objtool/check.h b/tools/objtool/check.h
index 95700a2bcb7c1..e6e8a655b5563 100644
--- a/tools/objtool/check.h
+++ b/tools/objtool/check.h
@@ -60,8 +60,8 @@ struct objtool_file {
 	struct elf *elf;
 	struct list_head insn_list;
 	DECLARE_HASHTABLE(insn_hash, 16);
-	struct section *rodata, *whitelist;
-	bool ignore_unreachables, c_file, hints;
+	struct section *whitelist;
+	bool ignore_unreachables, c_file, hints, rodata;
 };
 
 int check(const char *objname, bool orc);
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 7ec85d567598c..f7082de1ee829 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -379,6 +379,7 @@ static int read_relas(struct elf *elf)
 			rela->offset = rela->rela.r_offset;
 			symndx = GELF_R_SYM(rela->rela.r_info);
 			rela->sym = find_symbol_by_index(elf, symndx);
+			rela->rela_sec = sec;
 			if (!rela->sym) {
 				WARN("can't find rela entry symbol %d for %s",
 				     symndx, sec->name);
diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h
index de5cd2ddded98..bc97ed86b9cd8 100644
--- a/tools/objtool/elf.h
+++ b/tools/objtool/elf.h
@@ -48,7 +48,7 @@ struct section {
 	char *name;
 	int idx;
 	unsigned int len;
-	bool changed, text;
+	bool changed, text, rodata;
 };
 
 struct symbol {
@@ -68,6 +68,7 @@ struct rela {
 	struct list_head list;
 	struct hlist_node hash;
 	GElf_Rela rela;
+	struct section *rela_sec;
 	struct symbol *sym;
 	unsigned int type;
 	unsigned long offset;
-- 
GitLab


From 0b59c25f91002c1dec0d0d848e5aaefa5f213c85 Mon Sep 17 00:00:00 2001
From: Igor Stoppa <igor.stoppa@gmail.com>
Date: Fri, 31 Aug 2018 22:44:22 +0300
Subject: [PATCH 0011/1890] mm: percpu: remove unnecessary unlikely()

WARN_ON() already contains an unlikely(), so it's not necessary to
wrap it into another.

Signed-off-by: Igor Stoppa <igor.stoppa@huawei.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: zijun_hu <zijun_hu@htc.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: linux-mm@kvack.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Dennis Zhou <dennis@kernel.org>
---
 mm/percpu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/percpu.c b/mm/percpu.c
index a749d4d96e3ec..f5c2796fe63e5 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -2588,7 +2588,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
 	BUG_ON(ai->nr_groups != 1);
 	upa = ai->alloc_size/ai->unit_size;
 	nr_g0_units = roundup(num_possible_cpus(), upa);
-	if (unlikely(WARN_ON(ai->groups[0].nr_units != nr_g0_units))) {
+	if (WARN_ON(ai->groups[0].nr_units != nr_g0_units)) {
 		pcpu_free_alloc_info(ai);
 		return -EINVAL;
 	}
-- 
GitLab


From 1f8266ff58840d698a1e96d2274189de1bdf7969 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Thu, 13 Sep 2018 18:12:09 +0200
Subject: [PATCH 0012/1890] apparmor: don't try to replace stale label in
 ptrace access check

As a comment above begin_current_label_crit_section() explains,
begin_current_label_crit_section() must run in sleepable context because
when label_is_stale() is true, aa_replace_current_label() runs, which uses
prepare_creds(), which can sleep.
Until now, the ptrace access check (which runs with a task lock held)
violated this rule.

Also add a might_sleep() assertion to begin_current_label_crit_section(),
because asserts are less likely to be ignored than comments.

Fixes: b2d09ae449ced ("apparmor: move ptrace checks to using labels")
Signed-off-by: Jann Horn <jannh@google.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/include/cred.h | 2 ++
 security/apparmor/lsm.c          | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/security/apparmor/include/cred.h b/security/apparmor/include/cred.h
index e287b7d0d4beb..265ae6641a064 100644
--- a/security/apparmor/include/cred.h
+++ b/security/apparmor/include/cred.h
@@ -151,6 +151,8 @@ static inline struct aa_label *begin_current_label_crit_section(void)
 {
 	struct aa_label *label = aa_current_raw_label();
 
+	might_sleep();
+
 	if (label_is_stale(label)) {
 		label = aa_get_newest_label(label);
 		if (aa_replace_current_label(label) == 0)
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index 74f17376202bd..f09fea0b4db7e 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -114,13 +114,13 @@ static int apparmor_ptrace_access_check(struct task_struct *child,
 	struct aa_label *tracer, *tracee;
 	int error;
 
-	tracer = begin_current_label_crit_section();
+	tracer = __begin_current_label_crit_section();
 	tracee = aa_get_task_label(child);
 	error = aa_may_ptrace(tracer, tracee,
 			(mode & PTRACE_MODE_READ) ? AA_PTRACE_READ
 						  : AA_PTRACE_TRACE);
 	aa_put_label(tracee);
-	end_current_label_crit_section(tracer);
+	__end_current_label_crit_section(tracer);
 
 	return error;
 }
-- 
GitLab


From b5bb425871186303e6936fa2581521bdd1964a58 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <natechancellor@gmail.com>
Date: Tue, 25 Sep 2018 12:44:59 -0700
Subject: [PATCH 0013/1890] arm64: percpu: Initialize ret in the default case

Clang warns that if the default case is taken, ret will be
uninitialized.

./arch/arm64/include/asm/percpu.h:196:2: warning: variable 'ret' is used
uninitialized whenever switch default is taken
[-Wsometimes-uninitialized]
        default:
        ^~~~~~~
./arch/arm64/include/asm/percpu.h:200:9: note: uninitialized use occurs
here
        return ret;
               ^~~
./arch/arm64/include/asm/percpu.h:157:19: note: initialize the variable
'ret' to silence this warning
        unsigned long ret, loop;
                         ^
                          = 0

This warning appears several times while building the erofs filesystem.
While it's not strictly wrong, the BUILD_BUG will prevent this from
becoming a true problem. Initialize ret to 0 in the default case right
before the BUILD_BUG to silence all of these warnings.

Reported-by: Prasad Sodagudi <psodagud@codeaurora.org>
Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Dennis Zhou <dennis@kernel.org>
---
 arch/arm64/include/asm/percpu.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h
index 9234013e759e5..21a81b59a0ccd 100644
--- a/arch/arm64/include/asm/percpu.h
+++ b/arch/arm64/include/asm/percpu.h
@@ -96,6 +96,7 @@ static inline unsigned long __percpu_##op(void *ptr,			\
 		: [val] "Ir" (val));					\
 		break;							\
 	default:							\
+		ret = 0;						\
 		BUILD_BUG();						\
 	}								\
 									\
@@ -125,6 +126,7 @@ static inline unsigned long __percpu_read(void *ptr, int size)
 		ret = READ_ONCE(*(u64 *)ptr);
 		break;
 	default:
+		ret = 0;
 		BUILD_BUG();
 	}
 
@@ -194,6 +196,7 @@ static inline unsigned long __percpu_xchg(void *ptr, unsigned long val,
 		: [val] "r" (val));
 		break;
 	default:
+		ret = 0;
 		BUILD_BUG();
 	}
 
-- 
GitLab


From f31fb2fe1ea4a03279c538612e7ecf0e5017639a Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Fri, 21 Sep 2018 12:13:07 +0200
Subject: [PATCH 0014/1890] pwm: tegra: Remove gratuituous blank line

It's common to follow a device tree ID table by the MODULE_DEVICE_TABLE
immediately, without an extra blank line between.

Signed-off-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/pwm-tegra.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/pwm/pwm-tegra.c b/drivers/pwm/pwm-tegra.c
index f8ebbece57b71..48c4595a0ffce 100644
--- a/drivers/pwm/pwm-tegra.c
+++ b/drivers/pwm/pwm-tegra.c
@@ -300,7 +300,6 @@ static const struct of_device_id tegra_pwm_of_match[] = {
 	{ .compatible = "nvidia,tegra186-pwm", .data = &tegra186_pwm_soc },
 	{ }
 };
-
 MODULE_DEVICE_TABLE(of, tegra_pwm_of_match);
 
 static const struct dev_pm_ops tegra_pwm_pm_ops = {
-- 
GitLab


From 29efbc6aea9d9bd9aa9870a9afc1882046303cf9 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
Date: Thu, 30 Aug 2018 19:07:27 +0200
Subject: [PATCH 0015/1890] Compiler Attributes: remove unused attributes

__optimize and __deprecate_for_modules are unused in
the whole kernel tree. Simply drop them.

Tested-by: Sedat Dilek <sedat.dilek@gmail.com> # on top of v4.19-rc5, clang 7
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
---
 include/linux/compiler-gcc.h   | 2 --
 include/linux/compiler.h       | 4 ----
 include/linux/compiler_types.h | 1 -
 3 files changed, 7 deletions(-)

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 4d36b27214fda..1302b425e625b 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -81,8 +81,6 @@
 
 #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
 
-#define __optimize(level)	__attribute__((__optimize__(level)))
-
 #define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
 
 #ifndef __CHECKER__
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 681d866efb1eb..7c0157d509640 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -301,10 +301,6 @@ static inline void *offset_to_ptr(const int *off)
 
 #endif /* __ASSEMBLY__ */
 
-#ifndef __optimize
-# define __optimize(level)
-#endif
-
 /* Compile time object size, -1 for unknown */
 #ifndef __compiletime_object_size
 # define __compiletime_object_size(obj) -1
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index db192becfec49..a19562cb047c8 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -108,7 +108,6 @@ struct ftrace_likely_data {
 
 /* Don't. Just don't. */
 #define __deprecated
-#define __deprecated_for_modules
 
 #endif /* __KERNEL__ */
 
-- 
GitLab


From 5c67a52f3da0f0d22764f2daec417702695a8112 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
Date: Thu, 30 Aug 2018 19:13:37 +0200
Subject: [PATCH 0016/1890] Compiler Attributes: always use the
 extra-underscores syntax

The attribute syntax optionally allows to surround attribute names
with "__" in order to avoid collisions with macros of the same name
(see https://gcc.gnu.org/onlinedocs/gcc/Attribute-Syntax.html).

This homogenizes all attributes to use the syntax with underscores.
While there are currently only a handful of cases of some TUs defining
macros like "error" which may collide with the attributes,
this should prevent futures surprises.

This has been done only for "standard" attributes supported by
the major compilers. In other words, those of third-party tools
(e.g. sparse, plugins...) have not been changed for the moment.

Tested-by: Sedat Dilek <sedat.dilek@gmail.com> # on top of v4.19-rc5, clang 7
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
---
 include/linux/compiler-clang.h |  2 +-
 include/linux/compiler-gcc.h   | 12 +++++-----
 include/linux/compiler-intel.h |  2 +-
 include/linux/compiler.h       |  8 +++----
 include/linux/compiler_types.h | 42 +++++++++++++++++-----------------
 5 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index b1ce500fe8b3d..d11cad61ba5c6 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -21,7 +21,7 @@
 #define __SANITIZE_ADDRESS__
 #endif
 
-#define __no_sanitize_address __attribute__((no_sanitize("address")))
+#define __no_sanitize_address __attribute__((__no_sanitize__("address")))
 
 /*
  * Not all versions of clang implement the the type-generic versions
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 1302b425e625b..7a1de7683df5b 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -76,7 +76,7 @@
 #endif
 
 #ifdef RETPOLINE
-#define __noretpoline __attribute__((indirect_branch("keep")))
+#define __noretpoline __attribute__((__indirect_branch__("keep")))
 #endif
 
 #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
@@ -84,8 +84,8 @@
 #define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
 
 #ifndef __CHECKER__
-#define __compiletime_warning(message) __attribute__((warning(message)))
-#define __compiletime_error(message) __attribute__((error(message)))
+#define __compiletime_warning(message) __attribute__((__warning__(message)))
+#define __compiletime_error(message) __attribute__((__error__(message)))
 
 #ifdef LATENT_ENTROPY_PLUGIN
 #define __latent_entropy __attribute__((latent_entropy))
@@ -134,7 +134,7 @@
  * optimizer that something else uses this function or variable, thus preventing
  * this.
  */
-#define __visible	__attribute__((externally_visible))
+#define __visible	__attribute__((__externally_visible__))
 
 /* gcc version specific checks */
 
@@ -191,7 +191,7 @@
  * should not be applied to that function.
  * Conflicts with inlining: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67368
  */
-#define __no_sanitize_address __attribute__((no_sanitize_address))
+#define __no_sanitize_address __attribute__((__no_sanitize_address__))
 #endif
 
 #if GCC_VERSION >= 50100
@@ -199,7 +199,7 @@
  * Mark structures as requiring designated initializers.
  * https://gcc.gnu.org/onlinedocs/gcc/Designated-Inits.html
  */
-#define __designated_init __attribute__((designated_init))
+#define __designated_init __attribute__((__designated_init__))
 #define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1
 #endif
 
diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h
index 4c7f9befa9f6c..fef8bb3e53efe 100644
--- a/include/linux/compiler-intel.h
+++ b/include/linux/compiler-intel.h
@@ -42,4 +42,4 @@
  * and may be redefined here because they should not be shared with other
  * compilers, like clang.
  */
-#define __visible	__attribute__((externally_visible))
+#define __visible	__attribute__((__externally_visible__))
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 7c0157d509640..ec4a28bad2c6f 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -24,7 +24,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
 			long ______r;					\
 			static struct ftrace_likely_data		\
 				__attribute__((__aligned__(4)))		\
-				__attribute__((section("_ftrace_annotated_branch"))) \
+				__attribute__((__section__("_ftrace_annotated_branch"))) \
 				______f = {				\
 				.data.func = __func__,			\
 				.data.file = __FILE__,			\
@@ -60,7 +60,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
 		int ______r;						\
 		static struct ftrace_branch_data			\
 			__attribute__((__aligned__(4)))			\
-			__attribute__((section("_ftrace_branch")))	\
+			__attribute__((__section__("_ftrace_branch")))	\
 			______f = {					\
 				.func = __func__,			\
 				.file = __FILE__,			\
@@ -146,7 +146,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
 	extern typeof(sym) sym;					\
 	static const unsigned long __kentry_##sym		\
 	__used							\
-	__attribute__((section("___kentry" "+" #sym ), used))	\
+	__attribute__((__section__("___kentry" "+" #sym ), used))	\
 	= (unsigned long)&sym;
 #endif
 
@@ -287,7 +287,7 @@ unsigned long read_word_at_a_time(const void *addr)
  * visible to the compiler.
  */
 #define __ADDRESSABLE(sym) \
-	static void * __attribute__((section(".discard.addressable"), used)) \
+	static void * __attribute__((__section__(".discard.addressable"), used)) \
 		__PASTE(__addressable_##sym, __LINE__) = (void *)&sym;
 
 /**
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index a19562cb047c8..8fbdd47dd3d0d 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -195,26 +195,26 @@ struct ftrace_likely_data {
  * would be.
  * [...]
  */
-#define __pure			__attribute__((pure))
-#define __aligned(x)		__attribute__((aligned(x)))
-#define __aligned_largest	__attribute__((aligned))
-#define __printf(a, b)		__attribute__((format(printf, a, b)))
-#define __scanf(a, b)		__attribute__((format(scanf, a, b)))
-#define __maybe_unused		__attribute__((unused))
-#define __always_unused		__attribute__((unused))
-#define __mode(x)		__attribute__((mode(x)))
+#define __pure			__attribute__((__pure__))
+#define __aligned(x)		__attribute__((__aligned__(x)))
+#define __aligned_largest	__attribute__((__aligned__))
+#define __printf(a, b)		__attribute__((__format__(printf, a, b)))
+#define __scanf(a, b)		__attribute__((__format__(scanf, a, b)))
+#define __maybe_unused		__attribute__((__unused__))
+#define __always_unused		__attribute__((__unused__))
+#define __mode(x)		__attribute__((__mode__(x)))
 #define __malloc		__attribute__((__malloc__))
 #define __used			__attribute__((__used__))
-#define __noreturn		__attribute__((noreturn))
-#define __packed		__attribute__((packed))
-#define __weak			__attribute__((weak))
-#define __alias(symbol)		__attribute__((alias(#symbol)))
-#define __cold			__attribute__((cold))
+#define __noreturn		__attribute__((__noreturn__))
+#define __packed		__attribute__((__packed__))
+#define __weak			__attribute__((__weak__))
+#define __alias(symbol)		__attribute__((__alias__(#symbol)))
+#define __cold			__attribute__((__cold__))
 #define __section(S)		__attribute__((__section__(#S)))
 
 
 #ifdef CONFIG_ENABLE_MUST_CHECK
-#define __must_check		__attribute__((warn_unused_result))
+#define __must_check		__attribute__((__warn_unused_result__))
 #else
 #define __must_check
 #endif
@@ -222,7 +222,7 @@ struct ftrace_likely_data {
 #if defined(CC_USING_HOTPATCH) && !defined(__CHECKER__)
 #define notrace			__attribute__((hotpatch(0, 0)))
 #else
-#define notrace			__attribute__((no_instrument_function))
+#define notrace			__attribute__((__no_instrument_function__))
 #endif
 
 /*
@@ -231,7 +231,7 @@ struct ftrace_likely_data {
  * stack and frame pointer being set up and there is no chance to
  * restore the lr register to the value before mcount was called.
  */
-#define __naked			__attribute__((naked)) notrace
+#define __naked			__attribute__((__naked__)) notrace
 
 #define __compiler_offsetof(a, b)	__builtin_offsetof(a, b)
 
@@ -242,7 +242,7 @@ struct ftrace_likely_data {
  * defined so the gnu89 semantics are the default.
  */
 #ifdef __GNUC_STDC_INLINE__
-# define __gnu_inline	__attribute__((gnu_inline))
+# define __gnu_inline	__attribute__((__gnu_inline__))
 #else
 # define __gnu_inline
 #endif
@@ -262,17 +262,17 @@ struct ftrace_likely_data {
 #if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) || \
 	!defined(CONFIG_OPTIMIZE_INLINING)
 #define inline \
-	inline __attribute__((always_inline, unused)) notrace __gnu_inline
+	inline __attribute__((__always_inline__, __unused__)) notrace __gnu_inline
 #else
-#define inline inline	__attribute__((unused)) notrace __gnu_inline
+#define inline inline	__attribute__((__unused__)) notrace __gnu_inline
 #endif
 
 #define __inline__ inline
 #define __inline inline
-#define noinline	__attribute__((noinline))
+#define noinline	__attribute__((__noinline__))
 
 #ifndef __always_inline
-#define __always_inline inline __attribute__((always_inline))
+#define __always_inline inline __attribute__((__always_inline__))
 #endif
 
 /*
-- 
GitLab


From c2c640aa04cc4e6caf0ff17ff18b3784e0c99566 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
Date: Thu, 30 Aug 2018 19:45:01 +0200
Subject: [PATCH 0017/1890] Compiler Attributes: remove unneeded tests

Attributes const and always_inline have tests around them
which are unneeded, since they are supported by gcc >= 4.6,
clang >= 3 and icc >= 13. https://godbolt.org/z/DFPq37

In the case of gnu_inline, we do not need to test for
__GNUC_STDC_INLINE__ because, regardless of the current
inlining behavior, we can simply always force the old
GCC inlining behavior by using the attribute in all cases.

Tested-by: Sedat Dilek <sedat.dilek@gmail.com> # on top of v4.19-rc5, clang 7
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
---
 include/linux/compiler_types.h | 23 +++--------------------
 1 file changed, 3 insertions(+), 20 deletions(-)

diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 8fbdd47dd3d0d..5ff9cda893f4a 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -158,10 +158,6 @@ struct ftrace_likely_data {
 	(sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || \
 	 sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long))
 
-#ifndef __attribute_const__
-#define __attribute_const__	__attribute__((__const__))
-#endif
-
 #ifndef __noclone
 #define __noclone
 #endif
@@ -196,6 +192,7 @@ struct ftrace_likely_data {
  * [...]
  */
 #define __pure			__attribute__((__pure__))
+#define __attribute_const__	__attribute__((__const__))
 #define __aligned(x)		__attribute__((__aligned__(x)))
 #define __aligned_largest	__attribute__((__aligned__))
 #define __printf(a, b)		__attribute__((__format__(printf, a, b)))
@@ -211,6 +208,8 @@ struct ftrace_likely_data {
 #define __alias(symbol)		__attribute__((__alias__(#symbol)))
 #define __cold			__attribute__((__cold__))
 #define __section(S)		__attribute__((__section__(#S)))
+#define __always_inline		inline __attribute__((__always_inline__))
+#define __gnu_inline		__attribute__((__gnu_inline__))
 
 
 #ifdef CONFIG_ENABLE_MUST_CHECK
@@ -235,18 +234,6 @@ struct ftrace_likely_data {
 
 #define __compiler_offsetof(a, b)	__builtin_offsetof(a, b)
 
-/*
- * Feature detection for gnu_inline (gnu89 extern inline semantics). Either
- * __GNUC_STDC_INLINE__ is defined (not using gnu89 extern inline semantics,
- * and we opt in to the gnu89 semantics), or __GNUC_STDC_INLINE__ is not
- * defined so the gnu89 semantics are the default.
- */
-#ifdef __GNUC_STDC_INLINE__
-# define __gnu_inline	__attribute__((__gnu_inline__))
-#else
-# define __gnu_inline
-#endif
-
 /*
  * Force always-inline if the user requests it so via the .config.
  * GCC does not warn about unused static inline functions for
@@ -271,10 +258,6 @@ struct ftrace_likely_data {
 #define __inline inline
 #define noinline	__attribute__((__noinline__))
 
-#ifndef __always_inline
-#define __always_inline inline __attribute__((__always_inline__))
-#endif
-
 /*
  * Rather then using noinline to prevent stack consumption, use
  * noinline_for_stack instead.  For documentation reasons.
-- 
GitLab


From ec0bbef66f867854691d5af18c2231d746958e0e Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
Date: Thu, 30 Aug 2018 19:25:14 +0200
Subject: [PATCH 0018/1890] Compiler Attributes: homogenize __must_be_array

Different definitions of __must_be_array:

  * gcc: disabled for __CHECKER__

  * clang: same definition as gcc's, but without __CHECKER__

  * intel: the comment claims __builtin_types_compatible_p()
    is unsupported; but icc seems to support it since 13.0.1
    (released in 2012). See https://godbolt.org/z/S0l6QQ

Therefore, we can remove all of them and have a single definition
in compiler.h

Tested-by: Sedat Dilek <sedat.dilek@gmail.com> # on top of v4.19-rc5, clang 7
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
---
 include/linux/compiler-clang.h | 1 -
 include/linux/compiler-gcc.h   | 7 -------
 include/linux/compiler-intel.h | 3 ---
 include/linux/compiler.h       | 7 +++++++
 4 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index d11cad61ba5c6..fa9532f8d8854 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -41,6 +41,5 @@
  * compilers, like ICC.
  */
 #define barrier() __asm__ __volatile__("" : : : "memory")
-#define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
 #define __assume_aligned(a, ...)	\
 	__attribute__((__assume_aligned__(a, ## __VA_ARGS__)))
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 7a1de7683df5b..3b32bbfa5a491 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -68,13 +68,6 @@
  */
 #define uninitialized_var(x) x = x
 
-#ifdef __CHECKER__
-#define __must_be_array(a)	0
-#else
-/* &a[0] degrades to a pointer: a different type from an array */
-#define __must_be_array(a)	BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
-#endif
-
 #ifdef RETPOLINE
 #define __noretpoline __attribute__((__indirect_branch__("keep")))
 #endif
diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h
index fef8bb3e53efe..6004b4588bd44 100644
--- a/include/linux/compiler-intel.h
+++ b/include/linux/compiler-intel.h
@@ -29,9 +29,6 @@
  */
 #define OPTIMIZER_HIDE_VAR(var) barrier()
 
-/* Intel ECC compiler doesn't support __builtin_types_compatible_p() */
-#define __must_be_array(a) 0
-
 #endif
 
 /* icc has this, but it's called _bswap16 */
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index ec4a28bad2c6f..165b1d5683edd 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -357,4 +357,11 @@ static inline void *offset_to_ptr(const int *off)
 	compiletime_assert(__native_word(t),				\
 		"Need native word sized stores/loads for atomicity.")
 
+#ifdef __CHECKER__
+#define __must_be_array(a)	0
+#else
+/* &a[0] degrades to a pointer: a different type from an array */
+#define __must_be_array(a)	BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
+#endif
+
 #endif /* __LINUX_COMPILER_H */
-- 
GitLab


From 989bd5000f36052df604888ed12bb6ef390786b7 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
Date: Fri, 31 Aug 2018 18:00:16 +0200
Subject: [PATCH 0019/1890] Compiler Attributes: remove unneeded sparse
 (__CHECKER__) tests

Sparse knows about a few more attributes now, so we can remove
the __CHECKER__ conditions from them (which, in turn, allow us
to move some of them later on to compiler_attributes.h).

  * assume_aligned: since sparse's commit ffc860b ("sparse:
    ignore __assume_aligned__ attribute"), included in 0.5.1

  * error: since sparse's commit 0a04210 ("sparse: Add 'error'
    to ignored attributes"), included in 0.5.0

  * hotpatch: since sparse's commit 6043210 ("sparse/parse.c:
    ignore hotpatch attribute"), included in 0.5.1

  * warning: since sparse's commit 977365d ("Avoid "attribute
    'warning': unknown attribute" warning"), included in 0.4.2

On top of that, __must_be_array does not need it either because:

  * Even ancient versions of sparse do not have a problem

  * BUILD_BUG_ON_ZERO() is currently disabled for __CHECKER__

Tested-by: Sedat Dilek <sedat.dilek@gmail.com> # on top of v4.19-rc5, clang 7
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
---
 include/linux/compiler-gcc.h   | 6 ++----
 include/linux/compiler.h       | 4 ----
 include/linux/compiler_types.h | 2 +-
 3 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 3b32bbfa5a491..1ca6a51cfaa9f 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -76,14 +76,12 @@
 
 #define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
 
-#ifndef __CHECKER__
 #define __compiletime_warning(message) __attribute__((__warning__(message)))
 #define __compiletime_error(message) __attribute__((__error__(message)))
 
-#ifdef LATENT_ENTROPY_PLUGIN
+#if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__)
 #define __latent_entropy __attribute__((latent_entropy))
 #endif
-#endif /* __CHECKER__ */
 
 /*
  * calling noreturn functions, __builtin_unreachable() and __builtin_trap()
@@ -131,7 +129,7 @@
 
 /* gcc version specific checks */
 
-#if GCC_VERSION >= 40900 && !defined(__CHECKER__)
+#if GCC_VERSION >= 40900
 /*
  * __assume_aligned(n, k): Tell the optimizer that the returned
  * pointer can be assumed to be k modulo n. The second argument is
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 165b1d5683edd..4030a2940d6bf 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -357,11 +357,7 @@ static inline void *offset_to_ptr(const int *off)
 	compiletime_assert(__native_word(t),				\
 		"Need native word sized stores/loads for atomicity.")
 
-#ifdef __CHECKER__
-#define __must_be_array(a)	0
-#else
 /* &a[0] degrades to a pointer: a different type from an array */
 #define __must_be_array(a)	BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
-#endif
 
 #endif /* __LINUX_COMPILER_H */
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 5ff9cda893f4a..a3eceb3ad1b35 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -218,7 +218,7 @@ struct ftrace_likely_data {
 #define __must_check
 #endif
 
-#if defined(CC_USING_HOTPATCH) && !defined(__CHECKER__)
+#if defined(CC_USING_HOTPATCH)
 #define notrace			__attribute__((hotpatch(0, 0)))
 #else
 #define notrace			__attribute__((__no_instrument_function__))
-- 
GitLab


From 66dbeef915f275dad6c8656b31667ef9640f5639 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
Date: Mon, 3 Sep 2018 18:34:18 +0200
Subject: [PATCH 0020/1890] Compiler Attributes: add missing SPDX ID in
 compiler_types.h

Tested-by: Sedat Dilek <sedat.dilek@gmail.com> # on top of v4.19-rc5, clang 7
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
---
 include/linux/compiler_types.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index a3eceb3ad1b35..ed7c0e4a180e6 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __LINUX_COMPILER_TYPES_H
 #define __LINUX_COMPILER_TYPES_H
 
-- 
GitLab


From a3f8a30f3f0079c7edfc72e329eee8594fb3e3cb Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
Date: Thu, 30 Aug 2018 20:36:59 +0200
Subject: [PATCH 0021/1890] Compiler Attributes: use feature checks instead of
 version checks

Instead of using version checks per-compiler to define (or not)
each attribute, use __has_attribute to test for them, following
the cleanup started with commit 815f0ddb346c
("include/linux/compiler*.h: make compiler-*.h mutually exclusive"),
which is supported on gcc >= 5, clang >= 2.9 and icc >= 17.
In the meantime, to support 4.6 <= gcc < 5, we implement
__has_attribute by hand.

All the attributes that can be unconditionally defined and directly
map to compiler attribute(s) (even if optional) have been moved
to a new file include/linux/compiler_attributes.h

In an effort to make the file as regular as possible, comments
stating the purpose of attributes have been removed. Instead,
links to the compiler docs have been added (i.e. to gcc and,
if available, to clang as well). In addition, they have been sorted.

Finally, if an attribute is optional (i.e. if it is guarded
by __has_attribute), the reason has been stated for future reference.

Tested-by: Sedat Dilek <sedat.dilek@gmail.com> # on top of v4.19-rc5, clang 7
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
---
 include/linux/compiler-clang.h      |   4 -
 include/linux/compiler-gcc.h        |  51 ------
 include/linux/compiler-intel.h      |   6 -
 include/linux/compiler_attributes.h | 244 ++++++++++++++++++++++++++++
 include/linux/compiler_types.h      |  74 ++-------
 5 files changed, 254 insertions(+), 125 deletions(-)
 create mode 100644 include/linux/compiler_attributes.h

diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index fa9532f8d8854..3e7dafb3ea809 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -21,8 +21,6 @@
 #define __SANITIZE_ADDRESS__
 #endif
 
-#define __no_sanitize_address __attribute__((__no_sanitize__("address")))
-
 /*
  * Not all versions of clang implement the the type-generic versions
  * of the builtin overflow checkers. Fortunately, clang implements
@@ -41,5 +39,3 @@
  * compilers, like ICC.
  */
 #define barrier() __asm__ __volatile__("" : : : "memory")
-#define __assume_aligned(a, ...)	\
-	__attribute__((__assume_aligned__(a, ## __VA_ARGS__)))
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 1ca6a51cfaa9f..cfac027e16251 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -108,9 +108,6 @@
 		__builtin_unreachable();	\
 	} while (0)
 
-/* Mark a function definition as prohibited from being cloned. */
-#define __noclone	__attribute__((__noclone__, __optimize__("no-tracer")))
-
 #if defined(RANDSTRUCT_PLUGIN) && !defined(__CHECKER__)
 #define __randomize_layout __attribute__((randomize_layout))
 #define __no_randomize_layout __attribute__((no_randomize_layout))
@@ -119,32 +116,6 @@
 #define randomized_struct_fields_end	} __randomize_layout;
 #endif
 
-/*
- * When used with Link Time Optimization, gcc can optimize away C functions or
- * variables which are referenced only from assembly code.  __visible tells the
- * optimizer that something else uses this function or variable, thus preventing
- * this.
- */
-#define __visible	__attribute__((__externally_visible__))
-
-/* gcc version specific checks */
-
-#if GCC_VERSION >= 40900
-/*
- * __assume_aligned(n, k): Tell the optimizer that the returned
- * pointer can be assumed to be k modulo n. The second argument is
- * optional (default 0), so we use a variadic macro to make the
- * shorthand.
- *
- * Beware: Do not apply this to functions which may return
- * ERR_PTRs. Also, it is probably unwise to apply it to functions
- * returning extra information in the low bits (but in that case the
- * compiler should see some alignment anyway, when the return value is
- * massaged by 'flags = ptr & 3; ptr &= ~3;').
- */
-#define __assume_aligned(a, ...) __attribute__((__assume_aligned__(a, ## __VA_ARGS__)))
-#endif
-
 /*
  * GCC 'asm goto' miscompiles certain code sequences:
  *
@@ -176,32 +147,10 @@
 #define KASAN_ABI_VERSION 3
 #endif
 
-#if GCC_VERSION >= 40902
-/*
- * Tell the compiler that address safety instrumentation (KASAN)
- * should not be applied to that function.
- * Conflicts with inlining: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67368
- */
-#define __no_sanitize_address __attribute__((__no_sanitize_address__))
-#endif
-
 #if GCC_VERSION >= 50100
-/*
- * Mark structures as requiring designated initializers.
- * https://gcc.gnu.org/onlinedocs/gcc/Designated-Inits.html
- */
-#define __designated_init __attribute__((__designated_init__))
 #define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1
 #endif
 
-#if !defined(__noclone)
-#define __noclone	/* not needed */
-#endif
-
-#if !defined(__no_sanitize_address)
-#define __no_sanitize_address
-#endif
-
 /*
  * Turn individual warnings and errors on and off locally, depending
  * on version.
diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h
index 6004b4588bd44..517bd14e12224 100644
--- a/include/linux/compiler-intel.h
+++ b/include/linux/compiler-intel.h
@@ -34,9 +34,3 @@
 /* icc has this, but it's called _bswap16 */
 #define __HAVE_BUILTIN_BSWAP16__
 #define __builtin_bswap16 _bswap16
-
-/* The following are for compatibility with GCC, from compiler-gcc.h,
- * and may be redefined here because they should not be shared with other
- * compilers, like clang.
- */
-#define __visible	__attribute__((__externally_visible__))
diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h
new file mode 100644
index 0000000000000..f0f9fc398440f
--- /dev/null
+++ b/include/linux/compiler_attributes.h
@@ -0,0 +1,244 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_COMPILER_ATTRIBUTES_H
+#define __LINUX_COMPILER_ATTRIBUTES_H
+
+/*
+ * The attributes in this file are unconditionally defined and they directly
+ * map to compiler attribute(s) -- except those that are optional.
+ *
+ * Any other "attributes" (i.e. those that depend on a configuration option,
+ * on a compiler, on an architecture, on plugins, on other attributes...)
+ * should be defined elsewhere (e.g. compiler_types.h or compiler-*.h).
+ *
+ * This file is meant to be sorted (by actual attribute name,
+ * not by #define identifier). Use the __attribute__((__name__)) syntax
+ * (i.e. with underscores) to avoid future collisions with other macros.
+ * If an attribute is optional, state the reason in the comment.
+ */
+
+/*
+ * To check for optional attributes, we use __has_attribute, which is supported
+ * on gcc >= 5, clang >= 2.9 and icc >= 17. In the meantime, to support
+ * 4.6 <= gcc < 5, we implement __has_attribute by hand.
+ *
+ * sparse does not support __has_attribute (yet) and defines __GNUC_MINOR__
+ * depending on the compiler used to build it; however, these attributes have
+ * no semantic effects for sparse, so it does not matter. Also note that,
+ * in order to avoid sparse's warnings, even the unsupported ones must be
+ * defined to 0.
+ */
+#ifndef __has_attribute
+# define __has_attribute(x) __GCC4_has_attribute_##x
+# define __GCC4_has_attribute___assume_aligned__      (__GNUC_MINOR__ >= 9)
+# define __GCC4_has_attribute___designated_init__     0
+# define __GCC4_has_attribute___externally_visible__  1
+# define __GCC4_has_attribute___noclone__             1
+# define __GCC4_has_attribute___optimize__            1
+# define __GCC4_has_attribute___no_sanitize_address__ (__GNUC_MINOR__ >= 8)
+#endif
+
+/*
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-alias-function-attribute
+ */
+#define __alias(symbol)                 __attribute__((__alias__(#symbol)))
+
+/*
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-aligned-function-attribute
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-aligned-type-attribute
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-aligned-variable-attribute
+ */
+#define __aligned(x)                    __attribute__((__aligned__(x)))
+#define __aligned_largest               __attribute__((__aligned__))
+
+/*
+ * Note: users of __always_inline currently do not write "inline" themselves,
+ * which seems to be required by gcc to apply the attribute according
+ * to its docs (and also "warning: always_inline function might not be
+ * inlinable [-Wattributes]" is emitted).
+ *
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-always_005finline-function-attribute
+ * clang: mentioned
+ */
+#define __always_inline                 inline __attribute__((__always_inline__))
+
+/*
+ * The second argument is optional (default 0), so we use a variadic macro
+ * to make the shorthand.
+ *
+ * Beware: Do not apply this to functions which may return
+ * ERR_PTRs. Also, it is probably unwise to apply it to functions
+ * returning extra information in the low bits (but in that case the
+ * compiler should see some alignment anyway, when the return value is
+ * massaged by 'flags = ptr & 3; ptr &= ~3;').
+ *
+ * Optional: only supported since gcc >= 4.9
+ * Optional: not supported by icc
+ *
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-assume_005faligned-function-attribute
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#assume-aligned
+ */
+#if __has_attribute(__assume_aligned__)
+# define __assume_aligned(a, ...)       __attribute__((__assume_aligned__(a, ## __VA_ARGS__)))
+#else
+# define __assume_aligned(a, ...)
+#endif
+
+/*
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-cold-function-attribute
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Label-Attributes.html#index-cold-label-attribute
+ */
+#define __cold                          __attribute__((__cold__))
+
+/*
+ * Note the long name.
+ *
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-const-function-attribute
+ */
+#define __attribute_const__             __attribute__((__const__))
+
+/*
+ * Don't. Just don't. See commit 771c035372a0 ("deprecate the '__deprecated'
+ * attribute warnings entirely and for good") for more information.
+ *
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-deprecated-function-attribute
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-deprecated-type-attribute
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-deprecated-variable-attribute
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Enumerator-Attributes.html#index-deprecated-enumerator-attribute
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#deprecated
+ */
+#define __deprecated
+
+/*
+ * Optional: only supported since gcc >= 5.1
+ * Optional: not supported by clang
+ * Optional: not supported by icc
+ *
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-designated_005finit-type-attribute
+ */
+#if __has_attribute(__designated_init__)
+# define __designated_init              __attribute__((__designated_init__))
+#else
+# define __designated_init
+#endif
+
+/*
+ * Optional: not supported by clang
+ *
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-externally_005fvisible-function-attribute
+ */
+#if __has_attribute(__externally_visible__)
+# define __visible                      __attribute__((__externally_visible__))
+#else
+# define __visible
+#endif
+
+/*
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-format-function-attribute
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#format
+ */
+#define __printf(a, b)                  __attribute__((__format__(printf, a, b)))
+#define __scanf(a, b)                   __attribute__((__format__(scanf, a, b)))
+
+/*
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-gnu_005finline-function-attribute
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#gnu-inline
+ */
+#define __gnu_inline                    __attribute__((__gnu_inline__))
+
+/*
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-malloc-function-attribute
+ */
+#define __malloc                        __attribute__((__malloc__))
+
+/*
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-mode-type-attribute
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-mode-variable-attribute
+ */
+#define __mode(x)                       __attribute__((__mode__(x)))
+
+/*
+ * Optional: not supported by clang
+ * Note: icc does not recognize gcc's no-tracer
+ *
+ *  gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-noclone-function-attribute
+ *  gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-optimize-function-attribute
+ */
+#if __has_attribute(__noclone__)
+# if __has_attribute(__optimize__)
+#  define __noclone                     __attribute__((__noclone__, __optimize__("no-tracer")))
+# else
+#  define __noclone                     __attribute__((__noclone__))
+# endif
+#else
+# define __noclone
+#endif
+
+/*
+ * Note the missing underscores.
+ *
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-noinline-function-attribute
+ * clang: mentioned
+ */
+#define   noinline                      __attribute__((__noinline__))
+
+/*
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-noreturn-function-attribute
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#noreturn
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#id1
+ */
+#define __noreturn                      __attribute__((__noreturn__))
+
+/*
+ * Optional: only supported since gcc >= 4.8
+ * Optional: not supported by icc
+ *
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-no_005fsanitize_005faddress-function-attribute
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#no-sanitize-address-no-address-safety-analysis
+ */
+#if __has_attribute(__no_sanitize_address__)
+# define __no_sanitize_address          __attribute__((__no_sanitize_address__))
+#else
+# define __no_sanitize_address
+#endif
+
+/*
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-packed-type-attribute
+ * clang: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-packed-variable-attribute
+ */
+#define __packed                        __attribute__((__packed__))
+
+/*
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-pure-function-attribute
+ */
+#define __pure                          __attribute__((__pure__))
+
+/*
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-section-function-attribute
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-section-variable-attribute
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#section-declspec-allocate
+ */
+#define __section(S)                    __attribute__((__section__(#S)))
+
+/*
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-unused-function-attribute
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-unused-type-attribute
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-unused-variable-attribute
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Label-Attributes.html#index-unused-label-attribute
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#maybe-unused-unused
+ */
+#define __always_unused                 __attribute__((__unused__))
+#define __maybe_unused                  __attribute__((__unused__))
+
+/*
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-used-function-attribute
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-used-variable-attribute
+ */
+#define __used                          __attribute__((__used__))
+
+/*
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-weak-function-attribute
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-weak-variable-attribute
+ */
+#define __weak                          __attribute__((__weak__))
+
+#endif /* __LINUX_COMPILER_ATTRIBUTES_H */
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index ed7c0e4a180e6..3439d7d0249aa 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -55,6 +55,9 @@ extern void __chk_io_ptr(const volatile void __iomem *);
 
 #ifdef __KERNEL__
 
+/* Attributes */
+#include <linux/compiler_attributes.h>
+
 /* Compiler specific macros. */
 #ifdef __clang__
 #include <linux/compiler-clang.h>
@@ -79,12 +82,6 @@ extern void __chk_io_ptr(const volatile void __iomem *);
 #include <asm/compiler.h>
 #endif
 
-/*
- * Generic compiler-independent macros required for kernel
- * build go below this comment. Actual compiler/compiler version
- * specific implementations come from the above header files
- */
-
 struct ftrace_branch_data {
 	const char *func;
 	const char *file;
@@ -107,9 +104,6 @@ struct ftrace_likely_data {
 	unsigned long			constant;
 };
 
-/* Don't. Just don't. */
-#define __deprecated
-
 #endif /* __KERNEL__ */
 
 #endif /* __ASSEMBLY__ */
@@ -119,10 +113,6 @@ struct ftrace_likely_data {
  * compilers. We don't consider that to be an error, so set them to nothing.
  * For example, some of them are for compiler specific plugins.
  */
-#ifndef __designated_init
-# define __designated_init
-#endif
-
 #ifndef __latent_entropy
 # define __latent_entropy
 #endif
@@ -140,17 +130,6 @@ struct ftrace_likely_data {
 # define randomized_struct_fields_end
 #endif
 
-#ifndef __visible
-#define __visible
-#endif
-
-/*
- * Assume alignment of return value.
- */
-#ifndef __assume_aligned
-#define __assume_aligned(a, ...)
-#endif
-
 /* Are two types/vars the same type (ignoring qualifiers)? */
 #define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
 
@@ -159,10 +138,6 @@ struct ftrace_likely_data {
 	(sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || \
 	 sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long))
 
-#ifndef __noclone
-#define __noclone
-#endif
-
 /* Helpers for emitting diagnostics in pragmas. */
 #ifndef __diag
 #define __diag(string)
@@ -182,37 +157,6 @@ struct ftrace_likely_data {
 #define __diag_error(compiler, version, option, comment) \
 	__diag_ ## compiler(version, error, option)
 
-/*
- * From the GCC manual:
- *
- * Many functions have no effects except the return value and their
- * return value depends only on the parameters and/or global
- * variables.  Such a function can be subject to common subexpression
- * elimination and loop optimization just as an arithmetic operator
- * would be.
- * [...]
- */
-#define __pure			__attribute__((__pure__))
-#define __attribute_const__	__attribute__((__const__))
-#define __aligned(x)		__attribute__((__aligned__(x)))
-#define __aligned_largest	__attribute__((__aligned__))
-#define __printf(a, b)		__attribute__((__format__(printf, a, b)))
-#define __scanf(a, b)		__attribute__((__format__(scanf, a, b)))
-#define __maybe_unused		__attribute__((__unused__))
-#define __always_unused		__attribute__((__unused__))
-#define __mode(x)		__attribute__((__mode__(x)))
-#define __malloc		__attribute__((__malloc__))
-#define __used			__attribute__((__used__))
-#define __noreturn		__attribute__((__noreturn__))
-#define __packed		__attribute__((__packed__))
-#define __weak			__attribute__((__weak__))
-#define __alias(symbol)		__attribute__((__alias__(#symbol)))
-#define __cold			__attribute__((__cold__))
-#define __section(S)		__attribute__((__section__(#S)))
-#define __always_inline		inline __attribute__((__always_inline__))
-#define __gnu_inline		__attribute__((__gnu_inline__))
-
-
 #ifdef CONFIG_ENABLE_MUST_CHECK
 #define __must_check		__attribute__((__warn_unused_result__))
 #else
@@ -246,18 +190,20 @@ struct ftrace_likely_data {
  * semantics rather than c99. This prevents multiple symbol definition errors
  * of extern inline functions at link time.
  * A lot of inline functions can cause havoc with function tracing.
+ * Do not use __always_inline here, since currently it expands to inline again
+ * (which would break users of __always_inline).
  */
 #if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) || \
 	!defined(CONFIG_OPTIMIZE_INLINING)
-#define inline \
-	inline __attribute__((__always_inline__, __unused__)) notrace __gnu_inline
+#define inline inline __attribute__((__always_inline__)) __gnu_inline \
+	__maybe_unused notrace
 #else
-#define inline inline	__attribute__((__unused__)) notrace __gnu_inline
+#define inline inline                                    __gnu_inline \
+	__maybe_unused notrace
 #endif
 
 #define __inline__ inline
-#define __inline inline
-#define noinline	__attribute__((__noinline__))
+#define __inline   inline
 
 /*
  * Rather then using noinline to prevent stack consumption, use
-- 
GitLab


From 06e3727e02f9ee9cf571692cd5c74fc5a8a2af52 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
Date: Mon, 3 Sep 2018 19:22:13 +0200
Subject: [PATCH 0022/1890] Compiler Attributes: KENTRY used twice the "used"
 attribute

Tested-by: Sedat Dilek <sedat.dilek@gmail.com> # on top of v4.19-rc5, clang 7
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
---
 include/linux/compiler.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 4030a2940d6bf..17ee9165ca513 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -146,7 +146,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
 	extern typeof(sym) sym;					\
 	static const unsigned long __kentry_##sym		\
 	__used							\
-	__attribute__((__section__("___kentry" "+" #sym ), used))	\
+	__attribute__((__section__("___kentry" "+" #sym )))	\
 	= (unsigned long)&sym;
 #endif
 
-- 
GitLab


From e04462fb82f8dd98288c0e7ab1eec79c92537d25 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
Date: Mon, 3 Sep 2018 19:17:50 +0200
Subject: [PATCH 0023/1890] Compiler Attributes: remove uses of __attribute__
 from compiler.h

Suggested-by: Nick Desaulniers <ndesaulniers@google.com>
Tested-by: Sedat Dilek <sedat.dilek@gmail.com> # on top of v4.19-rc5, clang 7
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
---
 include/linux/compiler.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 17ee9165ca513..b5fb034fa6fa1 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -23,8 +23,8 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
 #define __branch_check__(x, expect, is_constant) ({			\
 			long ______r;					\
 			static struct ftrace_likely_data		\
-				__attribute__((__aligned__(4)))		\
-				__attribute__((__section__("_ftrace_annotated_branch"))) \
+				__aligned(4)				\
+				__section("_ftrace_annotated_branch")	\
 				______f = {				\
 				.data.func = __func__,			\
 				.data.file = __FILE__,			\
@@ -59,8 +59,8 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
 	({								\
 		int ______r;						\
 		static struct ftrace_branch_data			\
-			__attribute__((__aligned__(4)))			\
-			__attribute__((__section__("_ftrace_branch")))	\
+			__aligned(4)					\
+			__section("_ftrace_branch")			\
 			______f = {					\
 				.func = __func__,			\
 				.file = __FILE__,			\
@@ -146,7 +146,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
 	extern typeof(sym) sym;					\
 	static const unsigned long __kentry_##sym		\
 	__used							\
-	__attribute__((__section__("___kentry" "+" #sym )))	\
+	__section("___kentry" "+" #sym )			\
 	= (unsigned long)&sym;
 #endif
 
@@ -287,7 +287,7 @@ unsigned long read_word_at_a_time(const void *addr)
  * visible to the compiler.
  */
 #define __ADDRESSABLE(sym) \
-	static void * __attribute__((__section__(".discard.addressable"), used)) \
+	static void * __section(".discard.addressable") __used \
 		__PASTE(__addressable_##sym, __LINE__) = (void *)&sym;
 
 /**
-- 
GitLab


From 303d22c5fc37cebe2bd0b3c15ac3db6950db60c6 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
Date: Mon, 3 Sep 2018 18:32:11 +0200
Subject: [PATCH 0024/1890] Compiler Attributes: add
 Doc/process/programming-language.rst

Tested-by: Sedat Dilek <sedat.dilek@gmail.com> # on top of v4.19-rc5, clang 7
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
---
 Documentation/process/index.rst               |  1 +
 .../process/programming-language.rst          | 45 +++++++++++++++++++
 2 files changed, 46 insertions(+)
 create mode 100644 Documentation/process/programming-language.rst

diff --git a/Documentation/process/index.rst b/Documentation/process/index.rst
index 9ae3e317bddf9..00558b6d2649f 100644
--- a/Documentation/process/index.rst
+++ b/Documentation/process/index.rst
@@ -23,6 +23,7 @@ Below are the essential guides that every developer should read.
    code-of-conduct
    development-process
    submitting-patches
+   programming-language
    coding-style
    maintainer-pgp-guide
    email-clients
diff --git a/Documentation/process/programming-language.rst b/Documentation/process/programming-language.rst
new file mode 100644
index 0000000000000..e5f5f065dc24e
--- /dev/null
+++ b/Documentation/process/programming-language.rst
@@ -0,0 +1,45 @@
+.. _programming_language:
+
+Programming Language
+====================
+
+The kernel is written in the C programming language [c-language]_.
+More precisely, the kernel is typically compiled with ``gcc`` [gcc]_
+under ``-std=gnu89`` [gcc-c-dialect-options]_: the GNU dialect of ISO C90
+(including some C99 features).
+
+This dialect contains many extensions to the language [gnu-extensions]_,
+and many of them are used within the kernel as a matter of course.
+
+There is some support for compiling the kernel with ``clang`` [clang]_
+and ``icc`` [icc]_ for several of the architectures, although at the time
+of writing it is not completed, requiring third-party patches.
+
+Attributes
+----------
+
+One of the common extensions used throughout the kernel are attributes
+[gcc-attribute-syntax]_. Attributes allow to introduce
+implementation-defined semantics to language entities (like variables,
+functions or types) without having to make significant syntactic changes
+to the language (e.g. adding a new keyword) [n2049]_.
+
+In some cases, attributes are optional (i.e. a compiler not supporting them
+should still produce proper code, even if it is slower or does not perform
+as many compile-time checks/diagnostics).
+
+The kernel defines pseudo-keywords (e.g. ``__pure``) instead of using
+directly the GNU attribute syntax (e.g. ``__attribute__((__pure__))``)
+in order to feature detect which ones can be used and/or to shorten the code.
+
+Please refer to ``include/linux/compiler_attributes.h`` for more information.
+
+.. [c-language] http://www.open-std.org/jtc1/sc22/wg14/www/standards
+.. [gcc] https://gcc.gnu.org
+.. [clang] https://clang.llvm.org
+.. [icc] https://software.intel.com/en-us/c-compilers
+.. [gcc-c-dialect-options] https://gcc.gnu.org/onlinedocs/gcc/C-Dialect-Options.html
+.. [gnu-extensions] https://gcc.gnu.org/onlinedocs/gcc/C-Extensions.html
+.. [gcc-attribute-syntax] https://gcc.gnu.org/onlinedocs/gcc/Attribute-Syntax.html
+.. [n2049] http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2049.pdf
+
-- 
GitLab


From 5fc4a13b66b4643b7a564cc79c2453889c8665d7 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
Date: Sat, 8 Sep 2018 22:53:57 +0200
Subject: [PATCH 0025/1890] Compiler Attributes: add MAINTAINERS entry

Tested-by: Sedat Dilek <sedat.dilek@gmail.com> # on top of v4.19-rc5, clang 7
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
---
 MAINTAINERS | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index b22e7fdfd2ea9..e4980d6755849 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3722,6 +3722,11 @@ L:	platform-driver-x86@vger.kernel.org
 S:	Maintained
 F:	drivers/platform/x86/compal-laptop.c
 
+COMPILER ATTRIBUTES
+M:	Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
+S:	Maintained
+F:	include/linux/compiler_attributes.h
+
 CONEXANT ACCESSRUNNER USB DRIVER
 L:	accessrunner-general@lists.sourceforge.net
 W:	http://accessrunner.sourceforge.net/
-- 
GitLab


From 92676236917d8e2e0de1d8612686d3d04a6a245b Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
Date: Wed, 19 Sep 2018 01:06:24 +0200
Subject: [PATCH 0026/1890] Compiler Attributes: add support for __nonstring
 (gcc >= 8)

From the GCC manual:

  nonstring

    The nonstring variable attribute specifies that an object or member
    declaration with type array of char, signed char, or unsigned char,
    or pointer to such a type is intended to store character arrays that
    do not necessarily contain a terminating NUL. This is useful in detecting
    uses of such arrays or pointers with functions that expect NUL-terminated
    strings, and to avoid warnings when such an array or pointer is used as
    an argument to a bounded string manipulation function such as strncpy.

  https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html

This attribute can be used for documentation purposes (i.e. replacing
comments), but it is most helpful when the following warnings are enabled:

  -Wstringop-overflow

    Warn for calls to string manipulation functions such as memcpy and
    strcpy that are determined to overflow the destination buffer.

    [...]

  -Wstringop-truncation

    Warn for calls to bounded string manipulation functions such as
    strncat, strncpy, and stpncpy that may either truncate the copied
    string or leave the destination unchanged.

    [...]

    In situations where a character array is intended to store a sequence
    of bytes with no terminating NUL such an array may be annotated with
    attribute nonstring to avoid this warning. Such arrays, however,
    are not suitable arguments to functions that expect NUL-terminated
    strings. To help detect accidental misuses of such arrays GCC issues
    warnings unless it can prove that the use is safe.

  https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html

Tested-by: Sedat Dilek <sedat.dilek@gmail.com> # on top of v4.19-rc5, clang 7
Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
---
 include/linux/compiler_attributes.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h
index f0f9fc398440f..6b28c1b7310c0 100644
--- a/include/linux/compiler_attributes.h
+++ b/include/linux/compiler_attributes.h
@@ -34,6 +34,7 @@
 # define __GCC4_has_attribute___externally_visible__  1
 # define __GCC4_has_attribute___noclone__             1
 # define __GCC4_has_attribute___optimize__            1
+# define __GCC4_has_attribute___nonstring__           0
 # define __GCC4_has_attribute___no_sanitize_address__ (__GNUC_MINOR__ >= 8)
 #endif
 
@@ -181,6 +182,19 @@
  */
 #define   noinline                      __attribute__((__noinline__))
 
+/*
+ * Optional: only supported since gcc >= 8
+ * Optional: not supported by clang
+ * Optional: not supported by icc
+ *
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-nonstring-variable-attribute
+ */
+#if __has_attribute(__nonstring__)
+# define __nonstring                    __attribute__((__nonstring__))
+#else
+# define __nonstring
+#endif
+
 /*
  *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-noreturn-function-attribute
  * clang: https://clang.llvm.org/docs/AttributeReference.html#noreturn
-- 
GitLab


From 23066c3f4e2146da8c7d1505729f4409f4d93d28 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
Date: Wed, 19 Sep 2018 02:31:58 +0200
Subject: [PATCH 0027/1890] Compiler Attributes: enable -Wstringop-truncation
 on W=1 (gcc >= 8)

Commit 217c3e019675 ("disable stringop truncation warnings for now")
disabled -Wstringop-truncation since it was too noisy.

Having __nonstring available allows us to let GCC know that a string
is not meant to be NUL-terminated, which helps suppressing some
-Wstringop-truncation warnings.

Note that using __nonstring actually triggers other warnings
(-Wstringop-overflow, which is on by default) which may be real
problems. Therefore, cleaning up -Wstringop-truncation warnings
also buys us the ability to uncover further potential problems.

To encourage the use of __nonstring, we put the warning back at W=1.
In the future, if we end up with a fairly warning-free tree,
we might want to enable it by default.

Tested-by: Sedat Dilek <sedat.dilek@gmail.com> # on top of v4.19-rc5, clang 7
Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
---
 scripts/Makefile.extrawarn | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn
index 8d5357053f865..b8f36f5f43f20 100644
--- a/scripts/Makefile.extrawarn
+++ b/scripts/Makefile.extrawarn
@@ -29,6 +29,7 @@ warning-1 += $(call cc-option, -Wmissing-include-dirs)
 warning-1 += $(call cc-option, -Wunused-but-set-variable)
 warning-1 += $(call cc-option, -Wunused-const-variable)
 warning-1 += $(call cc-option, -Wpacked-not-aligned)
+warning-1 += $(call cc-option, -Wstringop-truncation)
 warning-1 += $(call cc-disable-warning, missing-field-initializers)
 warning-1 += $(call cc-disable-warning, sign-compare)
 
-- 
GitLab


From 98cade0a08ba339cd11d6e89b0df5d1d2fa21202 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
Date: Tue, 14 Aug 2018 21:38:26 +0200
Subject: [PATCH 0028/1890] Compiler Attributes: auxdisplay: panel: use
 __nonstring

Let gcc know these arrays are not meant to be NUL-terminated
by annotating them with the new __nonstring variable attribute;
and remove the comment since it conveys the same information.

Tested-by: Sedat Dilek <sedat.dilek@gmail.com> # on top of v4.19-rc5, clang 7
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
---
 drivers/auxdisplay/panel.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/auxdisplay/panel.c b/drivers/auxdisplay/panel.c
index 3b25a643058c9..21b9b2f2470a2 100644
--- a/drivers/auxdisplay/panel.c
+++ b/drivers/auxdisplay/panel.c
@@ -155,10 +155,9 @@ struct logical_input {
 			int release_data;
 		} std;
 		struct {	/* valid when type == INPUT_TYPE_KBD */
-			/* strings can be non null-terminated */
-			char press_str[sizeof(void *) + sizeof(int)];
-			char repeat_str[sizeof(void *) + sizeof(int)];
-			char release_str[sizeof(void *) + sizeof(int)];
+			char press_str[sizeof(void *) + sizeof(int)] __nonstring;
+			char repeat_str[sizeof(void *) + sizeof(int)] __nonstring;
+			char release_str[sizeof(void *) + sizeof(int)] __nonstring;
 		} kbd;
 	} u;
 };
-- 
GitLab


From f0604f63033d4020f019d2aaee805c1075b1077b Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
Date: Wed, 19 Sep 2018 01:41:21 +0200
Subject: [PATCH 0029/1890] Compiler Attributes: ext4: remove local __nonstring
 definition

Commit 072ebb3bffe6 ("ext4: add nonstring annotations to ext4.h")
introduced a local definition of __nonstring to suppress some false
positives in gcc 8's -Wstringop-truncation.

Since now we support __nonstring for everyone, remove it.

Tested-by: Sedat Dilek <sedat.dilek@gmail.com> # on top of v4.19-rc5, clang 7
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
---
 fs/ext4/ext4.h | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index caff935fbeb8f..2acdfdad3d3f6 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -45,15 +45,6 @@
 
 #include <linux/compiler.h>
 
-/* Until this gets included into linux/compiler-gcc.h */
-#ifndef __nonstring
-#if defined(GCC_VERSION) && (GCC_VERSION >= 80000)
-#define __nonstring __attribute__((nonstring))
-#else
-#define __nonstring
-#endif
-#endif
-
 /*
  * The fourth extended filesystem constants/structures
  */
-- 
GitLab


From d2266bbfa9e3e32e3b642965088ca461bd24a94f Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang@intel.com>
Date: Wed, 3 Oct 2018 00:49:21 +0800
Subject: [PATCH 0030/1890] x86/earlyprintk: Add a force option for pciserial
 device

The "pciserial" earlyprintk variant helps much on many modern x86
platforms, but unfortunately there are still some platforms with PCI
UART devices which have the wrong PCI class code. In that case, the
current class code check does not allow for them to be used for logging.

Add a sub-option "force" which overrides the class code check and thus
the use of such device can be enforced.

 [ bp: massage formulations. ]

Suggested-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Feng Tang <feng.tang@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: "Stuart R . Anderson" <stuart.r.anderson@intel.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Feng Tang <feng.tang@intel.com>
Cc: Frederic Weisbecker <frederic@kernel.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: H Peter Anvin <hpa@linux.intel.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Kai-Heng Feng <kai.heng.feng@canonical.com>
Cc: Kate Stewart <kstewart@linuxfoundation.org>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Philippe Ombredanne <pombredanne@nexb.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Thymo van Beers <thymovanbeers@gmail.com>
Cc: alan@linux.intel.com
Cc: linux-doc@vger.kernel.org
Link: http://lkml.kernel.org/r/20181002164921.25833-1-feng.tang@intel.com
---
 .../admin-guide/kernel-parameters.txt         |  6 +++-
 arch/x86/kernel/early_printk.c                | 29 ++++++++++++-------
 2 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 92eb1f42240d7..34e6800dea0ee 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1063,7 +1063,7 @@
 			earlyprintk=serial[,0x...[,baudrate]]
 			earlyprintk=ttySn[,baudrate]
 			earlyprintk=dbgp[debugController#]
-			earlyprintk=pciserial,bus:device.function[,baudrate]
+			earlyprintk=pciserial[,force],bus:device.function[,baudrate]
 			earlyprintk=xdbc[xhciController#]
 
 			earlyprintk is useful when the kernel crashes before
@@ -1095,6 +1095,10 @@
 
 			The sclp output can only be used on s390.
 
+			The optional "force" to "pciserial" enables use of a
+			PCI device even when its classcode is not of the
+			UART class.
+
 	edac_report=	[HW,EDAC] Control how to report EDAC event
 			Format: {"on" | "off" | "force"}
 			on: enable EDAC to report H/W event. May be overridden
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 5e801c8c8ce7c..374a52fa52969 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -213,8 +213,9 @@ static unsigned int mem32_serial_in(unsigned long addr, int offset)
  * early_pci_serial_init()
  *
  * This function is invoked when the early_printk param starts with "pciserial"
- * The rest of the param should be ",B:D.F,baud" where B, D & F describe the
- * location of a PCI device that must be a UART device.
+ * The rest of the param should be "[force],B:D.F,baud", where B, D & F describe
+ * the location of a PCI device that must be a UART device. "force" is optional
+ * and overrides the use of an UART device with a wrong PCI class code.
  */
 static __init void early_pci_serial_init(char *s)
 {
@@ -224,17 +225,23 @@ static __init void early_pci_serial_init(char *s)
 	u32 classcode, bar0;
 	u16 cmdreg;
 	char *e;
+	int force = 0;
 
-
-	/*
-	 * First, part the param to get the BDF values
-	 */
 	if (*s == ',')
 		++s;
 
 	if (*s == 0)
 		return;
 
+	/* Force the use of an UART device with wrong class code */
+	if (!strncmp(s, "force,", 6)) {
+		force = 1;
+		s += 6;
+	}
+
+	/*
+	 * Part the param to get the BDF values
+	 */
 	bus = (u8)simple_strtoul(s, &e, 16);
 	s = e;
 	if (*s != ':')
@@ -253,7 +260,7 @@ static __init void early_pci_serial_init(char *s)
 		s++;
 
 	/*
-	 * Second, find the device from the BDF
+	 * Find the device from the BDF
 	 */
 	cmdreg = read_pci_config(bus, slot, func, PCI_COMMAND);
 	classcode = read_pci_config(bus, slot, func, PCI_CLASS_REVISION);
@@ -264,8 +271,10 @@ static __init void early_pci_serial_init(char *s)
 	 */
 	if (((classcode >> 16 != PCI_CLASS_COMMUNICATION_MODEM) &&
 	     (classcode >> 16 != PCI_CLASS_COMMUNICATION_SERIAL)) ||
-	   (((classcode >> 8) & 0xff) != 0x02)) /* 16550 I/F at BAR0 */
-		return;
+	   (((classcode >> 8) & 0xff) != 0x02)) /* 16550 I/F at BAR0 */ {
+		if (!force)
+			return;
+	}
 
 	/*
 	 * Determine if it is IO or memory mapped
@@ -289,7 +298,7 @@ static __init void early_pci_serial_init(char *s)
 	}
 
 	/*
-	 * Lastly, initialize the hardware
+	 * Initialize the hardware
 	 */
 	if (*s) {
 		if (strcmp(s, "nocfg") == 0)
-- 
GitLab


From 617a629c08bfffb05249131079d9a38322902e5b Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg59@google.com>
Date: Thu, 24 May 2018 13:27:45 -0700
Subject: [PATCH 0031/1890] apparmor: Add a wildcard secid

Reserve a secid value that we can use as a wildcard, allowing us to
define policy that's expected to match against all secids.

Signed-off-by: Matthew Garrett <mjg59@google.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/include/secid.h | 3 +++
 security/apparmor/secid.c         | 3 +--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/security/apparmor/include/secid.h b/security/apparmor/include/secid.h
index dee6fa3b6081e..fa2062711b63e 100644
--- a/security/apparmor/include/secid.h
+++ b/security/apparmor/include/secid.h
@@ -22,6 +22,9 @@ struct aa_label;
 /* secid value that will not be allocated */
 #define AA_SECID_INVALID 0
 
+/* secid value that matches any other secid */
+#define AA_SECID_WILDCARD 1
+
 struct aa_label *aa_secid_to_label(u32 secid);
 int apparmor_secid_to_secctx(u32 secid, char **secdata, u32 *seclen);
 int apparmor_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid);
diff --git a/security/apparmor/secid.c b/security/apparmor/secid.c
index f2f22d00db188..8c951c493beb3 100644
--- a/security/apparmor/secid.c
+++ b/security/apparmor/secid.c
@@ -32,8 +32,7 @@
  * secids - do not pin labels with a refcount. They rely on the label
  * properly updating/freeing them
  */
-
-#define AA_FIRST_SECID 1
+#define AA_FIRST_SECID 2
 
 static DEFINE_IDR(aa_secids);
 static DEFINE_SPINLOCK(secid_lock);
-- 
GitLab


From 9caafbe2b4cf4c635826a2832e93cf648605de8b Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg59@google.com>
Date: Thu, 24 May 2018 13:27:46 -0700
Subject: [PATCH 0032/1890] apparmor: Parse secmark policy

Add support for parsing secmark policy provided by userspace, and
store that in the overall policy.

Signed-off-by: Matthew Garrett <mjg59@google.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/include/net.h    | 10 +++++
 security/apparmor/include/policy.h |  3 ++
 security/apparmor/policy.c         |  3 ++
 security/apparmor/policy_unpack.c  | 61 ++++++++++++++++++++++++++++++
 4 files changed, 77 insertions(+)

diff --git a/security/apparmor/include/net.h b/security/apparmor/include/net.h
index ec7228e857a90..7334ac966d018 100644
--- a/security/apparmor/include/net.h
+++ b/security/apparmor/include/net.h
@@ -83,6 +83,13 @@ struct aa_sk_ctx {
 	__e;					\
 })
 
+struct aa_secmark {
+	u8 audit;
+	u8 deny;
+	u32 secid;
+	char *label;
+};
+
 extern struct aa_sfs_entry aa_sfs_entry_network[];
 
 void audit_net_cb(struct audit_buffer *ab, void *va);
@@ -103,4 +110,7 @@ int aa_sk_perm(const char *op, u32 request, struct sock *sk);
 int aa_sock_file_perm(struct aa_label *label, const char *op, u32 request,
 		      struct socket *sock);
 
+int apparmor_secmark_check(struct aa_label *label, char *op, u32 request,
+			   u32 secid, struct sock *sk);
+
 #endif /* __AA_NET_H */
diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h
index ab64c6b5db5ac..8e6707c837bef 100644
--- a/security/apparmor/include/policy.h
+++ b/security/apparmor/include/policy.h
@@ -155,6 +155,9 @@ struct aa_profile {
 
 	struct aa_rlimit rlimits;
 
+	int secmark_count;
+	struct aa_secmark *secmark;
+
 	struct aa_loaddata *rawdata;
 	unsigned char *hash;
 	char *dirname;
diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c
index 1590e2de4e841..8d846a747b846 100644
--- a/security/apparmor/policy.c
+++ b/security/apparmor/policy.c
@@ -231,6 +231,9 @@ void aa_free_profile(struct aa_profile *profile)
 	for (i = 0; i < profile->xattr_count; i++)
 		kzfree(profile->xattrs[i]);
 	kzfree(profile->xattrs);
+	for (i=0; i < profile->secmark_count; i++)
+		kzfree(profile->secmark[i].label);
+	kzfree(profile->secmark);
 	kzfree(profile->dirname);
 	aa_put_dfa(profile->xmatch);
 	aa_put_dfa(profile->policy.dfa);
diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index 21cb384d712a2..379682e2a8d5d 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -292,6 +292,19 @@ static bool unpack_nameX(struct aa_ext *e, enum aa_code code, const char *name)
 	return 0;
 }
 
+static bool unpack_u8(struct aa_ext *e, u8 *data, const char *name)
+{
+	if (unpack_nameX(e, AA_U8, name)) {
+		if (!inbounds(e, sizeof(u8)))
+			return 0;
+		if (data)
+			*data = get_unaligned((u8 *)e->pos);
+		e->pos += sizeof(u8);
+		return 1;
+	}
+	return 0;
+}
+
 static bool unpack_u32(struct aa_ext *e, u32 *data, const char *name)
 {
 	if (unpack_nameX(e, AA_U32, name)) {
@@ -529,6 +542,49 @@ static bool unpack_xattrs(struct aa_ext *e, struct aa_profile *profile)
 	return 0;
 }
 
+static bool unpack_secmark(struct aa_ext *e, struct aa_profile *profile)
+{
+	void *pos = e->pos;
+	int i, size;
+
+	if (unpack_nameX(e, AA_STRUCT, "secmark")) {
+		size = unpack_array(e, NULL);
+
+		profile->secmark = kcalloc(size, sizeof(struct aa_secmark),
+					   GFP_KERNEL);
+		if (!profile->secmark)
+			goto fail;
+
+		profile->secmark_count = size;
+
+		for (i = 0; i < size; i++) {
+			if (!unpack_u8(e, &profile->secmark[i].audit, NULL))
+				goto fail;
+			if (!unpack_u8(e, &profile->secmark[i].deny, NULL))
+				goto fail;
+			if (!unpack_strdup(e, &profile->secmark[i].label, NULL))
+				goto fail;
+		}
+		if (!unpack_nameX(e, AA_ARRAYEND, NULL))
+			goto fail;
+		if (!unpack_nameX(e, AA_STRUCTEND, NULL))
+			goto fail;
+	}
+
+	return 1;
+
+fail:
+	if (profile->secmark) {
+		for (i = 0; i < size; i++)
+			kfree(profile->secmark[i].label);
+		kfree(profile->secmark);
+		profile->secmark_count = 0;
+	}
+
+	e->pos = pos;
+	return 0;
+}
+
 static bool unpack_rlimits(struct aa_ext *e, struct aa_profile *profile)
 {
 	void *pos = e->pos;
@@ -727,6 +783,11 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 		goto fail;
 	}
 
+	if (!unpack_secmark(e, profile)) {
+		info = "failed to unpack profile secmark rules";
+		goto fail;
+	}
+
 	if (unpack_nameX(e, AA_STRUCT, "policydb")) {
 		/* generic policy dfa - optional and may be NULL */
 		info = "failed to unpack policydb";
-- 
GitLab


From ab9f2115081ab7ba63b77a759e0f3eb5d6463d7f Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg59@google.com>
Date: Thu, 24 May 2018 13:27:47 -0700
Subject: [PATCH 0033/1890] apparmor: Allow filtering based on secmark policy

Add support for dropping or accepting packets based on their secmark
tags.

Signed-off-by: Matthew Garrett <mjg59@google.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/lsm.c | 112 +++++++++++++++++++++++++++++++++++++++-
 security/apparmor/net.c |  66 +++++++++++++++++++++++
 2 files changed, 177 insertions(+), 1 deletion(-)

diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index f09fea0b4db7e..2c842f24821b7 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -23,6 +23,8 @@
 #include <linux/sysctl.h>
 #include <linux/audit.h>
 #include <linux/user_namespace.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
 #include <net/sock.h>
 
 #include "include/apparmor.h"
@@ -1030,7 +1032,13 @@ static int apparmor_socket_shutdown(struct socket *sock, int how)
  */
 static int apparmor_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
 {
-	return 0;
+	struct aa_sk_ctx *ctx = SK_CTX(sk);
+
+	if (!skb->secmark)
+		return 0;
+
+	return apparmor_secmark_check(ctx->label, OP_RECVMSG, AA_MAY_RECEIVE,
+				      skb->secmark, sk);
 }
 
 
@@ -1126,6 +1134,18 @@ static void apparmor_sock_graft(struct sock *sk, struct socket *parent)
 		ctx->label = aa_get_current_label();
 }
 
+static int apparmor_inet_conn_request(struct sock *sk, struct sk_buff *skb,
+				      struct request_sock *req)
+{
+	struct aa_sk_ctx *ctx = SK_CTX(sk);
+
+	if (!skb->secmark)
+		return 0;
+
+	return apparmor_secmark_check(ctx->label, OP_CONNECT, AA_MAY_CONNECT,
+				      skb->secmark, sk);
+}
+
 static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = {
 	LSM_HOOK_INIT(ptrace_access_check, apparmor_ptrace_access_check),
 	LSM_HOOK_INIT(ptrace_traceme, apparmor_ptrace_traceme),
@@ -1183,6 +1203,7 @@ static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = {
 	LSM_HOOK_INIT(socket_getpeersec_dgram,
 		      apparmor_socket_getpeersec_dgram),
 	LSM_HOOK_INIT(sock_graft, apparmor_sock_graft),
+	LSM_HOOK_INIT(inet_conn_request, apparmor_inet_conn_request),
 
 	LSM_HOOK_INIT(cred_alloc_blank, apparmor_cred_alloc_blank),
 	LSM_HOOK_INIT(cred_free, apparmor_cred_free),
@@ -1538,6 +1559,95 @@ static inline int apparmor_init_sysctl(void)
 }
 #endif /* CONFIG_SYSCTL */
 
+static unsigned int apparmor_ip_postroute(void *priv,
+					  struct sk_buff *skb,
+					  const struct nf_hook_state *state)
+{
+	struct aa_sk_ctx *ctx;
+	struct sock *sk;
+
+	if (!skb->secmark)
+		return NF_ACCEPT;
+
+	sk = skb_to_full_sk(skb);
+	if (sk == NULL)
+		return NF_ACCEPT;
+
+	ctx = SK_CTX(sk);
+	if (!apparmor_secmark_check(ctx->label, OP_SENDMSG, AA_MAY_SEND,
+				    skb->secmark, sk))
+		return NF_ACCEPT;
+
+	return NF_DROP_ERR(-ECONNREFUSED);
+
+}
+
+static unsigned int apparmor_ipv4_postroute(void *priv,
+					    struct sk_buff *skb,
+					    const struct nf_hook_state *state)
+{
+	return apparmor_ip_postroute(priv, skb, state);
+}
+
+static unsigned int apparmor_ipv6_postroute(void *priv,
+					    struct sk_buff *skb,
+					    const struct nf_hook_state *state)
+{
+	return apparmor_ip_postroute(priv, skb, state);
+}
+
+static const struct nf_hook_ops apparmor_nf_ops[] = {
+	{
+		.hook =         apparmor_ipv4_postroute,
+		.pf =           NFPROTO_IPV4,
+		.hooknum =      NF_INET_POST_ROUTING,
+		.priority =     NF_IP_PRI_SELINUX_FIRST,
+	},
+#if IS_ENABLED(CONFIG_IPV6)
+	{
+		.hook =         apparmor_ipv6_postroute,
+		.pf =           NFPROTO_IPV6,
+		.hooknum =      NF_INET_POST_ROUTING,
+		.priority =     NF_IP6_PRI_SELINUX_FIRST,
+	},
+#endif
+};
+
+static int __net_init apparmor_nf_register(struct net *net)
+{
+	int ret;
+
+	ret = nf_register_net_hooks(net, apparmor_nf_ops,
+				    ARRAY_SIZE(apparmor_nf_ops));
+	return ret;
+}
+
+static void __net_exit apparmor_nf_unregister(struct net *net)
+{
+	nf_unregister_net_hooks(net, apparmor_nf_ops,
+				ARRAY_SIZE(apparmor_nf_ops));
+}
+
+static struct pernet_operations apparmor_net_ops = {
+	.init = apparmor_nf_register,
+	.exit = apparmor_nf_unregister,
+};
+
+static int __init apparmor_nf_ip_init(void)
+{
+	int err;
+
+	if (!apparmor_enabled)
+		return 0;
+
+	err = register_pernet_subsys(&apparmor_net_ops);
+	if (err)
+		panic("Apparmor: register_pernet_subsys: error %d\n", err);
+
+	return 0;
+}
+__initcall(apparmor_nf_ip_init);
+
 static int __init apparmor_init(void)
 {
 	int error;
diff --git a/security/apparmor/net.c b/security/apparmor/net.c
index d5d72dd1ca1f9..f9a678ce994f9 100644
--- a/security/apparmor/net.c
+++ b/security/apparmor/net.c
@@ -18,6 +18,7 @@
 #include "include/label.h"
 #include "include/net.h"
 #include "include/policy.h"
+#include "include/secid.h"
 
 #include "net_names.h"
 
@@ -188,3 +189,68 @@ int aa_sock_file_perm(struct aa_label *label, const char *op, u32 request,
 
 	return aa_label_sk_perm(label, op, request, sock->sk);
 }
+
+static int apparmor_secmark_init(struct aa_secmark *secmark)
+{
+	struct aa_label *label;
+
+	if (secmark->label[0] == '*') {
+		secmark->secid = AA_SECID_WILDCARD;
+		return 0;
+	}
+
+	label = aa_label_strn_parse(&root_ns->unconfined->label,
+				    secmark->label, strlen(secmark->label),
+				    GFP_ATOMIC, false, false);
+
+	if (IS_ERR(label))
+		return PTR_ERR(label);
+
+	secmark->secid = label->secid;
+
+	return 0;
+}
+
+static int aa_secmark_perm(struct aa_profile *profile, u32 request, u32 secid,
+			   struct common_audit_data *sa, struct sock *sk)
+{
+	int i, ret;
+	struct aa_perms perms = { };
+
+	if (profile->secmark_count == 0)
+		return 0;
+
+	for (i = 0; i < profile->secmark_count; i++) {
+		if (!profile->secmark[i].secid) {
+			ret = apparmor_secmark_init(&profile->secmark[i]);
+			if (ret)
+				return ret;
+		}
+
+		if (profile->secmark[i].secid == secid ||
+		    profile->secmark[i].secid == AA_SECID_WILDCARD) {
+			if (profile->secmark[i].deny)
+				perms.deny = ALL_PERMS_MASK;
+			else
+				perms.allow = ALL_PERMS_MASK;
+
+			if (profile->secmark[i].audit)
+				perms.audit = ALL_PERMS_MASK;
+		}
+	}
+
+	aa_apply_modes_to_perms(profile, &perms);
+
+	return aa_check_perms(profile, &perms, request, sa, audit_net_cb);
+}
+
+int apparmor_secmark_check(struct aa_label *label, char *op, u32 request,
+			   u32 secid, struct sock *sk)
+{
+	struct aa_profile *profile;
+	DEFINE_AUDIT_SK(sa, op, sk);
+
+	return fn_for_each_confined(label, profile,
+				    aa_secmark_perm(profile, request, secid,
+						    &sa, sk));
+}
-- 
GitLab


From 0fb871cc42537465e322f727bec6abfd375faa83 Mon Sep 17 00:00:00 2001
From: Lance Roy <ldr709@gmail.com>
Date: Tue, 2 Oct 2018 22:39:01 -0700
Subject: [PATCH 0034/1890] apparmor: Replace spin_is_locked() with lockdep

lockdep_assert_held() is better suited to checking locking requirements,
since it won't get confused when someone else holds the lock. This is
also a step towards possibly removing spin_is_locked().

Signed-off-by: Lance Roy <ldr709@gmail.com>
Cc: John Johansen <john.johansen@canonical.com>
Cc: James Morris <jmorris@namei.org>
Cc: "Serge E. Hallyn" <serge@hallyn.com>
Cc: <linux-security-module@vger.kernel.org>
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/apparmor/file.c b/security/apparmor/file.c
index 4285943f7260f..d0afed9ebd0ed 100644
--- a/security/apparmor/file.c
+++ b/security/apparmor/file.c
@@ -496,7 +496,7 @@ static void update_file_ctx(struct aa_file_ctx *fctx, struct aa_label *label,
 	/* update caching of label on file_ctx */
 	spin_lock(&fctx->lock);
 	old = rcu_dereference_protected(fctx->label,
-					spin_is_locked(&fctx->lock));
+					lockdep_is_held(&fctx->lock));
 	l = aa_label_merge(old, label, GFP_ATOMIC);
 	if (l) {
 		if (l != old) {
-- 
GitLab


From ca3fde5214e1d24f78269b337d3f22afd6bf445e Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Sat, 29 Sep 2018 03:49:26 +0200
Subject: [PATCH 0035/1890] apparmor: don't try to replace stale label in
 ptraceme check

begin_current_label_crit_section() must run in sleepable context because
when label_is_stale() is true, aa_replace_current_label() runs, which uses
prepare_creds(), which can sleep.

Until now, the ptraceme access check (which runs with tasklist_lock held)
violated this rule.

Fixes: b2d09ae449ced ("apparmor: move ptrace checks to using labels")
Reported-by: Cyrill Gorcunov <gorcunov@gmail.com>
Reported-by: kernel test robot <rong.a.chen@intel.com>
Signed-off-by: Jann Horn <jannh@google.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/lsm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index 2c842f24821b7..d08aac05c65a3 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -132,11 +132,11 @@ static int apparmor_ptrace_traceme(struct task_struct *parent)
 	struct aa_label *tracer, *tracee;
 	int error;
 
-	tracee = begin_current_label_crit_section();
+	tracee = __begin_current_label_crit_section();
 	tracer = aa_get_task_label(parent);
 	error = aa_may_ptrace(tracer, tracee, AA_PTRACE_TRACE);
 	aa_put_label(tracer);
-	end_current_label_crit_section(tracee);
+	__end_current_label_crit_section(tracee);
 
 	return error;
 }
-- 
GitLab


From 250f2da49cb8e582215a65c03f50e8ddf5cd119c Mon Sep 17 00:00:00 2001
From: Zubin Mithra <zsm@chromium.org>
Date: Thu, 27 Sep 2018 14:49:17 -0700
Subject: [PATCH 0036/1890] apparmor: Fix uninitialized value in
 aa_split_fqname

Syzkaller reported a OOB-read with the stacktrace below. This occurs
inside __aa_lookupn_ns as `n` is not initialized. `n` is obtained from
aa_splitn_fqname. In cases where `name` is invalid, aa_splitn_fqname
returns without initializing `ns_name` and `ns_len`.

Fix this by always initializing `ns_name` and `ns_len`.

	__dump_stack lib/dump_stack.c:77 [inline]
	dump_stack+0x1c4/0x2b4 lib/dump_stack.c:113
	print_address_description.cold.8+0x9/0x1ff mm/kasan/report.c:256
	kasan_report_error mm/kasan/report.c:354 [inline]
	kasan_report.cold.9+0x242/0x309 mm/kasan/report.c:412
	__asan_report_load1_noabort+0x14/0x20 mm/kasan/report.c:430
	memcmp+0xe3/0x160 lib/string.c:861
	strnstr+0x4b/0x70 lib/string.c:934
	__aa_lookupn_ns+0xc1/0x570 security/apparmor/policy_ns.c:209
	aa_lookupn_ns+0x88/0x1e0 security/apparmor/policy_ns.c:240
	aa_fqlookupn_profile+0x1b9/0x1010 security/apparmor/policy.c:468
	fqlookupn_profile+0x80/0xc0 security/apparmor/label.c:1844
	aa_label_strn_parse+0xa3a/0x1230 security/apparmor/label.c:1908
	aa_label_parse+0x42/0x50 security/apparmor/label.c:1943
	aa_change_profile+0x513/0x3510 security/apparmor/domain.c:1362
	apparmor_setprocattr+0xaa4/0x1150 security/apparmor/lsm.c:658
	security_setprocattr+0x66/0xc0 security/security.c:1298
	proc_pid_attr_write+0x301/0x540 fs/proc/base.c:2555
	__vfs_write+0x119/0x9f0 fs/read_write.c:485
	vfs_write+0x1fc/0x560 fs/read_write.c:549
	ksys_write+0x101/0x260 fs/read_write.c:598
	__do_sys_write fs/read_write.c:610 [inline]
	__se_sys_write fs/read_write.c:607 [inline]
	__x64_sys_write+0x73/0xb0 fs/read_write.c:607
	do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290
	entry_SYSCALL_64_after_hwframe+0x49/0xbe

Fixes: 3b0aaf5866bf ("apparmor: add lib fn to find the "split" for fqnames")
Reported-by: syzbot+61e4b490d9d2da591b50@syzkaller.appspotmail.com
Signed-off-by: Zubin Mithra <zsm@chromium.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/lib.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c
index 974affe505314..76491e7f4177f 100644
--- a/security/apparmor/lib.c
+++ b/security/apparmor/lib.c
@@ -90,10 +90,12 @@ const char *aa_splitn_fqname(const char *fqname, size_t n, const char **ns_name,
 	const char *end = fqname + n;
 	const char *name = skipn_spaces(fqname, n);
 
-	if (!name)
-		return NULL;
 	*ns_name = NULL;
 	*ns_len = 0;
+
+	if (!name)
+		return NULL;
+
 	if (name[0] == ':') {
 		char *split = strnchr(&name[1], end - &name[1], ':');
 		*ns_name = skipn_spaces(&name[1], end - &name[1]);
-- 
GitLab


From ab6ead7d07ca87e3e66e6d524c8aa311732878de Mon Sep 17 00:00:00 2001
From: Janusz Krzysztofik <jmkrzyszt@gmail.com>
Date: Tue, 2 Oct 2018 11:45:29 +0200
Subject: [PATCH 0037/1890] ARM: OMAP1: ams-delta: Fix impossible .irq < 0

Since the very beginning, unsigned int .irq member of struct
plat_serial8250_port introduced by commit eff443df679e ("OMAP1:
AMS_DELTA: add modem support") was statically initialized to a negative
value -EINVAL.  Moreover, commit 0812db943748 ("ARM: OMAP1: ams-delta:
assign MODEM IRQ from GPIO descriptor") has introduced some new code
which checks for that member carrying a negative value which is
impossible.

Use IRQ_NOTCONNECTED instead of -EINVAL. Also, drop the valueless check
and let the modem device be registered regardless of .irq value, and
the value handled by "serial8250" driver.

Fixes: 0812db943748 ("ARM: OMAP1: ams-delta: assign MODEM IRQ from GPIO descriptor")
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Janusz Krzysztofik <jmkrzyszt@gmail.com>
Acked-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap1/board-ams-delta.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c
index 318925ae3ebe3..b01b68494cacb 100644
--- a/arch/arm/mach-omap1/board-ams-delta.c
+++ b/arch/arm/mach-omap1/board-ams-delta.c
@@ -765,7 +765,7 @@ static struct plat_serial8250_port ams_delta_modem_ports[] = {
 	{
 		.membase	= IOMEM(MODEM_VIRT),
 		.mapbase	= MODEM_PHYS,
-		.irq		= -EINVAL, /* changed later */
+		.irq		= IRQ_NOTCONNECTED, /* changed later */
 		.flags		= UPF_BOOT_AUTOCONF,
 		.irqflags	= IRQF_TRIGGER_RISING,
 		.iotype		= UPIO_MEM,
@@ -856,8 +856,7 @@ static int __init modem_nreset_init(void)
 
 
 /*
- * This function expects MODEM IRQ number already assigned to the port
- * and fails if it's not.
+ * This function expects MODEM IRQ number already assigned to the port.
  * The MODEM device requires its RESET# pin kept high during probe.
  * That requirement can be fulfilled in several ways:
  * - with a descriptor of already functional modem_nreset regulator
@@ -880,9 +879,6 @@ static int __init ams_delta_modem_init(void)
 	if (!machine_is_ams_delta())
 		return -ENODEV;
 
-	if (ams_delta_modem_ports[0].irq < 0)
-		return ams_delta_modem_ports[0].irq;
-
 	omap_cfg_reg(M14_1510_GPIO2);
 
 	/* Initialize the modem_nreset regulator consumer before use */
-- 
GitLab


From f42259ef810ce83f3e1a8ea4ce12dfda873fbe44 Mon Sep 17 00:00:00 2001
From: Hang Yuan <hang.yuan@linux.intel.com>
Date: Wed, 19 Sep 2018 14:42:09 +0800
Subject: [PATCH 0038/1890] drm/i915/gvt: invalidate old ggtt page when update
 ggtt entry

Previously only cancelled dma map of a ggtt page when the ggtt entry was
cleared. This patch will cancel dma map of an old ggtt page as well when
the ggtt entry is updated with new page address.

Fixes: 7598e8700e9a(drm/i915/gvt: Missed to cancel dma map for ggtt entries)
Signed-off-by: Hang Yuan <hang.yuan@linux.intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/gtt.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
index 00aad8164dec2..c11e353ca9043 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -2259,16 +2259,18 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
 		} else
 			ops->set_pfn(&m, dma_addr >> PAGE_SHIFT);
 	} else {
-		ggtt_get_host_entry(ggtt_mm, &m, g_gtt_index);
-		ggtt_invalidate_pte(vgpu, &m);
 		ops->set_pfn(&m, gvt->gtt.scratch_mfn);
 		ops->clear_present(&m);
 	}
 
 out:
+	ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
+
+	ggtt_get_host_entry(ggtt_mm, &e, g_gtt_index);
+	ggtt_invalidate_pte(vgpu, &e);
+
 	ggtt_set_host_entry(ggtt_mm, &m, g_gtt_index);
 	ggtt_invalidate(gvt->dev_priv);
-	ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
 	return 0;
 }
 
-- 
GitLab


From e884bce1d9321047ea002b97699e4f7a74c3fae3 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 10 Oct 2018 16:41:40 -0400
Subject: [PATCH 0039/1890] ext4: don't open-code ERR_CAST

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ext4/ialloc.c | 2 +-
 fs/ext4/namei.c  | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 2addcb8730e19..014f6a698cb71 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -1216,7 +1216,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
 	bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
 	bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
 	if (IS_ERR(bitmap_bh))
-		return (struct inode *) bitmap_bh;
+		return ERR_CAST(bitmap_bh);
 
 	/* Having the inode bit set should be a 100% indicator that this
 	 * is a valid orphan (no e2fsck run on fs).  Orphans also include
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 116ff68c5bd43..c31717e343d08 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1556,7 +1556,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
 
 	bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
 	if (IS_ERR(bh))
-		return (struct dentry *) bh;
+		return ERR_CAST(bh);
 	inode = NULL;
 	if (bh) {
 		__u32 ino = le32_to_cpu(de->inode);
@@ -1600,7 +1600,7 @@ struct dentry *ext4_get_parent(struct dentry *child)
 
 	bh = ext4_find_entry(d_inode(child), &dotdot, &de, NULL);
 	if (IS_ERR(bh))
-		return (struct dentry *) bh;
+		return ERR_CAST(bh);
 	if (!bh)
 		return ERR_PTR(-ENOENT);
 	ino = le32_to_cpu(de->inode);
-- 
GitLab


From fe650c8ba7c9dde2816d09e335b0be37757e49df Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 12 Oct 2018 12:12:24 +0200
Subject: [PATCH 0040/1890] ACPI / PM: Export acpi_device_get_power() for use
 by modular build drivers

Export acpi_device_get_power() for use by modular build drivers.

Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/acpi/device_pm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
index a7c2673ffd36e..824ae985ad93b 100644
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c
@@ -126,6 +126,7 @@ int acpi_device_get_power(struct acpi_device *device, int *state)
 
 	return 0;
 }
+EXPORT_SYMBOL(acpi_device_get_power);
 
 static int acpi_dev_pm_explicit_set(struct acpi_device *adev, int state)
 {
-- 
GitLab


From 1688c8717118f37191d824862a006c8373d261de Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 12 Oct 2018 12:12:25 +0200
Subject: [PATCH 0041/1890] pwm: lpss: Add ACPI HID for second PWM controller
 on Cherry Trail devices

The second PWM controller on Cherry Trail devices uses a separate ACPI
HID: "80862289", add this so that the driver will properly bind to the
second PWM controller.

The second PWM controller is usually not used, the main thing gained by
this is properly putting the PWM controller in D3 on suspend.

Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/pwm-lpss-platform.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/pwm/pwm-lpss-platform.c b/drivers/pwm/pwm-lpss-platform.c
index 5561b9e190f84..7304f36ee715a 100644
--- a/drivers/pwm/pwm-lpss-platform.c
+++ b/drivers/pwm/pwm-lpss-platform.c
@@ -81,6 +81,7 @@ static SIMPLE_DEV_PM_OPS(pwm_lpss_platform_pm_ops,
 static const struct acpi_device_id pwm_lpss_acpi_match[] = {
 	{ "80860F09", (unsigned long)&pwm_lpss_byt_info },
 	{ "80862288", (unsigned long)&pwm_lpss_bsw_info },
+	{ "80862289", (unsigned long)&pwm_lpss_bsw_info },
 	{ "80865AC8", (unsigned long)&pwm_lpss_bxt_info },
 	{ },
 };
-- 
GitLab


From 9dc419b6c7e4bb67c0966758c1e4c9a9c9a4309d Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 12 Oct 2018 12:12:26 +0200
Subject: [PATCH 0042/1890] pwm: lpss: Move struct pwm_lpss_chip definition to
 the header file

Move struct pwm_lpss_chip definition from pwm-lpss.c to pwm-lpss.h,
so that the pci/platform drivers can access the info member
(struct pwm_lpss_boardinfo *).

This is a preparation patch for adding platform specific quirks, which
the drivers need access to, to pwm_lpss_boardinfo.

Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/pwm-lpss.c | 9 ---------
 drivers/pwm/pwm-lpss.h | 9 ++++++++-
 2 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/drivers/pwm/pwm-lpss.c b/drivers/pwm/pwm-lpss.c
index 4721a264bac25..e602835fd6de4 100644
--- a/drivers/pwm/pwm-lpss.c
+++ b/drivers/pwm/pwm-lpss.c
@@ -32,15 +32,6 @@
 /* Size of each PWM register space if multiple */
 #define PWM_SIZE			0x400
 
-#define MAX_PWMS			4
-
-struct pwm_lpss_chip {
-	struct pwm_chip chip;
-	void __iomem *regs;
-	const struct pwm_lpss_boardinfo *info;
-	u32 saved_ctrl[MAX_PWMS];
-};
-
 static inline struct pwm_lpss_chip *to_lpwm(struct pwm_chip *chip)
 {
 	return container_of(chip, struct pwm_lpss_chip, chip);
diff --git a/drivers/pwm/pwm-lpss.h b/drivers/pwm/pwm-lpss.h
index 7a4238ad1fcb1..8f029ed263af0 100644
--- a/drivers/pwm/pwm-lpss.h
+++ b/drivers/pwm/pwm-lpss.h
@@ -16,7 +16,14 @@
 #include <linux/device.h>
 #include <linux/pwm.h>
 
-struct pwm_lpss_chip;
+#define MAX_PWMS			4
+
+struct pwm_lpss_chip {
+	struct pwm_chip chip;
+	void __iomem *regs;
+	const struct pwm_lpss_boardinfo *info;
+	u32 saved_ctrl[MAX_PWMS];
+};
 
 struct pwm_lpss_boardinfo {
 	unsigned long clk_rate;
-- 
GitLab


From 6a425ecd19a2c0e19a501323216ecf987de07841 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 12 Oct 2018 12:12:27 +0200
Subject: [PATCH 0043/1890] pwm: lpss: Check PWM powerstate after resume on
 Cherry Trail devices

The _PS0 method for the integrated graphics on some Cherry Trail devices
(observed on a HP Pavilion X2 10-p0XX) turns on the PWM chip (puts it in
D0), causing an inconsistency between the state the pm-core thinks it is
in (left runtime suspended as it was before the suspend/resume) and the
state it actually is in.

Interestingly enough this is done on a device where the pwm controller is
not used for the backlight at all, since it uses an eDP panel. On devices
where the PWM is used this is not a problem since we will resume it
ourselves anyways.

This inconsistency causes us to never suspend the pwm controller again,
which causes the device to not be able to reach S0ix states when suspended.

This commit adds a resume-complete handler, which when we think the device
is still run-time suspended checks the actual power-state and if necessary
updates the rpm-core's internal state.

This fixes the Pavilion X2 10-p0XX not reaching S0ix states when suspended.

Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/pwm-lpss-platform.c | 25 ++++++++++++++++++++++---
 drivers/pwm/pwm-lpss.h          |  2 ++
 2 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/drivers/pwm/pwm-lpss-platform.c b/drivers/pwm/pwm-lpss-platform.c
index 7304f36ee715a..b6edf8af26cc9 100644
--- a/drivers/pwm/pwm-lpss-platform.c
+++ b/drivers/pwm/pwm-lpss-platform.c
@@ -30,6 +30,7 @@ static const struct pwm_lpss_boardinfo pwm_lpss_bsw_info = {
 	.clk_rate = 19200000,
 	.npwm = 1,
 	.base_unit_bits = 16,
+	.check_power_on_resume = true,
 };
 
 /* Broxton */
@@ -74,9 +75,27 @@ static int pwm_lpss_remove_platform(struct platform_device *pdev)
 	return pwm_lpss_remove(lpwm);
 }
 
-static SIMPLE_DEV_PM_OPS(pwm_lpss_platform_pm_ops,
-			 pwm_lpss_suspend,
-			 pwm_lpss_resume);
+static void pwm_lpss_complete(struct device *dev)
+{
+	struct pwm_lpss_chip *lpwm = dev_get_drvdata(dev);
+	int ret, state;
+
+	/* The PWM may be turned on by AML code, update our state to match */
+	if (pm_runtime_suspended(dev) && lpwm->info->check_power_on_resume) {
+		pm_runtime_disable(dev);
+
+		ret = acpi_device_get_power(ACPI_COMPANION(dev), &state);
+		if (ret == 0 && state == ACPI_STATE_D0)
+			pm_runtime_set_active(dev);
+
+		pm_runtime_enable(dev);
+	}
+}
+
+static const struct dev_pm_ops pwm_lpss_platform_pm_ops = {
+	.complete = pwm_lpss_complete,
+	SET_SYSTEM_SLEEP_PM_OPS(pwm_lpss_suspend, pwm_lpss_resume)
+};
 
 static const struct acpi_device_id pwm_lpss_acpi_match[] = {
 	{ "80860F09", (unsigned long)&pwm_lpss_byt_info },
diff --git a/drivers/pwm/pwm-lpss.h b/drivers/pwm/pwm-lpss.h
index 8f029ed263af0..1a2575d25bead 100644
--- a/drivers/pwm/pwm-lpss.h
+++ b/drivers/pwm/pwm-lpss.h
@@ -30,6 +30,8 @@ struct pwm_lpss_boardinfo {
 	unsigned int npwm;
 	unsigned long base_unit_bits;
 	bool bypass;
+	/* Some devices have AML code messing with the state underneath us */
+	bool check_power_on_resume;
 };
 
 struct pwm_lpss_chip *pwm_lpss_probe(struct device *dev, struct resource *r,
-- 
GitLab


From 42885551cedb45961879d2fc3dc3c4dc545cc23e Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 12 Oct 2018 12:12:28 +0200
Subject: [PATCH 0044/1890] pwm: lpss: Release runtime-pm reference from the
 driver's remove callback

For each pwm output which gets enabled through pwm_lpss_apply(), we do a
pm_runtime_get_sync().

This commit adds pm_runtime_put() calls to pwm_lpss_remove() to balance
these when the driver gets removed with some of the outputs still enabled.

Fixes: f080be27d7d9 ("pwm: lpss: Add support for runtime PM")
Acked-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/pwm-lpss.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/pwm/pwm-lpss.c b/drivers/pwm/pwm-lpss.c
index e602835fd6de4..723ca9de83256 100644
--- a/drivers/pwm/pwm-lpss.c
+++ b/drivers/pwm/pwm-lpss.c
@@ -205,6 +205,12 @@ EXPORT_SYMBOL_GPL(pwm_lpss_probe);
 
 int pwm_lpss_remove(struct pwm_lpss_chip *lpwm)
 {
+	int i;
+
+	for (i = 0; i < lpwm->info->npwm; i++) {
+		if (pwm_is_enabled(&lpwm->chip.pwms[i]))
+			pm_runtime_put(lpwm->chip.dev);
+	}
 	return pwmchip_remove(&lpwm->chip);
 }
 EXPORT_SYMBOL_GPL(pwm_lpss_remove);
-- 
GitLab


From 280fec4c3ad6701567d7d840506b9b1463a48b71 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 12 Oct 2018 12:12:29 +0200
Subject: [PATCH 0045/1890] pwm: lpss: Add get_state callback

Add a get_state callback so that the initial state correctly reflects
the actual hardware state.

Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/pwm-lpss.c | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/drivers/pwm/pwm-lpss.c b/drivers/pwm/pwm-lpss.c
index 723ca9de83256..ea93ef9f3672f 100644
--- a/drivers/pwm/pwm-lpss.c
+++ b/drivers/pwm/pwm-lpss.c
@@ -159,8 +159,42 @@ static int pwm_lpss_apply(struct pwm_chip *chip, struct pwm_device *pwm,
 	return 0;
 }
 
+/* This function gets called once from pwmchip_add to get the initial state */
+static void pwm_lpss_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
+			       struct pwm_state *state)
+{
+	struct pwm_lpss_chip *lpwm = to_lpwm(chip);
+	unsigned long base_unit_range;
+	unsigned long long base_unit, freq, on_time_div;
+	u32 ctrl;
+
+	base_unit_range = BIT(lpwm->info->base_unit_bits);
+
+	ctrl = pwm_lpss_read(pwm);
+	on_time_div = 255 - (ctrl & PWM_ON_TIME_DIV_MASK);
+	base_unit = (ctrl >> PWM_BASE_UNIT_SHIFT) & (base_unit_range - 1);
+
+	freq = base_unit * lpwm->info->clk_rate;
+	do_div(freq, base_unit_range);
+	if (freq == 0)
+		state->period = NSEC_PER_SEC;
+	else
+		state->period = NSEC_PER_SEC / (unsigned long)freq;
+
+	on_time_div *= state->period;
+	do_div(on_time_div, 255);
+	state->duty_cycle = on_time_div;
+
+	state->polarity = PWM_POLARITY_NORMAL;
+	state->enabled = !!(ctrl & PWM_ENABLE);
+
+	if (state->enabled)
+		pm_runtime_get(chip->dev);
+}
+
 static const struct pwm_ops pwm_lpss_ops = {
 	.apply = pwm_lpss_apply,
+	.get_state = pwm_lpss_get_state,
 	.owner = THIS_MODULE,
 };
 
-- 
GitLab


From e4ab5172ea6be4d3a95411d7a7867f3c2be12754 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Wed, 22 Aug 2018 00:02:21 +0200
Subject: [PATCH 0046/1890] pwm: Use SPDX identifier for Renesas drivers

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Reviewed-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/pwm-rcar.c        |  5 +----
 drivers/pwm/pwm-renesas-tpu.c | 10 +---------
 2 files changed, 2 insertions(+), 13 deletions(-)

diff --git a/drivers/pwm/pwm-rcar.c b/drivers/pwm/pwm-rcar.c
index 748f614d53755..a41812fc6f957 100644
--- a/drivers/pwm/pwm-rcar.c
+++ b/drivers/pwm/pwm-rcar.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * R-Car PWM Timer driver
  *
  * Copyright (C) 2015 Renesas Electronics Corporation
- *
- * This is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
  */
 
 #include <linux/clk.h>
diff --git a/drivers/pwm/pwm-renesas-tpu.c b/drivers/pwm/pwm-renesas-tpu.c
index 29267d12fb4c9..4a855a21b782d 100644
--- a/drivers/pwm/pwm-renesas-tpu.c
+++ b/drivers/pwm/pwm-renesas-tpu.c
@@ -1,16 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * R-Mobile TPU PWM driver
  *
  * Copyright (C) 2012 Renesas Solutions Corp.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/clk.h>
-- 
GitLab


From 01e194bbd64a7c65af428561fa97653be8b74564 Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Date: Sat, 22 Sep 2018 22:43:24 +0300
Subject: [PATCH 0047/1890] dt-bindings: pwm: renesas: tpu: Fix "compatible"
 prop description

The "compatible" property description contradicts even the example given:
it only says that there must be a single value while the example has the
fallback value too -- which makes much more sense. Moreover, the generic
property value is misdocumented as being R-Car (and RZ/G1) specific...

Fixes: 382457e562bb ("pwm: renesas-tpu: Add DT support")
Fixes: 3ba111a01822 ("dt-bindings: pwm: renesas-tpu: Document r8a774[35] support")
Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.txt | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.txt b/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.txt
index d53a16715da6a..d276693cf3014 100644
--- a/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.txt
+++ b/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.txt
@@ -2,13 +2,14 @@
 
 Required Properties:
 
-  - compatible: should be one of the following.
+  - compatible: must contain one or more of the following:
     - "renesas,tpu-r8a73a4": for R8A73A4 (R-Mobile APE6) compatible PWM controller.
     - "renesas,tpu-r8a7740": for R8A7740 (R-Mobile A1) compatible PWM controller.
     - "renesas,tpu-r8a7743": for R8A7743 (RZ/G1M) compatible PWM controller.
     - "renesas,tpu-r8a7745": for R8A7745 (RZ/G1E) compatible PWM controller.
     - "renesas,tpu-r8a7790": for R8A7790 (R-Car H2) compatible PWM controller.
-    - "renesas,tpu": for generic R-Car and RZ/G1 TPU PWM controller.
+    - "renesas,tpu": for the generic TPU PWM controller; this is a fallback for
+		     the entries listed above.
 
   - reg: Base address and length of each memory resource used by the PWM
     controller hardware module.
-- 
GitLab


From 2360fc6a704e8dc1f8183a048980052bfe64388b Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Date: Mon, 1 Oct 2018 22:57:39 +0300
Subject: [PATCH 0048/1890] dt-bindings: pwm: renesas: pwm-rcar: Document
 R8A779{7|8}0 bindings

Document the R-Car V3{M|H} (R8A779{7|8}0) SoC in the  Renesas R-Car PWM
bindings. R8A77970's hardware is a generic R-Car gen3 PWM, while R8A77980
has an extra error injection register...

Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.txt b/Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.txt
index e1ef6afbe3a74..e68a34e57ff60 100644
--- a/Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.txt
+++ b/Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.txt
@@ -12,6 +12,8 @@ Required Properties:
  - "renesas,pwm-r8a7795": for R-Car H3
  - "renesas,pwm-r8a7796": for R-Car M3-W
  - "renesas,pwm-r8a77965": for R-Car M3-N
+ - "renesas,pwm-r8a77970": for R-Car V3M
+ - "renesas,pwm-r8a77980": for R-Car V3H
  - "renesas,pwm-r8a77990": for R-Car E3
  - "renesas,pwm-r8a77995": for R-Car D3
 - reg: base address and length of the registers block for the PWM.
-- 
GitLab


From 136d822a9e4e947609d2fbf8d3fa5f15b8c20970 Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Date: Sat, 22 Sep 2018 22:57:29 +0300
Subject: [PATCH 0049/1890] dt-bindings: pwm: renesas: tpu: Document
 R8A779{7|8}0 bindings

Document the R-Car V3{M|H} (R8A779{7|8}0) SoC in the Renesas TPU
bindings; the TPU hardware in those is the Renesas standard 4-channel
timer pulse unit.

Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.txt | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.txt b/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.txt
index d276693cf3014..af5f4c7455973 100644
--- a/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.txt
+++ b/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.txt
@@ -8,6 +8,10 @@ Required Properties:
     - "renesas,tpu-r8a7743": for R8A7743 (RZ/G1M) compatible PWM controller.
     - "renesas,tpu-r8a7745": for R8A7745 (RZ/G1E) compatible PWM controller.
     - "renesas,tpu-r8a7790": for R8A7790 (R-Car H2) compatible PWM controller.
+    - "renesas,tpu-r8a77970": for R8A77970 (R-Car V3M) compatible PWM
+			      controller.
+    - "renesas,tpu-r8a77980": for R8A77980 (R-Car V3H) compatible PWM
+			      controller.
     - "renesas,tpu": for the generic TPU PWM controller; this is a fallback for
 		     the entries listed above.
 
-- 
GitLab


From a4675881ed71f73f5ea96afda6a1ddc1526fde4d Mon Sep 17 00:00:00 2001
From: Biju Das <biju.das@bp.renesas.com>
Date: Thu, 4 Oct 2018 17:17:19 +0100
Subject: [PATCH 0050/1890] dt-bindings: pwm: rcar: Add r8a7744 support

Document RZ/G1N (R8A7744) SoC bindings.

Signed-off-by: Biju Das <biju.das@bp.renesas.com>
Reviewed-by: Chris Paterson <Chris.Paterson2@renesas.com>
Reviewed-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.txt b/Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.txt
index e68a34e57ff60..bdbf1e4070e2e 100644
--- a/Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.txt
+++ b/Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.txt
@@ -3,6 +3,7 @@
 Required Properties:
 - compatible: should be "renesas,pwm-rcar" and one of the following.
  - "renesas,pwm-r8a7743": for RZ/G1M
+ - "renesas,pwm-r8a7744": for RZ/G1N
  - "renesas,pwm-r8a7745": for RZ/G1E
  - "renesas,pwm-r8a7778": for R-Car M1A
  - "renesas,pwm-r8a7779": for R-Car H1
-- 
GitLab


From d5a7060022cd443cc6a054bc73383acac6a8f53d Mon Sep 17 00:00:00 2001
From: Biju Das <biju.das@bp.renesas.com>
Date: Thu, 4 Oct 2018 17:21:34 +0100
Subject: [PATCH 0051/1890] dt-bindings: pwm: renesas-tpu: Document r8a7744
 support

Document r8a7744 specific compatible strings. No driver change is
needed as the fallback compatible string "renesas,tpu" activates the
right code in the driver.

Signed-off-by: Biju Das <biju.das@bp.renesas.com>
Reviewed-by: Chris Paterson <Chris.Paterson2@renesas.com>
Reviewed-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.txt b/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.txt
index af5f4c7455973..848a92b53d810 100644
--- a/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.txt
+++ b/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.txt
@@ -6,6 +6,7 @@ Required Properties:
     - "renesas,tpu-r8a73a4": for R8A73A4 (R-Mobile APE6) compatible PWM controller.
     - "renesas,tpu-r8a7740": for R8A7740 (R-Mobile A1) compatible PWM controller.
     - "renesas,tpu-r8a7743": for R8A7743 (RZ/G1M) compatible PWM controller.
+    - "renesas,tpu-r8a7744": for R8A7744 (RZ/G1N) compatible PWM controller.
     - "renesas,tpu-r8a7745": for R8A7745 (RZ/G1E) compatible PWM controller.
     - "renesas,tpu-r8a7790": for R8A7790 (R-Car H2) compatible PWM controller.
     - "renesas,tpu-r8a77970": for R8A77970 (R-Car V3M) compatible PWM
-- 
GitLab


From c289d6625237aa785b484b4e94c23b3b91ea7e60 Mon Sep 17 00:00:00 2001
From: Fabrice Gasnier <fabrice.gasnier@st.com>
Date: Mon, 1 Oct 2018 15:23:56 +0200
Subject: [PATCH 0052/1890] Revert "pwm: Set class for exported channels in
 sysfs"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit 7e5d1fd75c3dde9fc10c4472b9368089d1b81d00 ("pwm: Set
class for exported channels in sysfs") as it causes regression with
multiple pwm chip[1], when exporting a pwm channel (echo X > export):

- ABI (Documentation/ABI/testing/sysfs-class-pwm) states pwmX should be
  created in /sys/class/pwm/pwmchipN/pwmX
- Reverted patch causes new entry to be also created directly in
  /sys/class/pwm/pwmX
- 1st time, exporting pwmX will create an entry in /sys/class/pwm/pwmX
- class attributes are added under pwmX folder, such as export, unexport
  npwm, symlinks. This is wrong as it belongs to pwmchipN. It may cause
  bad behavior and report wrong values.
- when another export happens on another pwmchip, it can't be created
  (e.g. -EEXIST). This is causing the issue with multiple pwmchip.

Example on stm32 (stm32429i-eval) platform:
$ ls /sys/class/pwm
pwmchip0 pwmchip4

$ cd /sys/class/pwm/pwmchip0/
$ echo 0 > export
$ ls /sys/class/pwm
pwm0 pwmchip0 pwmchip4

$ cd /sys/class/pwm/pwmchip4/
$ echo 0 > export
sysfs: cannot create duplicate filename '/class/pwm/pwm0'
...Exception stack follows...

This is also seen on other platform [2]

[1] https://lkml.org/lkml/2018/9/25/713
[2] https://lkml.org/lkml/2018/9/25/447

Signed-off-by: Fabrice Gasnier <fabrice.gasnier@st.com>
Tested-by: Gottfried Haider <gottfried.haider@gmail.com>
Tested-by: Michal VokÃ¡Ä <michal.vokac@ysoft.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/sysfs.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/pwm/sysfs.c b/drivers/pwm/sysfs.c
index 7c71cdb8a9d8f..4726d43aaa0cb 100644
--- a/drivers/pwm/sysfs.c
+++ b/drivers/pwm/sysfs.c
@@ -263,7 +263,6 @@ static int pwm_export_child(struct device *parent, struct pwm_device *pwm)
 	export->pwm = pwm;
 	mutex_init(&export->lock);
 
-	export->child.class = parent->class;
 	export->child.release = pwm_export_release;
 	export->child.parent = parent;
 	export->child.devt = MKDEV(0, 0);
-- 
GitLab


From 552c02e3e7cfe2744b59de285aaea70021ae95c9 Mon Sep 17 00:00:00 2001
From: Fabrice Gasnier <fabrice.gasnier@st.com>
Date: Mon, 1 Oct 2018 15:23:57 +0200
Subject: [PATCH 0053/1890] pwm: Send a uevent on the pwmchip device upon
 channel sysfs (un)export
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch sends a uevent (KOBJ_CHANGE) on the pwmchipN device,
everytime a pwmX channel has been exported/unexported via sysfs. This
allows udev to implement rules on such events, like:

SUBSYSTEM=="pwm*", PROGRAM="/bin/sh -c '\
        chown -R root:gpio /sys/class/pwm && chmod -R 770 /sys/class/pwm;\
        chown -R root:gpio
/sys/devices/platform/soc/*.pwm/pwm/pwmchip* && chmod -R 770
/sys/devices/platform/soc/*.pwm/pwm/pwmchip*\
'"

This is a replacement patch for commit 7e5d1fd75c3d ("pwm: Set class for
exported channels in sysfs"), see [1].

basic testing:
$ udevadm monitor --environment &
$ echo 0 > /sys/class/pwm/pwmchip0/export
KERNEL[197.321736] change   /devices/.../pwm/pwmchip0 (pwm)
ACTION=change
DEVPATH=/devices/.../pwm/pwmchip0
EXPORT=pwm0
SEQNUM=2045
SUBSYSTEM=pwm

[1] https://lkml.org/lkml/2018/9/25/713

Signed-off-by: Fabrice Gasnier <fabrice.gasnier@st.com>
Tested-by: Gottfried Haider <gottfried.haider@gmail.com>
Tested-by: Michal VokÃ¡Ä <michal.vokac@ysoft.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/sysfs.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/pwm/sysfs.c b/drivers/pwm/sysfs.c
index 4726d43aaa0cb..ceb233dd60484 100644
--- a/drivers/pwm/sysfs.c
+++ b/drivers/pwm/sysfs.c
@@ -249,6 +249,7 @@ static void pwm_export_release(struct device *child)
 static int pwm_export_child(struct device *parent, struct pwm_device *pwm)
 {
 	struct pwm_export *export;
+	char *pwm_prop[2];
 	int ret;
 
 	if (test_and_set_bit(PWMF_EXPORTED, &pwm->flags))
@@ -276,6 +277,10 @@ static int pwm_export_child(struct device *parent, struct pwm_device *pwm)
 		export = NULL;
 		return ret;
 	}
+	pwm_prop[0] = kasprintf(GFP_KERNEL, "EXPORT=pwm%u", pwm->hwpwm);
+	pwm_prop[1] = NULL;
+	kobject_uevent_env(&parent->kobj, KOBJ_CHANGE, pwm_prop);
+	kfree(pwm_prop[0]);
 
 	return 0;
 }
@@ -288,6 +293,7 @@ static int pwm_unexport_match(struct device *child, void *data)
 static int pwm_unexport_child(struct device *parent, struct pwm_device *pwm)
 {
 	struct device *child;
+	char *pwm_prop[2];
 
 	if (!test_and_clear_bit(PWMF_EXPORTED, &pwm->flags))
 		return -ENODEV;
@@ -296,6 +302,11 @@ static int pwm_unexport_child(struct device *parent, struct pwm_device *pwm)
 	if (!child)
 		return -ENODEV;
 
+	pwm_prop[0] = kasprintf(GFP_KERNEL, "UNEXPORT=pwm%u", pwm->hwpwm);
+	pwm_prop[1] = NULL;
+	kobject_uevent_env(&parent->kobj, KOBJ_CHANGE, pwm_prop);
+	kfree(pwm_prop[0]);
+
 	/* for device_find_child() */
 	put_device(child);
 	device_unregister(child);
-- 
GitLab


From 786e0cfa9d6fc1cc8856c1864c988d233649348c Mon Sep 17 00:00:00 2001
From: Fabrizio Castro <fabrizio.castro@bp.renesas.com>
Date: Tue, 21 Aug 2018 17:03:46 +0100
Subject: [PATCH 0054/1890] dt-bindings: pwm: rcar: Add r8a774a1 support

Document RZ/G2M (R8A774A1) SoC bindings.

Signed-off-by: Fabrizio Castro <fabrizio.castro@bp.renesas.com>
Reviewed-by: Biju Das <biju.das@bp.renesas.com>
Reviewed-by: Simon Horman <horms+renesas@verge.net.au>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.txt b/Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.txt
index bdbf1e4070e2e..7f31fe7e20934 100644
--- a/Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.txt
+++ b/Documentation/devicetree/bindings/pwm/renesas,pwm-rcar.txt
@@ -5,6 +5,7 @@ Required Properties:
  - "renesas,pwm-r8a7743": for RZ/G1M
  - "renesas,pwm-r8a7744": for RZ/G1N
  - "renesas,pwm-r8a7745": for RZ/G1E
+ - "renesas,pwm-r8a774a1": for RZ/G2M
  - "renesas,pwm-r8a7778": for R-Car M1A
  - "renesas,pwm-r8a7779": for R-Car H1
  - "renesas,pwm-r8a7790": for R-Car H2
-- 
GitLab


From e1af4779617928efa84562de4de5dc071e7deb08 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 5 Oct 2018 18:11:47 +0200
Subject: [PATCH 0055/1890] apparmor: add #ifdef checks for secmark filtering

The newly added code fails to build when either SECMARK or
NETFILTER are disabled:

security/apparmor/lsm.c: In function 'apparmor_socket_sock_rcv_skb':
security/apparmor/lsm.c:1138:12: error: 'struct sk_buff' has no member named 'secmark'; did you mean 'mark'?

security/apparmor/lsm.c:1671:21: error: 'struct nf_hook_state' declared inside parameter list will not be visible outside of this definition or declaration [-Werror]

Add a set of #ifdef checks around it to only enable the code that
we can compile and that makes sense in that configuration.

Fixes: ab9f2115081a ("apparmor: Allow filtering based on secmark policy")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/lsm.c | 10 ++++++++++
 security/apparmor/net.c |  2 ++
 2 files changed, 12 insertions(+)

diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index d08aac05c65a3..656a143ce8fe2 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -1022,6 +1022,7 @@ static int apparmor_socket_shutdown(struct socket *sock, int how)
 	return aa_sock_perm(OP_SHUTDOWN, AA_MAY_SHUTDOWN, sock);
 }
 
+#ifdef CONFIG_NETWORK_SECMARK
 /**
  * apparmor_socket_sock_recv_skb - check perms before associating skb to sk
  *
@@ -1040,6 +1041,7 @@ static int apparmor_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	return apparmor_secmark_check(ctx->label, OP_RECVMSG, AA_MAY_RECEIVE,
 				      skb->secmark, sk);
 }
+#endif
 
 
 static struct aa_label *sk_peer_label(struct sock *sk)
@@ -1134,6 +1136,7 @@ static void apparmor_sock_graft(struct sock *sk, struct socket *parent)
 		ctx->label = aa_get_current_label();
 }
 
+#ifdef CONFIG_NETWORK_SECMARK
 static int apparmor_inet_conn_request(struct sock *sk, struct sk_buff *skb,
 				      struct request_sock *req)
 {
@@ -1145,6 +1148,7 @@ static int apparmor_inet_conn_request(struct sock *sk, struct sk_buff *skb,
 	return apparmor_secmark_check(ctx->label, OP_CONNECT, AA_MAY_CONNECT,
 				      skb->secmark, sk);
 }
+#endif
 
 static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = {
 	LSM_HOOK_INIT(ptrace_access_check, apparmor_ptrace_access_check),
@@ -1197,13 +1201,17 @@ static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = {
 	LSM_HOOK_INIT(socket_getsockopt, apparmor_socket_getsockopt),
 	LSM_HOOK_INIT(socket_setsockopt, apparmor_socket_setsockopt),
 	LSM_HOOK_INIT(socket_shutdown, apparmor_socket_shutdown),
+#ifdef CONFIG_NETWORK_SECMARK
 	LSM_HOOK_INIT(socket_sock_rcv_skb, apparmor_socket_sock_rcv_skb),
+#endif
 	LSM_HOOK_INIT(socket_getpeersec_stream,
 		      apparmor_socket_getpeersec_stream),
 	LSM_HOOK_INIT(socket_getpeersec_dgram,
 		      apparmor_socket_getpeersec_dgram),
 	LSM_HOOK_INIT(sock_graft, apparmor_sock_graft),
+#ifdef CONFIG_NETWORK_SECMARK
 	LSM_HOOK_INIT(inet_conn_request, apparmor_inet_conn_request),
+#endif
 
 	LSM_HOOK_INIT(cred_alloc_blank, apparmor_cred_alloc_blank),
 	LSM_HOOK_INIT(cred_free, apparmor_cred_free),
@@ -1559,6 +1567,7 @@ static inline int apparmor_init_sysctl(void)
 }
 #endif /* CONFIG_SYSCTL */
 
+#if defined(CONFIG_NETFILTER) && defined(CONFIG_NETWORK_SECMARK)
 static unsigned int apparmor_ip_postroute(void *priv,
 					  struct sk_buff *skb,
 					  const struct nf_hook_state *state)
@@ -1647,6 +1656,7 @@ static int __init apparmor_nf_ip_init(void)
 	return 0;
 }
 __initcall(apparmor_nf_ip_init);
+#endif
 
 static int __init apparmor_init(void)
 {
diff --git a/security/apparmor/net.c b/security/apparmor/net.c
index f9a678ce994f9..c07fde444792d 100644
--- a/security/apparmor/net.c
+++ b/security/apparmor/net.c
@@ -190,6 +190,7 @@ int aa_sock_file_perm(struct aa_label *label, const char *op, u32 request,
 	return aa_label_sk_perm(label, op, request, sock->sk);
 }
 
+#ifdef CONFIG_NETWORK_SECMARK
 static int apparmor_secmark_init(struct aa_secmark *secmark)
 {
 	struct aa_label *label;
@@ -254,3 +255,4 @@ int apparmor_secmark_check(struct aa_label *label, char *op, u32 request,
 				    aa_secmark_perm(profile, request, secid,
 						    &sa, sk));
 }
+#endif
-- 
GitLab


From 995f608e7a349c837d6c0b3ffa7d1a94d01f7203 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 12 Oct 2018 22:46:50 -0400
Subject: [PATCH 0056/1890] ntfs: don't open-code ERR_CAST

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ntfs/namei.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c
index 4690cd75d8d79..3986c7a1f6a88 100644
--- a/fs/ntfs/namei.c
+++ b/fs/ntfs/namei.c
@@ -312,7 +312,7 @@ static struct dentry *ntfs_get_parent(struct dentry *child_dent)
 	/* Get the mft record of the inode belonging to the child dentry. */
 	mrec = map_mft_record(ni);
 	if (IS_ERR(mrec))
-		return (struct dentry *)mrec;
+		return ERR_CAST(mrec);
 	/* Find the first file name attribute in the mft record. */
 	ctx = ntfs_attr_get_search_ctx(ni, mrec);
 	if (unlikely(!ctx)) {
-- 
GitLab


From 202dc3cc10b4d37e5251431acf8d5040a8876c7d Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Tue, 9 Oct 2018 19:41:58 +0200
Subject: [PATCH 0057/1890] serial: sh-sci: Fix receive on SCIFA/SCIFB variants
 with DMA

On SCIFA and SCIFB serial ports with DMA support (i.e. some ports on
R-Car Gen2 and RZ/G1 SoCs), receive DMA operations are submitted before
the DMA channel pointer is initialized.  Hence this fails, and the
driver tries to fall back to PIO.  However, at this early phase in the
initialization sequence, fallback to PIO does not work, leading to a
serial port that cannot receive any data.

Fix this by calling sci_submit_rx() after initialization of the DMA
channel pointer.

Reported-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Fixes: 2c4ee23530ffc022 ("serial: sh-sci: Postpone DMA release when falling back to PIO")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Tested-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/sh-sci.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
index ab3f6e91853da..e19bfbba8a019 100644
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -1614,10 +1614,10 @@ static void sci_request_dma(struct uart_port *port)
 		hrtimer_init(&s->rx_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 		s->rx_timer.function = rx_timer_fn;
 
+		s->chan_rx_saved = s->chan_rx = chan;
+
 		if (port->type == PORT_SCIFA || port->type == PORT_SCIFB)
 			sci_submit_rx(s);
-
-		s->chan_rx_saved = s->chan_rx = chan;
 	}
 }
 
-- 
GitLab


From fe278d1a959283c185ceef0703d8570bf907b95d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sun, 14 Oct 2018 20:13:04 +0200
Subject: [PATCH 0058/1890] xtensa: remove ZONE_DMA

ZONE_DMA is intended for magic < 32-bit pools (usually ISA DMA), which
isn't required on xtensa.  Move all the non-highmem memory into
ZONE_NORMAL instead to match other architectures.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 arch/xtensa/Kconfig   | 3 ---
 arch/xtensa/mm/init.c | 2 +-
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index b9ad83a0ee5db..00b9a734048b0 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -1,7 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
-config ZONE_DMA
-	def_bool y
-
 config XTENSA
 	def_bool y
 	select ARCH_HAS_SG_CHAIN
diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c
index 34aead7dcb487..b385e6b73065f 100644
--- a/arch/xtensa/mm/init.c
+++ b/arch/xtensa/mm/init.c
@@ -71,7 +71,7 @@ void __init zones_init(void)
 {
 	/* All pages are DMA-able, so we put them all in the DMA zone. */
 	unsigned long zones_size[MAX_NR_ZONES] = {
-		[ZONE_DMA] = max_low_pfn - ARCH_PFN_OFFSET,
+		[ZONE_NORMAL] = max_low_pfn - ARCH_PFN_OFFSET,
 #ifdef CONFIG_HIGHMEM
 		[ZONE_HIGHMEM] = max_pfn - max_low_pfn,
 #endif
-- 
GitLab


From 3c413e7e39224d171ef697277cd4629b59103101 Mon Sep 17 00:00:00 2001
From: Vignesh R <vigneshr@ti.com>
Date: Tue, 16 Oct 2018 11:34:01 +0530
Subject: [PATCH 0059/1890] dt-bindings: pwm: tiecap: Add TI AM654 SoC specific
 compatible

Add a new compatible string "ti,am654-ecap" to support PWM ECAP IP of
TI AM654 SoC.

Signed-off-by: Vignesh R <vigneshr@ti.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 Documentation/devicetree/bindings/pwm/pwm-tiecap.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/pwm/pwm-tiecap.txt b/Documentation/devicetree/bindings/pwm/pwm-tiecap.txt
index 06a363d9ccef9..b9a1d7402128b 100644
--- a/Documentation/devicetree/bindings/pwm/pwm-tiecap.txt
+++ b/Documentation/devicetree/bindings/pwm/pwm-tiecap.txt
@@ -7,6 +7,7 @@ Required properties:
   for da850  - compatible = "ti,da850-ecap", "ti,am3352-ecap", "ti,am33xx-ecap";
   for dra746 - compatible = "ti,dra746-ecap", "ti,am3352-ecap";
   for 66ak2g - compatible = "ti,k2g-ecap", "ti,am3352-ecap";
+  for am654  - compatible = "ti,am654-ecap", "ti,am3352-ecap";
 - #pwm-cells: should be 3. See pwm.txt in this directory for a description of
   the cells format. The PWM channel index ranges from 0 to 4. The only third
   cell flag supported by this binding is PWM_POLARITY_INVERTED.
-- 
GitLab


From 6f37709fb0a61f09ca96348044c5bb13e333e518 Mon Sep 17 00:00:00 2001
From: Vignesh R <vigneshr@ti.com>
Date: Tue, 16 Oct 2018 11:34:02 +0530
Subject: [PATCH 0060/1890] pwm: Enable TI ECAP driver for ARCH_K3

K3 devices have the same ECAP IP as OMAP SoCs. Enable driver to be built
for K3 devices. Also, drop reference to AM33xx in help text, as IP is
found on multiple TI SoCs.

Signed-off-by: Vignesh R <vigneshr@ti.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/Kconfig | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig
index 504d252716f2e..27e5dd47a01f9 100644
--- a/drivers/pwm/Kconfig
+++ b/drivers/pwm/Kconfig
@@ -447,10 +447,9 @@ config PWM_TEGRA
 
 config  PWM_TIECAP
 	tristate "ECAP PWM support"
-	depends on ARCH_OMAP2PLUS || ARCH_DAVINCI_DA8XX || ARCH_KEYSTONE
+	depends on ARCH_OMAP2PLUS || ARCH_DAVINCI_DA8XX || ARCH_KEYSTONE || ARCH_K3
 	help
-	  PWM driver support for the ECAP APWM controller found on AM33XX
-	  TI SOC
+	  PWM driver support for the ECAP APWM controller found on TI SOCs
 
 	  To compile this driver as a module, choose M here: the module
 	  will be called pwm-tiecap.
-- 
GitLab


From 4743765babb278a7d399df5733fc8a6b6bbedf3e Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sun, 14 Oct 2018 17:12:01 +0200
Subject: [PATCH 0061/1890] pwm: lpss: Force runtime-resume on suspend on
 Cherry Trail

On Cherry Trail devices under Windows the PWM controller used for the
backlight is considered part of the GPU even though it is part of the LPSS
block and thus is an entirely different independent hardware unit.

Because of this on Cherry Trail the GPU's (GFX0 ACPI node) _PS3 and _PS0
methods save and restore the PWM controller registers.

If userspace blanks the screen before suspending, such as e.g. GNOME
does, then the PWM controller will be runtime-suspended when the suspend
starts. This causes the GFX0 _PS? methods to save a value of 0xffffffff
for the PWM control register and to restore this value on resume.

0xffffffff is not a valid value for the register and writing this causes
problems such as e.g. a flickering backlight.

This commit adds a prepare method to the dev_pm_ops and makes it return 0
on Cherry Trail devices forcing a runtime-resume before other device's
suspend methods run. This fixes the reading and writing back of 0xffffffff.

Since we now always runtime-resume the device on suspend, it will be
resumed on resume too and we no longer need to check for the GFX0 _PS0
method having resumed it underneath us, so this commit removes the now no
longer necessary complete dev_pm_op.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/pwm-lpss-platform.c | 24 +++++++++++-------------
 drivers/pwm/pwm-lpss.h          |  7 +++++--
 2 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/drivers/pwm/pwm-lpss-platform.c b/drivers/pwm/pwm-lpss-platform.c
index b6edf8af26cc9..757230e1f575e 100644
--- a/drivers/pwm/pwm-lpss-platform.c
+++ b/drivers/pwm/pwm-lpss-platform.c
@@ -30,7 +30,7 @@ static const struct pwm_lpss_boardinfo pwm_lpss_bsw_info = {
 	.clk_rate = 19200000,
 	.npwm = 1,
 	.base_unit_bits = 16,
-	.check_power_on_resume = true,
+	.other_devices_aml_touches_pwm_regs = true,
 };
 
 /* Broxton */
@@ -61,6 +61,7 @@ static int pwm_lpss_probe_platform(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, lpwm);
 
+	dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_SMART_PREPARE);
 	pm_runtime_set_active(&pdev->dev);
 	pm_runtime_enable(&pdev->dev);
 
@@ -75,25 +76,22 @@ static int pwm_lpss_remove_platform(struct platform_device *pdev)
 	return pwm_lpss_remove(lpwm);
 }
 
-static void pwm_lpss_complete(struct device *dev)
+static int pwm_lpss_prepare(struct device *dev)
 {
 	struct pwm_lpss_chip *lpwm = dev_get_drvdata(dev);
-	int ret, state;
 
-	/* The PWM may be turned on by AML code, update our state to match */
-	if (pm_runtime_suspended(dev) && lpwm->info->check_power_on_resume) {
-		pm_runtime_disable(dev);
+	/*
+	 * If other device's AML code touches the PWM regs on suspend/resume
+	 * force runtime-resume the PWM controller to allow this.
+	 */
+	if (lpwm->info->other_devices_aml_touches_pwm_regs)
+		return 0; /* Force runtime-resume */
 
-		ret = acpi_device_get_power(ACPI_COMPANION(dev), &state);
-		if (ret == 0 && state == ACPI_STATE_D0)
-			pm_runtime_set_active(dev);
-
-		pm_runtime_enable(dev);
-	}
+	return 1; /* If runtime-suspended leave as is */
 }
 
 static const struct dev_pm_ops pwm_lpss_platform_pm_ops = {
-	.complete = pwm_lpss_complete,
+	.prepare = pwm_lpss_prepare,
 	SET_SYSTEM_SLEEP_PM_OPS(pwm_lpss_suspend, pwm_lpss_resume)
 };
 
diff --git a/drivers/pwm/pwm-lpss.h b/drivers/pwm/pwm-lpss.h
index 1a2575d25bead..3236be835bd9c 100644
--- a/drivers/pwm/pwm-lpss.h
+++ b/drivers/pwm/pwm-lpss.h
@@ -30,8 +30,11 @@ struct pwm_lpss_boardinfo {
 	unsigned int npwm;
 	unsigned long base_unit_bits;
 	bool bypass;
-	/* Some devices have AML code messing with the state underneath us */
-	bool check_power_on_resume;
+	/*
+	 * On some devices the _PS0/_PS3 AML code of the GPU (GFX0) device
+	 * messes with the PWM0 controllers state,
+	 */
+	bool other_devices_aml_touches_pwm_regs;
 };
 
 struct pwm_lpss_chip *pwm_lpss_probe(struct device *dev, struct resource *r,
-- 
GitLab


From 2153bbc12f77fb2203276befc0f0dddbfb023bb1 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sun, 14 Oct 2018 17:12:02 +0200
Subject: [PATCH 0062/1890] pwm: lpss: Only set update bit if we are actually
 changing the settings

According to the datasheet the update bit must be set if the on-time-div
or the base-unit changes.

Now that we properly order device resume on Cherry Trail so that the GFX0
_PS0 method no longer exits with an error, we end up with a sequence of
events where we are writing the same values twice in a row.

First the _PS0 method restores the duty cycle of 0% the GPU driver set
on suspend and then the GPU driver first updates just the enabled bit in
the pwm_state from 0 to 1, causing us to write the same values again,
before restoring the pre-suspend duty-cycle in a separate pwm_apply call.

When writing the update bit the second time, without changing any of
the values the update bit clears immediately / instantly, instead of
staying 1 for a while as usual. After this the next setting of the update
bit seems to be ignored, causing the restoring of the pre-suspend
duty-cycle to not get applied. This makes the backlight come up with
a 0% dutycycle after suspend/resume.

Any further brightness changes after this do work.

This commit moves the setting of the update bit into pwm_lpss_prepare()
and only sets the bit if we have actually changed any of the values.

This avoids the setting of the update bit the second time we configure
the PWM to 0% dutycycle, this fixes the backlight coming up with 0%
duty-cycle after a suspend/resume.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/pwm-lpss.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/pwm/pwm-lpss.c b/drivers/pwm/pwm-lpss.c
index ea93ef9f3672f..2ac3a2aa9e53f 100644
--- a/drivers/pwm/pwm-lpss.c
+++ b/drivers/pwm/pwm-lpss.c
@@ -88,7 +88,7 @@ static void pwm_lpss_prepare(struct pwm_lpss_chip *lpwm, struct pwm_device *pwm,
 	unsigned long long on_time_div;
 	unsigned long c = lpwm->info->clk_rate, base_unit_range;
 	unsigned long long base_unit, freq = NSEC_PER_SEC;
-	u32 ctrl;
+	u32 orig_ctrl, ctrl;
 
 	do_div(freq, period_ns);
 
@@ -105,13 +105,17 @@ static void pwm_lpss_prepare(struct pwm_lpss_chip *lpwm, struct pwm_device *pwm,
 	do_div(on_time_div, period_ns);
 	on_time_div = 255ULL - on_time_div;
 
-	ctrl = pwm_lpss_read(pwm);
+	orig_ctrl = ctrl = pwm_lpss_read(pwm);
 	ctrl &= ~PWM_ON_TIME_DIV_MASK;
 	ctrl &= ~(base_unit_range << PWM_BASE_UNIT_SHIFT);
 	base_unit &= base_unit_range;
 	ctrl |= (u32) base_unit << PWM_BASE_UNIT_SHIFT;
 	ctrl |= on_time_div;
-	pwm_lpss_write(pwm, ctrl);
+
+	if (orig_ctrl != ctrl) {
+		pwm_lpss_write(pwm, ctrl);
+		pwm_lpss_write(pwm, ctrl | PWM_SW_UPDATE);
+	}
 }
 
 static inline void pwm_lpss_cond_enable(struct pwm_device *pwm, bool cond)
@@ -135,7 +139,6 @@ static int pwm_lpss_apply(struct pwm_chip *chip, struct pwm_device *pwm,
 				return ret;
 			}
 			pwm_lpss_prepare(lpwm, pwm, state->duty_cycle, state->period);
-			pwm_lpss_write(pwm, pwm_lpss_read(pwm) | PWM_SW_UPDATE);
 			pwm_lpss_cond_enable(pwm, lpwm->info->bypass == false);
 			ret = pwm_lpss_wait_for_update(pwm);
 			if (ret) {
@@ -148,7 +151,6 @@ static int pwm_lpss_apply(struct pwm_chip *chip, struct pwm_device *pwm,
 			if (ret)
 				return ret;
 			pwm_lpss_prepare(lpwm, pwm, state->duty_cycle, state->period);
-			pwm_lpss_write(pwm, pwm_lpss_read(pwm) | PWM_SW_UPDATE);
 			return pwm_lpss_wait_for_update(pwm);
 		}
 	} else if (pwm_is_enabled(pwm)) {
-- 
GitLab


From 5de4480ae7f8f1969065aa88be98111e36075bb0 Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mfasheh@suse.de>
Date: Mon, 10 Sep 2018 16:21:17 -0700
Subject: [PATCH 0063/1890] vfs: allow dedupe of user owned read-only files

The permission check in vfs_dedupe_file_range_one() is too coarse - We only
allow dedupe of the destination file if the user is root, or they have the
file open for write.

This effectively limits a non-root user from deduping their own read-only
files. In addition, the write file descriptor that the user is forced to
hold open can prevent execution of files. As file data during a dedupe
does not change, the behavior is unexpected and this has caused a number of
issue reports. For an example, see:

https://github.com/markfasheh/duperemove/issues/129

So change the check so we allow dedupe on the target if:

- the root or admin is asking for it
- the process has write access
- the owner of the file is asking for the dedupe
- the process could get write access

That way users can open read-only and still get dedupe.

Signed-off-by: Mark Fasheh <mfasheh@suse.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/read_write.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/fs/read_write.c b/fs/read_write.c
index 39b4a21dd9337..be0e8723a0494 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1964,6 +1964,20 @@ int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
 }
 EXPORT_SYMBOL(vfs_dedupe_file_range_compare);
 
+/* Check whether we are allowed to dedupe the destination file */
+static bool allow_file_dedupe(struct file *file)
+{
+	if (capable(CAP_SYS_ADMIN))
+		return true;
+	if (file->f_mode & FMODE_WRITE)
+		return true;
+	if (uid_eq(current_fsuid(), file_inode(file)->i_uid))
+		return true;
+	if (!inode_permission(file_inode(file), MAY_WRITE))
+		return true;
+	return false;
+}
+
 int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
 			      struct file *dst_file, loff_t dst_pos, u64 len)
 {
@@ -1978,7 +1992,7 @@ int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
 		goto out_drop_write;
 
 	ret = -EINVAL;
-	if (!(capable(CAP_SYS_ADMIN) || (dst_file->f_mode & FMODE_WRITE)))
+	if (!allow_file_dedupe(dst_file))
 		goto out_drop_write;
 
 	ret = -EXDEV;
-- 
GitLab


From 85c95f208f481ab3c34c4622f508272cd4803afd Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mfasheh@suse.de>
Date: Mon, 10 Sep 2018 16:21:18 -0700
Subject: [PATCH 0064/1890] vfs: dedupe should return EPERM if permission is
 not granted

Right now we return EINVAL if a process does not have permission to dedupe a
file. This was an oversight on my part. EPERM gives a true description of
the nature of our error, and EINVAL is already used for the case that the
filesystem does not support dedupe.

Signed-off-by: Mark Fasheh <mfasheh@suse.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Acked-by: David Sterba <dsterba@suse.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/read_write.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/read_write.c b/fs/read_write.c
index be0e8723a0494..c734bc2880a5d 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1991,7 +1991,7 @@ int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
 	if (ret < 0)
 		goto out_drop_write;
 
-	ret = -EINVAL;
+	ret = -EPERM;
 	if (!allow_file_dedupe(dst_file))
 		goto out_drop_write;
 
-- 
GitLab


From d65b1f20292425b798ec74e313a7ad3f11b8af0d Mon Sep 17 00:00:00 2001
From: Yue Haibing <yuehaibing@huawei.com>
Date: Thu, 6 Sep 2018 01:54:07 +0000
Subject: [PATCH 0065/1890] iomap: remove duplicated include from iomap.c

Remove duplicated include.

Signed-off-by: Yue Haibing <yuehaibing@huawei.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/iomap.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/iomap.c b/fs/iomap.c
index 74762b1ec233f..ef26c2225c931 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -30,7 +30,6 @@
 #include <linux/task_io_accounting_ops.h>
 #include <linux/dax.h>
 #include <linux/sched/signal.h>
-#include <linux/swap.h>
 
 #include "internal.h"
 
-- 
GitLab


From 55338ac2a9839557516b00661e6a05daf996fda0 Mon Sep 17 00:00:00 2001
From: nixiaoming <nixiaoming@huawei.com>
Date: Sun, 22 Jul 2018 16:37:08 +0800
Subject: [PATCH 0066/1890] Delete invalid assignment statements in do_sendfile

Assigning value -EINVAL to "retval" here, but that stored value is
overwritten before it can be used.

retval = -EINVAL;
....
retval = rw_verify_area(WRITE, out.file, &out_pos, count);

value_overwrite: Overwriting previous write to "retval" with value
from rw_verify_area

delete invalid assignment statements

Signed-off-by: n00202754 <nixiaoming@huawei.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/read_write.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/read_write.c b/fs/read_write.c
index c734bc2880a5d..7f79b1fc490ec 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1407,7 +1407,6 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
 		goto fput_in;
 	if (!(out.file->f_mode & FMODE_WRITE))
 		goto fput_out;
-	retval = -EINVAL;
 	in_inode = file_inode(in.file);
 	out_inode = file_inode(out.file);
 	out_pos = out.file->f_pos;
-- 
GitLab


From 515f1867addaba49c1c6ac73abfaffbc192c1db4 Mon Sep 17 00:00:00 2001
From: Chengguang Xu <cgxu519@gmx.com>
Date: Wed, 13 Jun 2018 12:05:13 +0800
Subject: [PATCH 0067/1890] fs/exofs: fix potential memory leak in mount option
 parsing

There are some cases can cause memory leak when parsing
option 'osdname'.

Signed-off-by: Chengguang Xu <cgxu519@gmx.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/exofs/super.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 41cf2fbee50da..7d61e3fa378c2 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -101,6 +101,7 @@ static int parse_options(char *options, struct exofs_mountopt *opts)
 		token = match_token(p, tokens, args);
 		switch (token) {
 		case Opt_name:
+			kfree(opts->dev_name);
 			opts->dev_name = match_strdup(&args[0]);
 			if (unlikely(!opts->dev_name)) {
 				EXOFS_ERR("Error allocating dev_name");
@@ -866,8 +867,10 @@ static struct dentry *exofs_mount(struct file_system_type *type,
 	int ret;
 
 	ret = parse_options(data, &opts);
-	if (ret)
+	if (ret) {
+		kfree(opts.dev_name);
 		return ERR_PTR(ret);
+	}
 
 	if (!opts.dev_name)
 		opts.dev_name = dev_name;
-- 
GitLab


From 3642b29a63674020401e41185ed5b0e3c056ab4a Mon Sep 17 00:00:00 2001
From: Chengguang Xu <cgxu519@gmx.com>
Date: Wed, 13 Jun 2018 12:05:14 +0800
Subject: [PATCH 0068/1890] fs/exofs: only use true/false for asignment of bool
 type variable

Signed-off-by: Chengguang Xu <cgxu519@gmx.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/exofs/super.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 7d61e3fa378c2..906839a4da8ff 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -118,7 +118,7 @@ static int parse_options(char *options, struct exofs_mountopt *opts)
 					  EXOFS_MIN_PID);
 				return -EINVAL;
 			}
-			s_pid = 1;
+			s_pid = true;
 			break;
 		case Opt_to:
 			if (match_int(&args[0], &option))
-- 
GitLab


From cf7905165fee15fe26ac4a91ee0a35b966249430 Mon Sep 17 00:00:00 2001
From: Alexey Budankov <alexey.budankov@linux.intel.com>
Date: Tue, 9 Oct 2018 17:36:24 +0300
Subject: [PATCH 0069/1890] perf record: Encode -k clockid frequency into Perf
 trace

Store -k clockid frequency into Perf trace to enable timestamps
derived metrics conversion into wall clock time on reporting stage.

Below is the example of perf report output:

  tools/perf/perf record -k raw -- ../../matrix/linux/matrix.gcc
  ...
  [ perf record: Captured and wrote 31.222 MB perf.data (818054 samples) ]

  tools/perf/perf report --header
  # ========
  ...
  # event : name = cycles:ppp, , size = 112, { sample_period, sample_freq } = 4000, sample_type = IP|TID|TIME|PERIOD, disabled = 1, inherit = 1, mmap = 1, comm = 1, freq = 1, enable_on_exec = 1, task = 1, precise_ip = 3, sample_id_all = 1, exclude_guest = 1, mmap2 = 1, comm_exec = 1, use_clockid = 1, clockid = 4
  ...
  # clockid frequency: 1000 MHz
  ...
  # ========

Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/23a4a1dc-b160-85a0-347d-40a2ed6d007b@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-record.c | 24 ++++++++++++++++++++++--
 tools/perf/perf.h           |  1 +
 tools/perf/util/env.h       |  1 +
 tools/perf/util/header.c    | 23 +++++++++++++++++++++++
 tools/perf/util/header.h    |  1 +
 5 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 0980dfe3396b1..10cf889c6d75d 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -592,6 +592,9 @@ static void record__init_features(struct record *rec)
 	if (!rec->opts.full_auxtrace)
 		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
 
+	if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
+		perf_header__clear_feat(&session->header, HEADER_CLOCKID);
+
 	perf_header__clear_feat(&session->header, HEADER_STAT);
 }
 
@@ -897,6 +900,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 
 	record__init_features(rec);
 
+	if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
+		session->header.env.clockid_res_ns = rec->opts.clockid_res_ns;
+
 	if (forks) {
 		err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
 						    argv, data->is_pipe,
@@ -1337,6 +1343,19 @@ static const struct clockid_map clockids[] = {
 	CLOCKID_END,
 };
 
+static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
+{
+	struct timespec res;
+
+	*res_ns = 0;
+	if (!clock_getres(clk_id, &res))
+		*res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
+	else
+		pr_warning("WARNING: Failed to determine specified clock resolution.\n");
+
+	return 0;
+}
+
 static int parse_clockid(const struct option *opt, const char *str, int unset)
 {
 	struct record_opts *opts = (struct record_opts *)opt->value;
@@ -1360,7 +1379,7 @@ static int parse_clockid(const struct option *opt, const char *str, int unset)
 
 	/* if its a number, we're done */
 	if (sscanf(str, "%d", &opts->clockid) == 1)
-		return 0;
+		return get_clockid_res(opts->clockid, &opts->clockid_res_ns);
 
 	/* allow a "CLOCK_" prefix to the name */
 	if (!strncasecmp(str, "CLOCK_", 6))
@@ -1369,7 +1388,8 @@ static int parse_clockid(const struct option *opt, const char *str, int unset)
 	for (cm = clockids; cm->name; cm++) {
 		if (!strcasecmp(str, cm->name)) {
 			opts->clockid = cm->clockid;
-			return 0;
+			return get_clockid_res(opts->clockid,
+					       &opts->clockid_res_ns);
 		}
 	}
 
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 21bf7f5a3cf51..0ed4a34c74c4b 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -81,6 +81,7 @@ struct record_opts {
 	unsigned     initial_delay;
 	bool         use_clockid;
 	clockid_t    clockid;
+	u64          clockid_res_ns;
 	unsigned int proc_map_timeout;
 };
 
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index 1f3ccc3685303..d01b8355f4cab 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -63,6 +63,7 @@ struct perf_env {
 	struct numa_node	*numa_nodes;
 	struct memory_node	*memory_nodes;
 	unsigned long long	 memory_bsize;
+	u64                     clockid_res_ns;
 };
 
 extern struct perf_env perf_env;
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 1ec1d9bc2d635..4fd45be95a433 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1034,6 +1034,13 @@ static int write_auxtrace(struct feat_fd *ff,
 	return err;
 }
 
+static int write_clockid(struct feat_fd *ff,
+			 struct perf_evlist *evlist __maybe_unused)
+{
+	return do_write(ff, &ff->ph->env.clockid_res_ns,
+			sizeof(ff->ph->env.clockid_res_ns));
+}
+
 static int cpu_cache_level__sort(const void *a, const void *b)
 {
 	struct cpu_cache_level *cache_a = (struct cpu_cache_level *)a;
@@ -1508,6 +1515,12 @@ static void print_cpu_topology(struct feat_fd *ff, FILE *fp)
 		fprintf(fp, "# Core ID and Socket ID information is not available\n");
 }
 
+static void print_clockid(struct feat_fd *ff, FILE *fp)
+{
+	fprintf(fp, "# clockid frequency: %"PRIu64" MHz\n",
+		ff->ph->env.clockid_res_ns * 1000);
+}
+
 static void free_event_desc(struct perf_evsel *events)
 {
 	struct perf_evsel *evsel;
@@ -2531,6 +2544,15 @@ static int process_mem_topology(struct feat_fd *ff,
 	return ret;
 }
 
+static int process_clockid(struct feat_fd *ff,
+			   void *data __maybe_unused)
+{
+	if (do_read_u64(ff, &ff->ph->env.clockid_res_ns))
+		return -1;
+
+	return 0;
+}
+
 struct feature_ops {
 	int (*write)(struct feat_fd *ff, struct perf_evlist *evlist);
 	void (*print)(struct feat_fd *ff, FILE *fp);
@@ -2590,6 +2612,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
 	FEAT_OPN(CACHE,		cache,		true),
 	FEAT_OPR(SAMPLE_TIME,	sample_time,	false),
 	FEAT_OPR(MEM_TOPOLOGY,	mem_topology,	true),
+	FEAT_OPR(CLOCKID,       clockid,        false)
 };
 
 struct header_print_data {
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index e17903caa71da..0d553ddca0a30 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -38,6 +38,7 @@ enum {
 	HEADER_CACHE,
 	HEADER_SAMPLE_TIME,
 	HEADER_MEM_TOPOLOGY,
+	HEADER_CLOCKID,
 	HEADER_LAST_FEATURE,
 	HEADER_FEAT_BITS	= 256,
 };
-- 
GitLab


From 0ab41886648bb75b951bd41d8b5cecaca8e0ad66 Mon Sep 17 00:00:00 2001
From: David Miller <davem@davemloft.net>
Date: Tue, 16 Oct 2018 20:55:55 -0700
Subject: [PATCH 0070/1890] perf annotate: Add Sparc support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

E.g.:

  $ perf annotate --stdio2
  Samples: 7K of event 'cycles:ppp', 4000 Hz, Event count (approx.): 3086733887
  __gettimeofday  /lib32/libc-2.27.so [Percent: local period]
  Percentâ”‚
         â”‚
         â”‚
         â”‚    Disassembly of section .text:
         â”‚
         â”‚    000a6fa0 <__gettimeofday@@GLIBC_2.0>:
    0.47 â”‚      save   %sp, -96, %sp
    0.73 â”‚      sethi  %hi(0xe9000), %l7
         â”‚    â†’ call   __frame_state_for@@GLIBC_2.0+0x480
    0.30 â”‚      add    %l7, 0x58, %l7     ! e9058 <nftw64@@GLIBC_2.3.3+0x818>
    1.33 â”‚      mov    %i0, %o0
         â”‚      mov    %i1, %o1
    0.43 â”‚      mov    0x74, %g1
         â”‚      ta     0x10
   88.92 â”‚    â†“ bcc    30
    2.95 â”‚      clr    %g1
         â”‚      neg    %o0
         â”‚      mov    1, %g1
    0.31 â”‚30:   cmp    %g1, 0
         â”‚      bne,pn %icc, a6fe4 <__gettimeofday@@GLIBC_2.0+0x44>
         â”‚      mov    %o0, %i0
    1.96 â”‚    â† return %i7 + 8
    2.62 â”‚      nop
         â”‚      sethi  %hi(0), %g1
         â”‚      neg    %o0, %g2
         â”‚      add    %g1, 0x160, %g1
         â”‚      ld     [ %l7 + %g1 ], %g1
         â”‚      st     %g2, [ %g7 + %g1 ]
         â”‚    â† return %i7 + 8
         â”‚      mov    -1, %o0

Signed-off-by: David S. Miller <davem@davemloft.net>
Link: http://lkml.kernel.org/r/20181016.205555.1070918198627611771.davem@davemloft.net
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/sparc/annotate/instructions.c | 169 ++++++++++++++++++
 tools/perf/util/annotate.c                    |   8 +
 2 files changed, 177 insertions(+)
 create mode 100644 tools/perf/arch/sparc/annotate/instructions.c

diff --git a/tools/perf/arch/sparc/annotate/instructions.c b/tools/perf/arch/sparc/annotate/instructions.c
new file mode 100644
index 0000000000000..2614c010c2352
--- /dev/null
+++ b/tools/perf/arch/sparc/annotate/instructions.c
@@ -0,0 +1,169 @@
+// SPDX-License-Identifier: GPL-2.0
+
+static int is_branch_cond(const char *cond)
+{
+	if (cond[0] == '\0')
+		return 1;
+
+	if (cond[0] == 'a' && cond[1] == '\0')
+		return 1;
+
+	if (cond[0] == 'c' &&
+	    (cond[1] == 'c' || cond[1] == 's') &&
+	    cond[2] == '\0')
+		return 1;
+
+	if (cond[0] == 'e' &&
+	    (cond[1] == '\0' ||
+	     (cond[1] == 'q' && cond[2] == '\0')))
+		return 1;
+
+	if (cond[0] == 'g' &&
+	    (cond[1] == '\0' ||
+	     (cond[1] == 't' && cond[2] == '\0') ||
+	     (cond[1] == 'e' && cond[2] == '\0') ||
+	     (cond[1] == 'e' && cond[2] == 'u' && cond[3] == '\0')))
+		return 1;
+
+	if (cond[0] == 'l' &&
+	    (cond[1] == '\0' ||
+	     (cond[1] == 't' && cond[2] == '\0') ||
+	     (cond[1] == 'u' && cond[2] == '\0') ||
+	     (cond[1] == 'e' && cond[2] == '\0') ||
+	     (cond[1] == 'e' && cond[2] == 'u' && cond[3] == '\0')))
+		return 1;
+
+	if (cond[0] == 'n' &&
+	    (cond[1] == '\0' ||
+	     (cond[1] == 'e' && cond[2] == '\0') ||
+	     (cond[1] == 'z' && cond[2] == '\0') ||
+	     (cond[1] == 'e' && cond[2] == 'g' && cond[3] == '\0')))
+		return 1;
+
+	if (cond[0] == 'b' &&
+	    cond[1] == 'p' &&
+	    cond[2] == 'o' &&
+	    cond[3] == 's' &&
+	    cond[4] == '\0')
+		return 1;
+
+	if (cond[0] == 'v' &&
+	    (cond[1] == 'c' || cond[1] == 's') &&
+	    cond[2] == '\0')
+		return 1;
+
+	if (cond[0] == 'b' &&
+	    cond[1] == 'z' &&
+	    cond[2] == '\0')
+		return 1;
+
+	return 0;
+}
+
+static int is_branch_reg_cond(const char *cond)
+{
+	if ((cond[0] == 'n' || cond[0] == 'l') &&
+	    cond[1] == 'z' &&
+	    cond[2] == '\0')
+		return 1;
+
+	if (cond[0] == 'z' &&
+	    cond[1] == '\0')
+		return 1;
+
+	if ((cond[0] == 'g' || cond[0] == 'l') &&
+	    cond[1] == 'e' &&
+	    cond[2] == 'z' &&
+	    cond[3] == '\0')
+		return 1;
+
+	if (cond[0] == 'g' &&
+	    cond[1] == 'z' &&
+	    cond[2] == '\0')
+		return 1;
+
+	return 0;
+}
+
+static int is_branch_float_cond(const char *cond)
+{
+	if (cond[0] == '\0')
+		return 1;
+
+	if ((cond[0] == 'a' || cond[0] == 'e' ||
+	     cond[0] == 'z' || cond[0] == 'g' ||
+	     cond[0] == 'l' || cond[0] == 'n' ||
+	     cond[0] == 'o' || cond[0] == 'u') &&
+	    cond[1] == '\0')
+		return 1;
+
+	if (((cond[0] == 'g' && cond[1] == 'e') ||
+	     (cond[0] == 'l' && (cond[1] == 'e' ||
+				 cond[1] == 'g')) ||
+	     (cond[0] == 'n' && (cond[1] == 'e' ||
+				 cond[1] == 'z')) ||
+	     (cond[0] == 'u' && (cond[1] == 'e' ||
+				 cond[1] == 'g' ||
+				 cond[1] == 'l'))) &&
+	    cond[2] == '\0')
+		return 1;
+
+	if (cond[0] == 'u' &&
+	    (cond[1] == 'g' || cond[1] == 'l') &&
+	    cond[2] == 'e' &&
+	    cond[3] == '\0')
+		return 1;
+
+	return 0;
+}
+
+static struct ins_ops *sparc__associate_instruction_ops(struct arch *arch, const char *name)
+{
+	struct ins_ops *ops = NULL;
+
+	if (!strcmp(name, "call") ||
+	    !strcmp(name, "jmp") ||
+	    !strcmp(name, "jmpl")) {
+		ops = &call_ops;
+	} else if (!strcmp(name, "ret") ||
+		   !strcmp(name, "retl") ||
+		   !strcmp(name, "return")) {
+		ops = &ret_ops;
+	} else if (!strcmp(name, "mov")) {
+		ops = &mov_ops;
+	} else {
+		if (name[0] == 'c' &&
+		    (name[1] == 'w' || name[1] == 'x'))
+			name += 2;
+
+		if (name[0] == 'b') {
+			const char *cond = name + 1;
+
+			if (cond[0] == 'r') {
+				if (is_branch_reg_cond(cond + 1))
+					ops = &jump_ops;
+			} else if (is_branch_cond(cond)) {
+				ops = &jump_ops;
+			}
+		} else if (name[0] == 'f' && name[1] == 'b') {
+			if (is_branch_float_cond(name + 2))
+				ops = &jump_ops;
+		}
+	}
+
+	if (ops)
+		arch__associate_ins_ops(arch, name, ops);
+
+	return ops;
+}
+
+static int sparc__annotate_init(struct arch *arch, char *cpuid __maybe_unused)
+{
+	if (!arch->initialized) {
+		arch->initialized = true;
+		arch->associate_instruction_ops = sparc__associate_instruction_ops;
+		arch->objdump.comment_char = '#';
+	}
+
+	return 0;
+}
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 28cd6a17491b2..6936daf89dddc 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -139,6 +139,7 @@ static int arch__associate_ins_ops(struct arch* arch, const char *name, struct i
 #include "arch/x86/annotate/instructions.c"
 #include "arch/powerpc/annotate/instructions.c"
 #include "arch/s390/annotate/instructions.c"
+#include "arch/sparc/annotate/instructions.c"
 
 static struct arch architectures[] = {
 	{
@@ -170,6 +171,13 @@ static struct arch architectures[] = {
 			.comment_char = '#',
 		},
 	},
+	{
+		.name = "sparc",
+		.init = sparc__annotate_init,
+		.objdump = {
+			.comment_char = '#',
+		},
+	},
 };
 
 static void ins__delete(struct ins_operands *ops)
-- 
GitLab


From d87b9790b3deb7601872d34fd05af3da78147583 Mon Sep 17 00:00:00 2001
From: David Miller <davem@davemloft.net>
Date: Tue, 16 Oct 2018 21:15:45 -0700
Subject: [PATCH 0071/1890] perf jitdump: Add Sparc support.

Signed-off-by: David S. Miller <davem@davemloft.net>
Link: http://lkml.kernel.org/r/20181016.211545.1487970139012324624.davem@davemloft.net
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/sparc/Makefile | 2 ++
 tools/perf/util/genelf.h       | 6 ++++++
 2 files changed, 8 insertions(+)

diff --git a/tools/perf/arch/sparc/Makefile b/tools/perf/arch/sparc/Makefile
index 7fbca175099ec..275dea7ff59a0 100644
--- a/tools/perf/arch/sparc/Makefile
+++ b/tools/perf/arch/sparc/Makefile
@@ -1,3 +1,5 @@
 ifndef NO_DWARF
 PERF_HAVE_DWARF_REGS := 1
 endif
+
+PERF_HAVE_JITDUMP := 1
diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h
index de322d51c7fe2..b72440bf9a796 100644
--- a/tools/perf/util/genelf.h
+++ b/tools/perf/util/genelf.h
@@ -29,6 +29,12 @@ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_ent
 #elif defined(__powerpc__)
 #define GEN_ELF_ARCH	EM_PPC
 #define GEN_ELF_CLASS	ELFCLASS32
+#elif defined(__sparc__) && defined(__arch64__)
+#define GEN_ELF_ARCH	EM_SPARCV9
+#define GEN_ELF_CLASS	ELFCLASS64
+#elif defined(__sparc__)
+#define GEN_ELF_ARCH	EM_SPARC
+#define GEN_ELF_CLASS	ELFCLASS32
 #else
 #error "unsupported architecture"
 #endif
-- 
GitLab


From d6afa561e1471ccfdaf7191230c0c59a37e45a5b Mon Sep 17 00:00:00 2001
From: David Miller <davem@davemloft.net>
Date: Wed, 17 Oct 2018 12:08:59 -0700
Subject: [PATCH 0072/1890] perf symbols: Set PLT entry/header sizes properly
 on Sparc

Using the sh_entsize for both values isn't correct.  It happens to be
correct on x86...

For both 32-bit and 64-bit sparc, there are four PLT entries in the PLT
section.

Signed-off-by: David S. Miller <davem@davemloft.net>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexis Berlemont <alexis.berlemont@gmail.com>
Cc: David Tolnay <dtolnay@gmail.com>
Cc: Hanjun Guo <guohanjun@huawei.com>
Cc: Hemant Kumar <hemant@linux.vnet.ibm.com>
Cc: Li Bin <huawei.libin@huawei.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Milian Wolff <milian.wolff@kdab.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wang Nan <wangnan0@huawei.com>
Cc: zhangmengting@huawei.com
Fixes: b2f7605076d6 ("perf symbols: Fix plt entry calculation for ARM and AARCH64")
Link: http://lkml.kernel.org/r/20181017.120859.2268840244308635255.davem@davemloft.net
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/symbol-elf.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 29770ea61768b..6e70cc00c1618 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -324,7 +324,17 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss)
 			plt_entry_size = 16;
 			break;
 
-		default: /* FIXME: s390/alpha/mips/parisc/poperpc/sh/sparc/xtensa need to be checked */
+		case EM_SPARC:
+			plt_header_size = 48;
+			plt_entry_size = 12;
+			break;
+
+		case EM_SPARCV9:
+			plt_header_size = 128;
+			plt_entry_size = 32;
+			break;
+
+		default: /* FIXME: s390/alpha/mips/parisc/poperpc/sh/xtensa need to be checked */
 			plt_header_size = shdr_plt.sh_entsize;
 			plt_entry_size = shdr_plt.sh_entsize;
 			break;
-- 
GitLab


From eef3dc34a1e0b01d53328b88c25237bcc7323777 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <natechancellor@gmail.com>
Date: Wed, 17 Oct 2018 17:54:00 -0700
Subject: [PATCH 0073/1890] ARM: OMAP2+: prm44xx: Fix section annotation on
 omap44xx_prm_enable_io_wakeup

When building the kernel with Clang, the following section mismatch
warning appears:

WARNING: vmlinux.o(.text+0x38b3c): Section mismatch in reference from
the function omap44xx_prm_late_init() to the function
.init.text:omap44xx_prm_enable_io_wakeup()
The function omap44xx_prm_late_init() references
the function __init omap44xx_prm_enable_io_wakeup().
This is often because omap44xx_prm_late_init lacks a __init
annotation or the annotation of omap44xx_prm_enable_io_wakeup is wrong.

Remove the __init annotation from omap44xx_prm_enable_io_wakeup so there
is no more mismatch.

Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap2/prm44xx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/mach-omap2/prm44xx.c b/arch/arm/mach-omap2/prm44xx.c
index 7b95729e83594..38a1be6c3694f 100644
--- a/arch/arm/mach-omap2/prm44xx.c
+++ b/arch/arm/mach-omap2/prm44xx.c
@@ -351,7 +351,7 @@ static void omap44xx_prm_reconfigure_io_chain(void)
  * to occur, WAKEUPENABLE bits must be set in the pad mux registers, and
  * omap44xx_prm_reconfigure_io_chain() must be called.  No return value.
  */
-static void __init omap44xx_prm_enable_io_wakeup(void)
+static void omap44xx_prm_enable_io_wakeup(void)
 {
 	s32 inst = omap4_prmst_get_prm_dev_inst();
 
-- 
GitLab


From fe0640eb30b7da261ae84d252ed9ed3c7e68dfd8 Mon Sep 17 00:00:00 2001
From: "ndesaulniers@google.com" <ndesaulniers@google.com>
Date: Mon, 15 Oct 2018 10:22:21 -0700
Subject: [PATCH 0074/1890] compiler.h: update definition of unreachable()

Fixes the objtool warning seen with Clang:
arch/x86/mm/fault.o: warning: objtool: no_context()+0x220: unreachable
instruction

Fixes commit 815f0ddb346c ("include/linux/compiler*.h: make compiler-*.h
mutually exclusive")

Josh noted that the fallback definition was meant to work around a
pre-gcc-4.6 bug. GCC still needs to work around
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82365, so compiler-gcc.h
defines its own version of unreachable().  Clang and ICC can use this
shared definition.

Link: https://github.com/ClangBuiltLinux/linux/issues/204
Suggested-by: Andy Lutomirski <luto@amacapital.net>
Suggested-by: Josh Poimboeuf <jpoimboe@redhat.com>
Tested-by: Nathan Chancellor <natechancellor@gmail.com>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
---
 include/linux/compiler.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index b5fb034fa6fa1..2e0b6322588be 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -124,7 +124,10 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
 # define ASM_UNREACHABLE
 #endif
 #ifndef unreachable
-# define unreachable() do { annotate_reachable(); do { } while (1); } while (0)
+# define unreachable() do {		\
+	annotate_unreachable();		\
+	__builtin_unreachable();	\
+} while (0)
 #endif
 
 /*
-- 
GitLab


From 1ff2fea5e30ca15752777441ecb64a169fe22e9e Mon Sep 17 00:00:00 2001
From: "ndesaulniers@google.com" <ndesaulniers@google.com>
Date: Mon, 15 Oct 2018 14:24:27 -0700
Subject: [PATCH 0075/1890] compiler-gcc: remove comment about gcc 4.5 from
 unreachable()

Remove the comment about being unable to detect __builtin_unreachable.
__builtin_unreachable was implemented in the GCC 4.5 timeframe. The
kernel's minimum supported version of GCC is 4.6 since commit
cafa0010cd51 ("Raise the minimum required gcc version to 4.6"). Commit
cb984d101b30 ("compiler-gcc: integrate the various compiler-gcc[345].h
files") shows that unreachable() had different guards based on GCC
version.

Suggested-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
---
 include/linux/compiler-gcc.h | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index cfac027e16251..2010493e10408 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -96,10 +96,6 @@
  * Mark a position in code as unreachable.  This can be used to
  * suppress control flow warnings after asm blocks that transfer
  * control elsewhere.
- *
- * Early snapshots of gcc 4.5 don't support this and we can't detect
- * this in the preprocessor, but we can live with this because they're
- * unreleased.  Really, we need to have autoconf for the kernel.
  */
 #define unreachable() \
 	do {					\
-- 
GitLab


From e96550956fbcd090629c0e2b5b8cded2eded2adf Mon Sep 17 00:00:00 2001
From: Lyude Paul <lyude@redhat.com>
Date: Mon, 8 Oct 2018 19:24:30 -0400
Subject: [PATCH 0076/1890] drm/atomic_helper: Disallow new modesets on
 unregistered connectors

With the exception of modesets which would switch the DPMS state of a
connector from on to off, we want to make sure that we disallow all
modesets which would result in enabling a new monitor or a new mode
configuration on a monitor if the connector for the display in question
is no longer registered. This allows us to stop userspace from trying to
enable new displays on connectors for an MST topology that were just
removed from the system, without preventing userspace from disabling
DPMS on those connectors.

Changes since v5:
- Fix typo in comment, nothing else

Signed-off-by: Lyude Paul <lyude@redhat.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: stable@vger.kernel.org
Link: https://patchwork.freedesktop.org/patch/msgid/20181008232437.5571-2-lyude@redhat.com
(cherry picked from commit 4d80273976bf880c4bed9359b8f2d45663140c86)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/drm_atomic_helper.c | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index e49b22381048a..20bd176138a0b 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -308,6 +308,26 @@ update_connector_routing(struct drm_atomic_state *state,
 		return 0;
 	}
 
+	crtc_state = drm_atomic_get_new_crtc_state(state,
+						   new_connector_state->crtc);
+	/*
+	 * For compatibility with legacy users, we want to make sure that
+	 * we allow DPMS On->Off modesets on unregistered connectors. Modesets
+	 * which would result in anything else must be considered invalid, to
+	 * avoid turning on new displays on dead connectors.
+	 *
+	 * Since the connector can be unregistered at any point during an
+	 * atomic check or commit, this is racy. But that's OK: all we care
+	 * about is ensuring that userspace can't do anything but shut off the
+	 * display on a connector that was destroyed after its been notified,
+	 * not before.
+	 */
+	if (!READ_ONCE(connector->registered) && crtc_state->active) {
+		DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] is not registered\n",
+				 connector->base.id, connector->name);
+		return -EINVAL;
+	}
+
 	funcs = connector->helper_private;
 
 	if (funcs->atomic_best_encoder)
@@ -352,7 +372,6 @@ update_connector_routing(struct drm_atomic_state *state,
 
 	set_best_encoder(state, new_connector_state, new_encoder);
 
-	crtc_state = drm_atomic_get_new_crtc_state(state, new_connector_state->crtc);
 	crtc_state->connectors_changed = true;
 
 	DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] using [ENCODER:%d:%s] on [CRTC:%d:%s]\n",
-- 
GitLab


From 34ca26a98ad67edd6e4870fe2d4aa047d41a51dd Mon Sep 17 00:00:00 2001
From: Lyude Paul <lyude@redhat.com>
Date: Tue, 9 Oct 2018 16:44:24 -0400
Subject: [PATCH 0077/1890] drm/atomic_helper: Allow DPMS On<->Off changes for
 unregistered connectors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It appears when testing my previous fix for some of the legacy
modesetting issues with MST, I misattributed some kernel splats that
started appearing on my machine after a rebase as being from upstream.
But it appears they actually came from my patch series:

[    2.980512] [drm:drm_atomic_helper_check_modeset [drm_kms_helper]] Updating routing for [CONNECTOR:65:eDP-1]
[    2.980516] [drm:drm_atomic_helper_check_modeset [drm_kms_helper]] [CONNECTOR:65:eDP-1] is not registered
[    2.980516] ------------[ cut here ]------------
[    2.980519] Could not determine valid watermarks for inherited state
[    2.980553] WARNING: CPU: 3 PID: 551 at drivers/gpu/drm/i915/intel_display.c:14983 intel_modeset_init+0x14d7/0x19f0 [i915]
[    2.980556] Modules linked in: i915(O+) i2c_algo_bit drm_kms_helper(O) syscopyarea sysfillrect sysimgblt fb_sys_fops drm(O) intel_rapl x86_pkg_temp_thermal iTCO_wdt wmi_bmof coretemp crc32_pclmul psmouse i2c_i801 mei_me mei i2c_core lpc_ich mfd_core tpm_tis tpm_tis_core wmi tpm thinkpad_acpi pcc_cpufreq video ehci_pci crc32c_intel serio_raw ehci_hcd xhci_pci xhci_hcd
[    2.980577] CPU: 3 PID: 551 Comm: systemd-udevd Tainted: G           O      4.19.0-rc7Lyude-Test+ #1
[    2.980579] Hardware name: LENOVO 20BWS1KY00/20BWS1KY00, BIOS JBET63WW (1.27 ) 11/10/2016
[    2.980605] RIP: 0010:intel_modeset_init+0x14d7/0x19f0 [i915]
[    2.980607] Code: 89 df e8 ec 27 02 00 e9 24 f2 ff ff be 03 00 00 00 48 89 df e8 da 27 02 00 e9 26 f2 ff ff 48 c7 c7 c8 d1 34 a0 e8 23 cf dc e0 <0f> 0b e9 7c fd ff ff f6 c4 04 0f 85 37 f7 ff ff 48 8b 83 60 08 00
[    2.980611] RSP: 0018:ffffc90000287988 EFLAGS: 00010282
[    2.980614] RAX: 0000000000000000 RBX: ffff88031b488000 RCX: 0000000000000006
[    2.980617] RDX: 0000000000000007 RSI: 0000000000000086 RDI: ffff880321ad54d0
[    2.980620] RBP: ffffc90000287a10 R08: 000000000000040a R09: 0000000000000065
[    2.980623] R10: ffff88030ebb8f00 R11: ffffffff81416590 R12: ffff88031b488000
[    2.980626] R13: ffff88031b4883a0 R14: ffffc900002879a8 R15: ffff880319099800
[    2.980630] FS:  00007f475620d180(0000) GS:ffff880321ac0000(0000) knlGS:0000000000000000
[    2.980633] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    2.980636] CR2: 00007f9ef28018a0 CR3: 000000031b72c001 CR4: 00000000003606e0
[    2.980639] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[    2.980642] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[    2.980645] Call Trace:
[    2.980675]  i915_driver_load+0xb0e/0xdc0 [i915]
[    2.980681]  ? kernfs_add_one+0xe7/0x130
[    2.980709]  i915_pci_probe+0x46/0x60 [i915]
[    2.980715]  pci_device_probe+0xd4/0x150
[    2.980719]  really_probe+0x243/0x3b0
[    2.980722]  driver_probe_device+0xba/0x100
[    2.980726]  __driver_attach+0xe4/0x110
[    2.980729]  ? driver_probe_device+0x100/0x100
[    2.980733]  bus_for_each_dev+0x74/0xb0
[    2.980736]  driver_attach+0x1e/0x20
[    2.980739]  bus_add_driver+0x159/0x230
[    2.980743]  ? 0xffffffffa0393000
[    2.980746]  driver_register+0x70/0xc0
[    2.980749]  ? 0xffffffffa0393000
[    2.980753]  __pci_register_driver+0x57/0x60
[    2.980780]  i915_init+0x55/0x58 [i915]
[    2.980785]  do_one_initcall+0x4a/0x1c4
[    2.980789]  ? do_init_module+0x27/0x210
[    2.980793]  ? kmem_cache_alloc_trace+0x131/0x190
[    2.980797]  do_init_module+0x60/0x210
[    2.980800]  load_module+0x2063/0x22e0
[    2.980804]  ? vfs_read+0x116/0x140
[    2.980807]  ? vfs_read+0x116/0x140
[    2.980811]  __do_sys_finit_module+0xbd/0x120
[    2.980814]  ? __do_sys_finit_module+0xbd/0x120
[    2.980818]  __x64_sys_finit_module+0x1a/0x20
[    2.980821]  do_syscall_64+0x5a/0x110
[    2.980824]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[    2.980826] RIP: 0033:0x7f4754e32879
[    2.980828] Code: 00 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d f7 45 2c 00 f7 d8 64 89 01 48
[    2.980831] RSP: 002b:00007fff43fd97d8 EFLAGS: 00000246 ORIG_RAX: 0000000000000139
[    2.980834] RAX: ffffffffffffffda RBX: 0000559a44ca64f0 RCX: 00007f4754e32879
[    2.980836] RDX: 0000000000000000 RSI: 00007f475599f4cd RDI: 0000000000000018
[    2.980838] RBP: 00007f475599f4cd R08: 0000000000000000 R09: 0000000000000000
[    2.980839] R10: 0000000000000018 R11: 0000000000000246 R12: 0000000000000000
[    2.980841] R13: 0000559a44c92fd0 R14: 0000000000020000 R15: 0000000000000000
[    2.980881] WARNING: CPU: 3 PID: 551 at drivers/gpu/drm/i915/intel_display.c:14983 intel_modeset_init+0x14d7/0x19f0 [i915]
[    2.980884] ---[ end trace 5eb47a76277d4731 ]---

The cause of this appears to be due to the fact that if there's
pre-existing display state that was set by the BIOS when i915 loads, it
will attempt to perform a modeset before the driver is registered with
userspace. Since this happens before the driver's registered with
userspace, it's connectors are also unregistered and thus-states which
would turn on DPMS on a connector end up getting rejected since the
connector isn't registered.

These bugs managed to get past Intel's CI partially due to the fact it
never ran a full test on my patches for some reason, but also because
all of the tests unload the GPU once before running. Since this bug is
only really triggered when the drivers tries to perform a modeset before
it's been fully registered with userspace when coming from whatever
display configuration the firmware left us with, it likely would never
have been picked up by CI in the first place.

After some discussion with vsyrjala, we decided the best course of
action would be to just move the unregistered connector checks out of
update_connector_routing() and into drm_atomic_set_crtc_for_connector().
The reason for this being that legacy modesetting isn't going to be
expecting failures anywhere (at least this is the case with X), so
ideally we want to ensure that any DPMS changes will still work even on
unregistered connectors. Instead, we now only reject new modesets which
would change the current CRTC assigned to an unregistered connector
unless no new CRTC is being assigned to replace the connector's previous
one.

Signed-off-by: Lyude Paul <lyude@redhat.com>
Reported-by: Ville SyrjÃ¤lÃ¤ <ville.syrjala@linux.intel.com>
Fixes: 4d80273976bf ("drm/atomic_helper: Disallow new modesets on unregistered connectors")
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Ville SyrjÃ¤lÃ¤ <ville.syrjala@linux.intel.com>
Cc: stable@vger.kernel.org
Reviewed-by: Ville SyrjÃ¤lÃ¤ <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181009204424.21462-1-lyude@redhat.com
(cherry picked from commit b5d29843d8ef86d4cde4742e095b81b7fd41e688)
Fixes: e96550956fbc ("drm/atomic_helper: Disallow new modesets on unregistered connectors")
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/drm_atomic_helper.c | 21 +--------------------
 drivers/gpu/drm/drm_atomic_uapi.c   | 21 +++++++++++++++++++++
 2 files changed, 22 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index 20bd176138a0b..e49b22381048a 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -308,26 +308,6 @@ update_connector_routing(struct drm_atomic_state *state,
 		return 0;
 	}
 
-	crtc_state = drm_atomic_get_new_crtc_state(state,
-						   new_connector_state->crtc);
-	/*
-	 * For compatibility with legacy users, we want to make sure that
-	 * we allow DPMS On->Off modesets on unregistered connectors. Modesets
-	 * which would result in anything else must be considered invalid, to
-	 * avoid turning on new displays on dead connectors.
-	 *
-	 * Since the connector can be unregistered at any point during an
-	 * atomic check or commit, this is racy. But that's OK: all we care
-	 * about is ensuring that userspace can't do anything but shut off the
-	 * display on a connector that was destroyed after its been notified,
-	 * not before.
-	 */
-	if (!READ_ONCE(connector->registered) && crtc_state->active) {
-		DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] is not registered\n",
-				 connector->base.id, connector->name);
-		return -EINVAL;
-	}
-
 	funcs = connector->helper_private;
 
 	if (funcs->atomic_best_encoder)
@@ -372,6 +352,7 @@ update_connector_routing(struct drm_atomic_state *state,
 
 	set_best_encoder(state, new_connector_state, new_encoder);
 
+	crtc_state = drm_atomic_get_new_crtc_state(state, new_connector_state->crtc);
 	crtc_state->connectors_changed = true;
 
 	DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] using [ENCODER:%d:%s] on [CRTC:%d:%s]\n",
diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c
index d5b7f315098c2..a22d6f269b07f 100644
--- a/drivers/gpu/drm/drm_atomic_uapi.c
+++ b/drivers/gpu/drm/drm_atomic_uapi.c
@@ -299,6 +299,27 @@ drm_atomic_set_crtc_for_connector(struct drm_connector_state *conn_state,
 	struct drm_connector *connector = conn_state->connector;
 	struct drm_crtc_state *crtc_state;
 
+	/*
+	 * For compatibility with legacy users, we want to make sure that
+	 * we allow DPMS On<->Off modesets on unregistered connectors, since
+	 * legacy modesetting users will not be expecting these to fail. We do
+	 * not however, want to allow legacy users to assign a connector
+	 * that's been unregistered from sysfs to another CRTC, since doing
+	 * this with a now non-existent connector could potentially leave us
+	 * in an invalid state.
+	 *
+	 * Since the connector can be unregistered at any point during an
+	 * atomic check or commit, this is racy. But that's OK: all we care
+	 * about is ensuring that userspace can't use this connector for new
+	 * configurations after it's been notified that the connector is no
+	 * longer present.
+	 */
+	if (!READ_ONCE(connector->registered) && crtc) {
+		DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] is not registered\n",
+				 connector->base.id, connector->name);
+		return -EINVAL;
+	}
+
 	if (conn_state->crtc == crtc)
 		return 0;
 
-- 
GitLab


From de9f8eea5a44b0b756d3d6345af7f8e630a3c8c0 Mon Sep 17 00:00:00 2001
From: Lyude Paul <lyude@redhat.com>
Date: Tue, 16 Oct 2018 16:39:46 -0400
Subject: [PATCH 0078/1890] drm/atomic_helper: Stop modesets on unregistered
 connectors harder
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Unfortunately, it appears our fix in:
commit b5d29843d8ef ("drm/atomic_helper: Allow DPMS On<->Off changes
for unregistered connectors")

Which attempted to work around the problems introduced by:
commit 4d80273976bf ("drm/atomic_helper: Disallow new modesets on
unregistered connectors")

Is still not the right solution, as modesets can still be triggered
outside of drm_atomic_set_crtc_for_connector().

So in order to fix this, while still being careful that we don't break
modesets that a driver may perform before being registered with
userspace, we replace connector->registered with a tristate member,
connector->registration_state. This allows us to keep track of whether
or not a connector is still initializing and hasn't been exposed to
userspace, is currently registered and exposed to userspace, or has been
legitimately removed from the system after having once been present.

Using this info, we can prevent userspace from performing new modesets
on unregistered connectors while still allowing the driver to perform
modesets on unregistered connectors before the driver has finished being
registered.

Changes since v1:
- Fix WARN_ON() in drm_connector_cleanup() that CI caught with this
  patchset in igt@drv_module_reload@basic-reload-inject and
  igt@drv_module_reload@basic-reload by checking if the connector is
  registered instead of unregistered, as calling drm_connector_cleanup()
  on a connector that hasn't been registered with userspace yet should
  stay valid.
- Remove unregistered_connector_check(), and just go back to what we
  were doing before in commit 4d80273976bf ("drm/atomic_helper: Disallow
  new modesets on unregistered connectors") except replacing
  READ_ONCE(connector->registered) with drm_connector_is_unregistered().
  This gets rid of the behavior of allowing DPMS On<->Off, but that should
  be fine as it's more consistent with the UAPI we had before - danvet
- s/drm_connector_unregistered/drm_connector_is_unregistered/ - danvet
- Update documentation, fix some typos.

Fixes: b5d29843d8ef ("drm/atomic_helper: Allow DPMS On<->Off changes for unregistered connectors")
Cc: Ville SyrjÃ¤lÃ¤ <ville.syrjala@linux.intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: stable@vger.kernel.org
Cc: David Airlie <airlied@linux.ie>
Signed-off-by: Lyude Paul <lyude@redhat.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20181016203946.9601-1-lyude@redhat.com
(cherry picked from commit 39b50c603878f4f8ae541ac4088a805d588abc79)
Fixes: e96550956fbc ("drm/atomic_helper: Disallow new modesets on unregistered connectors")
Fixes: 34ca26a98ad6 ("drm/atomic_helper: Allow DPMS On<->Off changes for unregistered connectors")
Cc: stable@vger.kernel.org
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/drm_atomic_helper.c | 21 ++++++++-
 drivers/gpu/drm/drm_atomic_uapi.c   | 21 ---------
 drivers/gpu/drm/drm_connector.c     | 11 +++--
 drivers/gpu/drm/i915/intel_dp_mst.c |  8 ++--
 include/drm/drm_connector.h         | 71 ++++++++++++++++++++++++++++-
 5 files changed, 99 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index e49b22381048a..1cc3a045ec2f1 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -308,6 +308,26 @@ update_connector_routing(struct drm_atomic_state *state,
 		return 0;
 	}
 
+	crtc_state = drm_atomic_get_new_crtc_state(state,
+						   new_connector_state->crtc);
+	/*
+	 * For compatibility with legacy users, we want to make sure that
+	 * we allow DPMS On->Off modesets on unregistered connectors. Modesets
+	 * which would result in anything else must be considered invalid, to
+	 * avoid turning on new displays on dead connectors.
+	 *
+	 * Since the connector can be unregistered at any point during an
+	 * atomic check or commit, this is racy. But that's OK: all we care
+	 * about is ensuring that userspace can't do anything but shut off the
+	 * display on a connector that was destroyed after its been notified,
+	 * not before.
+	 */
+	if (drm_connector_is_unregistered(connector) && crtc_state->active) {
+		DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] is not registered\n",
+				 connector->base.id, connector->name);
+		return -EINVAL;
+	}
+
 	funcs = connector->helper_private;
 
 	if (funcs->atomic_best_encoder)
@@ -352,7 +372,6 @@ update_connector_routing(struct drm_atomic_state *state,
 
 	set_best_encoder(state, new_connector_state, new_encoder);
 
-	crtc_state = drm_atomic_get_new_crtc_state(state, new_connector_state->crtc);
 	crtc_state->connectors_changed = true;
 
 	DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] using [ENCODER:%d:%s] on [CRTC:%d:%s]\n",
diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c
index a22d6f269b07f..d5b7f315098c2 100644
--- a/drivers/gpu/drm/drm_atomic_uapi.c
+++ b/drivers/gpu/drm/drm_atomic_uapi.c
@@ -299,27 +299,6 @@ drm_atomic_set_crtc_for_connector(struct drm_connector_state *conn_state,
 	struct drm_connector *connector = conn_state->connector;
 	struct drm_crtc_state *crtc_state;
 
-	/*
-	 * For compatibility with legacy users, we want to make sure that
-	 * we allow DPMS On<->Off modesets on unregistered connectors, since
-	 * legacy modesetting users will not be expecting these to fail. We do
-	 * not however, want to allow legacy users to assign a connector
-	 * that's been unregistered from sysfs to another CRTC, since doing
-	 * this with a now non-existent connector could potentially leave us
-	 * in an invalid state.
-	 *
-	 * Since the connector can be unregistered at any point during an
-	 * atomic check or commit, this is racy. But that's OK: all we care
-	 * about is ensuring that userspace can't use this connector for new
-	 * configurations after it's been notified that the connector is no
-	 * longer present.
-	 */
-	if (!READ_ONCE(connector->registered) && crtc) {
-		DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] is not registered\n",
-				 connector->base.id, connector->name);
-		return -EINVAL;
-	}
-
 	if (conn_state->crtc == crtc)
 		return 0;
 
diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c
index 1e40e5decbe91..4943cef178beb 100644
--- a/drivers/gpu/drm/drm_connector.c
+++ b/drivers/gpu/drm/drm_connector.c
@@ -379,7 +379,8 @@ void drm_connector_cleanup(struct drm_connector *connector)
 	/* The connector should have been removed from userspace long before
 	 * it is finally destroyed.
 	 */
-	if (WARN_ON(connector->registered))
+	if (WARN_ON(connector->registration_state ==
+		    DRM_CONNECTOR_REGISTERED))
 		drm_connector_unregister(connector);
 
 	if (connector->tile_group) {
@@ -436,7 +437,7 @@ int drm_connector_register(struct drm_connector *connector)
 		return 0;
 
 	mutex_lock(&connector->mutex);
-	if (connector->registered)
+	if (connector->registration_state != DRM_CONNECTOR_INITIALIZING)
 		goto unlock;
 
 	ret = drm_sysfs_connector_add(connector);
@@ -456,7 +457,7 @@ int drm_connector_register(struct drm_connector *connector)
 
 	drm_mode_object_register(connector->dev, &connector->base);
 
-	connector->registered = true;
+	connector->registration_state = DRM_CONNECTOR_REGISTERED;
 	goto unlock;
 
 err_debugfs:
@@ -478,7 +479,7 @@ EXPORT_SYMBOL(drm_connector_register);
 void drm_connector_unregister(struct drm_connector *connector)
 {
 	mutex_lock(&connector->mutex);
-	if (!connector->registered) {
+	if (connector->registration_state != DRM_CONNECTOR_REGISTERED) {
 		mutex_unlock(&connector->mutex);
 		return;
 	}
@@ -489,7 +490,7 @@ void drm_connector_unregister(struct drm_connector *connector)
 	drm_sysfs_connector_remove(connector);
 	drm_debugfs_connector_remove(connector);
 
-	connector->registered = false;
+	connector->registration_state = DRM_CONNECTOR_UNREGISTERED;
 	mutex_unlock(&connector->mutex);
 }
 EXPORT_SYMBOL(drm_connector_unregister);
diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c
index 7f155b4f1a7d7..1b00f8ea145ba 100644
--- a/drivers/gpu/drm/i915/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/intel_dp_mst.c
@@ -77,7 +77,7 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder,
 	pipe_config->pbn = mst_pbn;
 
 	/* Zombie connectors can't have VCPI slots */
-	if (READ_ONCE(connector->registered)) {
+	if (!drm_connector_is_unregistered(connector)) {
 		slots = drm_dp_atomic_find_vcpi_slots(state,
 						      &intel_dp->mst_mgr,
 						      port,
@@ -313,7 +313,7 @@ static int intel_dp_mst_get_ddc_modes(struct drm_connector *connector)
 	struct edid *edid;
 	int ret;
 
-	if (!READ_ONCE(connector->registered))
+	if (drm_connector_is_unregistered(connector))
 		return intel_connector_update_modes(connector, NULL);
 
 	edid = drm_dp_mst_get_edid(connector, &intel_dp->mst_mgr, intel_connector->port);
@@ -329,7 +329,7 @@ intel_dp_mst_detect(struct drm_connector *connector, bool force)
 	struct intel_connector *intel_connector = to_intel_connector(connector);
 	struct intel_dp *intel_dp = intel_connector->mst_port;
 
-	if (!READ_ONCE(connector->registered))
+	if (drm_connector_is_unregistered(connector))
 		return connector_status_disconnected;
 	return drm_dp_mst_detect_port(connector, &intel_dp->mst_mgr,
 				      intel_connector->port);
@@ -372,7 +372,7 @@ intel_dp_mst_mode_valid(struct drm_connector *connector,
 	int bpp = 24; /* MST uses fixed bpp */
 	int max_rate, mode_rate, max_lanes, max_link_clock;
 
-	if (!READ_ONCE(connector->registered))
+	if (drm_connector_is_unregistered(connector))
 		return MODE_ERROR;
 
 	if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h
index 91a877fa00cb5..9ccad6b062f2b 100644
--- a/include/drm/drm_connector.h
+++ b/include/drm/drm_connector.h
@@ -82,6 +82,53 @@ enum drm_connector_status {
 	connector_status_unknown = 3,
 };
 
+/**
+ * enum drm_connector_registration_status - userspace registration status for
+ * a &drm_connector
+ *
+ * This enum is used to track the status of initializing a connector and
+ * registering it with userspace, so that DRM can prevent bogus modesets on
+ * connectors that no longer exist.
+ */
+enum drm_connector_registration_state {
+	/**
+	 * @DRM_CONNECTOR_INITIALIZING: The connector has just been created,
+	 * but has yet to be exposed to userspace. There should be no
+	 * additional restrictions to how the state of this connector may be
+	 * modified.
+	 */
+	DRM_CONNECTOR_INITIALIZING = 0,
+
+	/**
+	 * @DRM_CONNECTOR_REGISTERED: The connector has been fully initialized
+	 * and registered with sysfs, as such it has been exposed to
+	 * userspace. There should be no additional restrictions to how the
+	 * state of this connector may be modified.
+	 */
+	DRM_CONNECTOR_REGISTERED = 1,
+
+	/**
+	 * @DRM_CONNECTOR_UNREGISTERED: The connector has either been exposed
+	 * to userspace and has since been unregistered and removed from
+	 * userspace, or the connector was unregistered before it had a chance
+	 * to be exposed to userspace (e.g. still in the
+	 * @DRM_CONNECTOR_INITIALIZING state). When a connector is
+	 * unregistered, there are additional restrictions to how its state
+	 * may be modified:
+	 *
+	 * - An unregistered connector may only have its DPMS changed from
+	 *   On->Off. Once DPMS is changed to Off, it may not be switched back
+	 *   to On.
+	 * - Modesets are not allowed on unregistered connectors, unless they
+	 *   would result in disabling its assigned CRTCs. This means
+	 *   disabling a CRTC on an unregistered connector is OK, but enabling
+	 *   one is not.
+	 * - Removing a CRTC from an unregistered connector is OK, but new
+	 *   CRTCs may never be assigned to an unregistered connector.
+	 */
+	DRM_CONNECTOR_UNREGISTERED = 2,
+};
+
 enum subpixel_order {
 	SubPixelUnknown = 0,
 	SubPixelHorizontalRGB,
@@ -853,10 +900,12 @@ struct drm_connector {
 	bool ycbcr_420_allowed;
 
 	/**
-	 * @registered: Is this connector exposed (registered) with userspace?
+	 * @registration_state: Is this connector initializing, exposed
+	 * (registered) with userspace, or unregistered?
+	 *
 	 * Protected by @mutex.
 	 */
-	bool registered;
+	enum drm_connector_registration_state registration_state;
 
 	/**
 	 * @modes:
@@ -1166,6 +1215,24 @@ static inline void drm_connector_unreference(struct drm_connector *connector)
 	drm_connector_put(connector);
 }
 
+/**
+ * drm_connector_is_unregistered - has the connector been unregistered from
+ * userspace?
+ * @connector: DRM connector
+ *
+ * Checks whether or not @connector has been unregistered from userspace.
+ *
+ * Returns:
+ * True if the connector was unregistered, false if the connector is
+ * registered or has not yet been registered with userspace.
+ */
+static inline bool
+drm_connector_is_unregistered(struct drm_connector *connector)
+{
+	return READ_ONCE(connector->registration_state) ==
+		DRM_CONNECTOR_UNREGISTERED;
+}
+
 const char *drm_get_connector_status_name(enum drm_connector_status status);
 const char *drm_get_subpixel_order_name(enum subpixel_order order);
 const char *drm_get_dpms_name(int val);
-- 
GitLab


From 7b0f61e91b6056c71649efa3204112a4b6cf5fc8 Mon Sep 17 00:00:00 2001
From: Lyude Paul <lyude@redhat.com>
Date: Mon, 8 Oct 2018 19:24:31 -0400
Subject: [PATCH 0079/1890] drm/nouveau: Fix nv50_mstc->best_encoder()

As mentioned in the previous commit, we currently prevent new modesets
on recently-removed MST connectors by returning no encoder from our
->best_encoder() callback once the MST port has disappeared. This is
wrong however, because it prevents legacy modesetting users from being
able to disable CRTCs on MST connectors after the connector's respective
topology has disappeared.

So, fix this by instead by just always returning a valid encoder.

Changes since v2:
- Remove usage of atomic MST helper for now, since that got replaced
  with a much simpler solution

Signed-off-by: Lyude Paul <lyude@redhat.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Ben Skeggs <bskeggs@redhat.com>
Cc: stable@vger.kernel.org
Link: https://patchwork.freedesktop.org/patch/msgid/20181008232437.5571-3-lyude@redhat.com
(cherry picked from commit e87b0bbc9f0380d403f8f2f6abba0d51c74d944f)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/nouveau/dispnv50/disp.c | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c
index 5f163a025e890..9d9a18ab95ece 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/disp.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c
@@ -881,22 +881,16 @@ nv50_mstc_atomic_best_encoder(struct drm_connector *connector,
 {
 	struct nv50_head *head = nv50_head(connector_state->crtc);
 	struct nv50_mstc *mstc = nv50_mstc(connector);
-	if (mstc->port) {
-		struct nv50_mstm *mstm = mstc->mstm;
-		return &mstm->msto[head->base.index]->encoder;
-	}
-	return NULL;
+
+	return &mstc->mstm->msto[head->base.index]->encoder;
 }
 
 static struct drm_encoder *
 nv50_mstc_best_encoder(struct drm_connector *connector)
 {
 	struct nv50_mstc *mstc = nv50_mstc(connector);
-	if (mstc->port) {
-		struct nv50_mstm *mstm = mstc->mstm;
-		return &mstm->msto[0]->encoder;
-	}
-	return NULL;
+
+	return &mstc->mstm->msto[0]->encoder;
 }
 
 static enum drm_mode_status
-- 
GitLab


From 389373d3306553896a9e218493e5b6175c844eb0 Mon Sep 17 00:00:00 2001
From: Hongxu Jia <hongxu.jia@windriver.com>
Date: Thu, 18 Oct 2018 16:26:13 +0800
Subject: [PATCH 0080/1890] perf arm64: Fix generate system call table failed
 with /tmp mounted with noexec
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When /tmp is mounted with noexec, mksyscalltbl fails.

  [snip]
  |perf-1.0/tools/perf/arch/arm64/entry/syscalls//mksyscalltbl:
  /tmp/create-table-6VGPSt: Permission denied
  [snip]

Add variable TMPDIR as prefix dir of the temporary file, if it is set,
replace default /tmp.

Signed-off-by: Hongxu Jia <hongxu.jia@windriver.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Kim Phillips <kim.phillips@arm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
Cc: SÃ©bastien Boisvert <sboisvert@gydle.com>
Cc: Thomas Richter <tmricht@linux.vnet.ibm.com>
Fixes: 2b5882435606 ("perf arm64: Generate system call table from asm/unistd.h")
LPU-Reference: 1539851173-14959-1-git-send-email-hongxu.jia@windriver.com
Link: https://lkml.kernel.org/n/tip-1qrgq840ci0c5cy4oww957ge@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/arm64/entry/syscalls/mksyscalltbl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl b/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl
index 2dbb8cade048f..c88fd32563ebc 100755
--- a/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl
+++ b/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl
@@ -23,7 +23,7 @@ create_table_from_c()
 {
 	local sc nr last_sc
 
-	create_table_exe=`mktemp /tmp/create-table-XXXXXX`
+	create_table_exe=`mktemp ${TMPDIR:-/tmp}/create-table-XXXXXX`
 
 	{
 
-- 
GitLab


From 4ba8b3ebf4f8f583c2c01da20e4d110a5881ffdd Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 18 Oct 2018 15:24:07 -0300
Subject: [PATCH 0081/1890] tools lib subcmd: Introduce OPTION_ULONG

For completeness, will be used in 'perf trace --max-events'.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Kim Phillips <kim.phillips@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-glaj3pwespxfj2fdjs9a20b6@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/lib/subcmd/parse-options.c | 19 +++++++++++++++++++
 tools/lib/subcmd/parse-options.h |  2 ++
 2 files changed, 21 insertions(+)

diff --git a/tools/lib/subcmd/parse-options.c b/tools/lib/subcmd/parse-options.c
index cb7154eccbdc1..dbb9efbf718a0 100644
--- a/tools/lib/subcmd/parse-options.c
+++ b/tools/lib/subcmd/parse-options.c
@@ -116,6 +116,7 @@ static int get_value(struct parse_opt_ctx_t *p,
 		case OPTION_INTEGER:
 		case OPTION_UINTEGER:
 		case OPTION_LONG:
+		case OPTION_ULONG:
 		case OPTION_U64:
 		default:
 			break;
@@ -166,6 +167,7 @@ static int get_value(struct parse_opt_ctx_t *p,
 		case OPTION_INTEGER:
 		case OPTION_UINTEGER:
 		case OPTION_LONG:
+		case OPTION_ULONG:
 		case OPTION_U64:
 		default:
 			break;
@@ -295,6 +297,22 @@ static int get_value(struct parse_opt_ctx_t *p,
 			return opterror(opt, "expects a numerical value", flags);
 		return 0;
 
+	case OPTION_ULONG:
+		if (unset) {
+			*(unsigned long *)opt->value = 0;
+			return 0;
+		}
+		if (opt->flags & PARSE_OPT_OPTARG && !p->opt) {
+			*(unsigned long *)opt->value = opt->defval;
+			return 0;
+		}
+		if (get_arg(p, opt, flags, &arg))
+			return -1;
+		*(unsigned long *)opt->value = strtoul(arg, (char **)&s, 10);
+		if (*s)
+			return opterror(opt, "expects a numerical value", flags);
+		return 0;
+
 	case OPTION_U64:
 		if (unset) {
 			*(u64 *)opt->value = 0;
@@ -703,6 +721,7 @@ static void print_option_help(const struct option *opts, int full)
 	case OPTION_ARGUMENT:
 		break;
 	case OPTION_LONG:
+	case OPTION_ULONG:
 	case OPTION_U64:
 	case OPTION_INTEGER:
 	case OPTION_UINTEGER:
diff --git a/tools/lib/subcmd/parse-options.h b/tools/lib/subcmd/parse-options.h
index 92fdbe1519f6d..6ca2a8bfe716b 100644
--- a/tools/lib/subcmd/parse-options.h
+++ b/tools/lib/subcmd/parse-options.h
@@ -25,6 +25,7 @@ enum parse_opt_type {
 	OPTION_STRING,
 	OPTION_INTEGER,
 	OPTION_LONG,
+	OPTION_ULONG,
 	OPTION_CALLBACK,
 	OPTION_U64,
 	OPTION_UINTEGER,
@@ -133,6 +134,7 @@ struct option {
 #define OPT_INTEGER(s, l, v, h)     { .type = OPTION_INTEGER, .short_name = (s), .long_name = (l), .value = check_vtype(v, int *), .help = (h) }
 #define OPT_UINTEGER(s, l, v, h)    { .type = OPTION_UINTEGER, .short_name = (s), .long_name = (l), .value = check_vtype(v, unsigned int *), .help = (h) }
 #define OPT_LONG(s, l, v, h)        { .type = OPTION_LONG, .short_name = (s), .long_name = (l), .value = check_vtype(v, long *), .help = (h) }
+#define OPT_ULONG(s, l, v, h)        { .type = OPTION_ULONG, .short_name = (s), .long_name = (l), .value = check_vtype(v, unsigned long *), .help = (h) }
 #define OPT_U64(s, l, v, h)         { .type = OPTION_U64, .short_name = (s), .long_name = (l), .value = check_vtype(v, u64 *), .help = (h) }
 #define OPT_STRING(s, l, v, a, h)   { .type = OPTION_STRING,  .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), .argh = (a), .help = (h) }
 #define OPT_STRING_OPTARG(s, l, v, a, h, d) \
-- 
GitLab


From 5067a8cdd4ce3588fca2e0ee554f0f081650de8f Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 18 Oct 2018 16:38:27 -0300
Subject: [PATCH 0082/1890] perf trace: Introduce --max-events
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Allow stopping tracing after a number of events take place, considering
strace-like syscalls formatting as one event per enter/exit pair or when
in a multi-process tracing session a syscall is interrupted and printed
ending with '...'.

Examples included in the documentation:

Trace the first 4 open, openat or open_by_handle_at syscalls (in the future more syscalls may match here):

  $ perf trace -e open* --max-events 4
  [root@jouet perf]# trace -e open* --max-events 4
  2272.992 ( 0.037 ms): gnome-shell/1370 openat(dfd: CWD, filename: /proc/self/stat) = 31
  2277.481 ( 0.139 ms): gnome-shell/3039 openat(dfd: CWD, filename: /proc/self/stat) = 65
  3026.398 ( 0.076 ms): gnome-shell/3039 openat(dfd: CWD, filename: /proc/self/stat) = 65
  4294.665 ( 0.015 ms): sed/15879 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) = 3
  $

Trace the first minor page fault when running a workload:

  # perf trace -F min --max-stack=7 --max-events 1 sleep 1
     0.000 ( 0.000 ms): sleep/18006 minfault [__clear_user+0x1a] => 0x5626efa56080 (?k)
                                       __clear_user ([kernel.kallsyms])
                                       load_elf_binary ([kernel.kallsyms])
                                       search_binary_handler ([kernel.kallsyms])
                                       __do_execve_file.isra.33 ([kernel.kallsyms])
                                       __x64_sys_execve ([kernel.kallsyms])
                                       do_syscall_64 ([kernel.kallsyms])
                                       entry_SYSCALL_64 ([kernel.kallsyms])
  #

Trace the next min page page fault to take place on the first CPU:

  # perf trace -F min --call-graph=dwarf --max-events 1 --cpu 0
     0.000 ( 0.000 ms): Web Content/17136 minfault [js::gc::Chunk::fetchNextDecommittedArena+0x4b] => 0x7fbe6181b000 (?.)
                                       js::gc::FreeSpan::initAsEmpty (inlined)
                                       js::gc::Arena::setAsNotAllocated (inlined)
                                       js::gc::Chunk::fetchNextDecommittedArena (/usr/lib64/firefox/libxul.so)
                                       js::gc::Chunk::allocateArena (/usr/lib64/firefox/libxul.so)
                                       js::gc::GCRuntime::allocateArena (/usr/lib64/firefox/libxul.so)
                                       js::gc::ArenaLists::allocateFromArena (/usr/lib64/firefox/libxul.so)
                                       js::gc::GCRuntime::tryNewTenuredThing<JSString, (js::AllowGC)1> (inlined)
                                       js::AllocateString<JSString, (js::AllowGC)1> (/usr/lib64/firefox/libxul.so)
                                       js::Allocate<JSThinInlineString, (js::AllowGC)1> (inlined)
                                       JSThinInlineString::new_<(js::AllowGC)1> (inlined)
                                       AllocateInlineString<(js::AllowGC)1, unsigned char> (inlined)
                                       js::ConcatStrings<(js::AllowGC)1> (/usr/lib64/firefox/libxul.so)
                                       [0x18b26e6bc2bd] (/tmp/perf-17136.map)

Tracing the next four ext4 operations on a specific CPU:

  # perf trace -e ext4:*/call-graph=fp/ --max-events 4 --cpu 3
     0.000 mutt/3849 ext4:ext4_es_lookup_extent_enter:dev 253,2 ino 57277 lblk 0
                                       ext4_es_lookup_extent ([kernel.kallsyms])
                                       read (/usr/lib64/libc-2.26.so)
     0.097 mutt/3849 ext4:ext4_es_lookup_extent_exit:dev 253,2 ino 57277 found 0 [0/0) 0
                                       ext4_es_lookup_extent ([kernel.kallsyms])
                                       read (/usr/lib64/libc-2.26.so)
     0.141 mutt/3849 ext4:ext4_ext_map_blocks_enter:dev 253,2 ino 57277 lblk 0 len 1 flags
                                       ext4_ext_map_blocks ([kernel.kallsyms])
                                       read (/usr/lib64/libc-2.26.so)
     0.184 mutt/3849 ext4:ext4_ext_load_extent:dev 253,2 ino 57277 lblk 1516511 pblk 18446744071750013657
                                       __read_extent_tree_block ([kernel.kallsyms])
                                       __read_extent_tree_block ([kernel.kallsyms])
                                       ext4_find_extent ([kernel.kallsyms])
                                       ext4_ext_map_blocks ([kernel.kallsyms])
                                       ext4_map_blocks ([kernel.kallsyms])
                                       ext4_mpage_readpages ([kernel.kallsyms])
                                       read_pages ([kernel.kallsyms])
                                       __do_page_cache_readahead ([kernel.kallsyms])
                                       ondemand_readahead ([kernel.kallsyms])
                                       generic_file_read_iter ([kernel.kallsyms])
                                       __vfs_read ([kernel.kallsyms])
                                       vfs_read ([kernel.kallsyms])
                                       ksys_read ([kernel.kallsyms])
                                       do_syscall_64 ([kernel.kallsyms])
                                       entry_SYSCALL_64 ([kernel.kallsyms])
                                       read (/usr/lib64/libc-2.26.so)
  #

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Milian Wolff <milian.wolff@kdab.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: RudÃ¡ Moura <ruda.moura@gmail.com>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-sweh107bs7ol5bzls0m4tqdz@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-trace.txt | 47 +++++++++++++++++++++++++
 tools/perf/builtin-trace.c              | 21 +++++++++++
 2 files changed, 68 insertions(+)

diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index 115db9e06ecd8..0d1a1cd4d3281 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -171,6 +171,11 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
 --kernel-syscall-graph::
 	 Show the kernel callchains on the syscall exit path.
 
+--max-events=N::
+	Stop after processing N events. Note that strace-like events are considered
+	only at exit time or when a syscall is interrupted, i.e. in those cases this
+	option is equivalent to the number of lines printed.
+
 --max-stack::
         Set the stack depth limit when parsing the callchain, anything
         beyond the specified depth will be ignored. Note that at this point
@@ -238,6 +243,48 @@ Trace syscalls, major and minor pagefaults:
   As you can see, there was major pagefault in python process, from
   CRYPTO_push_info_ routine which faulted somewhere in libcrypto.so.
 
+Trace the first 4 open, openat or open_by_handle_at syscalls (in the future more syscalls may match here):
+
+  $ perf trace -e open* --max-events 4
+  [root@jouet perf]# trace -e open* --max-events 4
+  2272.992 ( 0.037 ms): gnome-shell/1370 openat(dfd: CWD, filename: /proc/self/stat) = 31
+  2277.481 ( 0.139 ms): gnome-shell/3039 openat(dfd: CWD, filename: /proc/self/stat) = 65
+  3026.398 ( 0.076 ms): gnome-shell/3039 openat(dfd: CWD, filename: /proc/self/stat) = 65
+  4294.665 ( 0.015 ms): sed/15879 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) = 3
+  $
+
+Trace the first minor page fault when running a workload:
+
+  # perf trace -F min --max-stack=7 --max-events 1 sleep 1
+     0.000 ( 0.000 ms): sleep/18006 minfault [__clear_user+0x1a] => 0x5626efa56080 (?k)
+                                       __clear_user ([kernel.kallsyms])
+                                       load_elf_binary ([kernel.kallsyms])
+                                       search_binary_handler ([kernel.kallsyms])
+                                       __do_execve_file.isra.33 ([kernel.kallsyms])
+                                       __x64_sys_execve ([kernel.kallsyms])
+                                       do_syscall_64 ([kernel.kallsyms])
+                                       entry_SYSCALL_64 ([kernel.kallsyms])
+  #
+
+Trace the next min page page fault to take place on the first CPU:
+
+  # perf trace -F min --call-graph=dwarf --max-events 1 --cpu 0
+     0.000 ( 0.000 ms): Web Content/17136 minfault [js::gc::Chunk::fetchNextDecommittedArena+0x4b] => 0x7fbe6181b000 (?.)
+                                       js::gc::FreeSpan::initAsEmpty (inlined)
+                                       js::gc::Arena::setAsNotAllocated (inlined)
+                                       js::gc::Chunk::fetchNextDecommittedArena (/usr/lib64/firefox/libxul.so)
+                                       js::gc::Chunk::allocateArena (/usr/lib64/firefox/libxul.so)
+                                       js::gc::GCRuntime::allocateArena (/usr/lib64/firefox/libxul.so)
+                                       js::gc::ArenaLists::allocateFromArena (/usr/lib64/firefox/libxul.so)
+                                       js::gc::GCRuntime::tryNewTenuredThing<JSString, (js::AllowGC)1> (inlined)
+                                       js::AllocateString<JSString, (js::AllowGC)1> (/usr/lib64/firefox/libxul.so)
+                                       js::Allocate<JSThinInlineString, (js::AllowGC)1> (inlined)
+                                       JSThinInlineString::new_<(js::AllowGC)1> (inlined)
+                                       AllocateInlineString<(js::AllowGC)1, unsigned char> (inlined)
+                                       js::ConcatStrings<(js::AllowGC)1> (/usr/lib64/firefox/libxul.so)
+                                       [0x18b26e6bc2bd] (/tmp/perf-17136.map)
+  #
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-script[1]
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 90289f31dd87c..74638034861ca 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -89,6 +89,8 @@ struct trace {
 	u64			base_time;
 	FILE			*output;
 	unsigned long		nr_events;
+	unsigned long		nr_events_printed;
+	unsigned long		max_events;
 	struct strlist		*ev_qualifier;
 	struct {
 		size_t		nr;
@@ -1664,6 +1666,8 @@ static int trace__printf_interrupted_entry(struct trace *trace)
 	printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
 	ttrace->entry_pending = false;
 
+	++trace->nr_events_printed;
+
 	return printed;
 }
 
@@ -1940,6 +1944,13 @@ errno_print: {
 
 	fputc('\n', trace->output);
 
+	/*
+	 * We only consider an 'event' for the sake of --max-events a non-filtered
+	 * sys_enter + sys_exit and other tracepoint events.
+	 */
+	if (++trace->nr_events_printed == trace->max_events && trace->max_events != ULONG_MAX)
+		interrupted = true;
+
 	if (callchain_ret > 0)
 		trace__fprintf_callchain(trace, sample);
 	else if (callchain_ret < 0)
@@ -2072,6 +2083,7 @@ static void bpf_output__fprintf(struct trace *trace,
 {
 	binary__fprintf(sample->raw_data, sample->raw_size, 8,
 			bpf_output__printer, NULL, trace->output);
+	++trace->nr_events_printed;
 }
 
 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
@@ -2127,6 +2139,7 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
 			event_format__fprintf(evsel->tp_format, sample->cpu,
 					      sample->raw_data, sample->raw_size,
 					      trace->output);
+			++trace->nr_events_printed;
 		}
 	}
 
@@ -2225,6 +2238,8 @@ static int trace__pgfault(struct trace *trace,
 		trace__fprintf_callchain(trace, sample);
 	else if (callchain_ret < 0)
 		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
+
+	++trace->nr_events_printed;
 out:
 	err = 0;
 out_put:
@@ -2402,6 +2417,9 @@ static void trace__handle_event(struct trace *trace, union perf_event *event, st
 		tracepoint_handler handler = evsel->handler;
 		handler(trace, evsel, event, sample);
 	}
+
+	if (trace->nr_events_printed >= trace->max_events && trace->max_events != ULONG_MAX)
+		interrupted = true;
 }
 
 static int trace__add_syscall_newtp(struct trace *trace)
@@ -3249,6 +3267,7 @@ int cmd_trace(int argc, const char **argv)
 		.trace_syscalls = false,
 		.kernel_syscallchains = false,
 		.max_stack = UINT_MAX,
+		.max_events = ULONG_MAX,
 	};
 	const char *output_name = NULL;
 	const struct option trace_options[] = {
@@ -3301,6 +3320,8 @@ int cmd_trace(int argc, const char **argv)
 		     &record_parse_callchain_opt),
 	OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
 		    "Show the kernel callchains on the syscall exit path"),
+	OPT_ULONG(0, "max-events", &trace.max_events,
+		"Set the maximum number of events to print, exit after that is reached. "),
 	OPT_UINTEGER(0, "min-stack", &trace.min_stack,
 		     "Set the minimum stack depth when parsing the callchain, "
 		     "anything below the specified depth will be ignored."),
-- 
GitLab


From 2fda5ada07f36f6cde39a52e7f05d86ea8ffdc33 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 19 Oct 2018 15:47:34 -0300
Subject: [PATCH 0083/1890] perf evsel: Introduce per event max_events property

This simply adds the field to 'struct perf_evsel' and allows setting
it via the event parser, to test it lets trace trace:

First look at where in a function that receives an evsel we can put a probe
to read how evsel->max_events was setup:

  # perf probe -x ~/bin/perf -L trace__event_handler
  <trace__event_handler@/home/acme/git/perf/tools/perf/builtin-trace.c:0>
        0  static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
                                          union perf_event *event __maybe_unused,
                                          struct perf_sample *sample)
        3  {
        4         struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
        5         int callchain_ret = 0;

        7         if (sample->callchain) {
        8                 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
        9                 if (callchain_ret == 0) {
       10                         if (callchain_cursor.nr < trace->min_stack)
       11                                 goto out;
       12                         callchain_ret = 1;
                          }
                  }

See what variables we can probe at line 7:

  # perf probe -x ~/bin/perf -V trace__event_handler:7
  Available variables at trace__event_handler:7
          @<trace__event_handler+89>
                  int     callchain_ret
                  struct perf_evsel*      evsel
                  struct perf_sample*     sample
                  struct thread*  thread
                  struct trace*   trace
                  union perf_event*       event

Add a probe at that line asking for evsel->max_events to be collected and named
as "max_events":

  # perf probe -x ~/bin/perf trace__event_handler:7 'max_events=evsel->max_events'
  Added new event:
    probe_perf:trace__event_handler (on trace__event_handler:7 in /home/acme/bin/perf with max_events=evsel->max_events)

  You can now use it in all perf tools, such as:

  	perf record -e probe_perf:trace__event_handler -aR sleep 1

Now use 'perf trace', here aliased to just 'trace' and trace trace, i.e.
the first 'trace' is tracing just that 'probe_perf:trace__event_handler' event,
while the traced trace is tracing all scheduler tracepoints, will stop at two
events (--max-events 2) and will just set evsel->max_events for all the sched
tracepoints to 9, we will see the output of both traces intermixed:

  # trace -e *perf:*event_handler trace --max-events 2 -e sched:*/nr=9/
       0.000 :0/0 sched:sched_waking:comm=rcu_sched pid=10 prio=120 target_cpu=000
       0.009 :0/0 sched:sched_wakeup:comm=rcu_sched pid=10 prio=120 target_cpu=000
       0.000 trace/23949 probe_perf:trace__event_handler:(48c34a) max_events=0x9
       0.046 trace/23949 probe_perf:trace__event_handler:(48c34a) max_events=0x9
  #

Now, if the traced trace sends its output to /dev/null, we'll see just
what the first level trace outputs: that evsel->max_events is indeed
being set to 9:

  # trace -e *perf:*event_handler trace -o /dev/null --max-events 2 -e sched:*/nr=9/
       0.000 trace/23961 probe_perf:trace__event_handler:(48c34a) max_events=0x9
       0.030 trace/23961 probe_perf:trace__event_handler:(48c34a) max_events=0x9
  #

Now that we can set evsel->max_events, we can go to the next step, honour that
per-event property in 'perf trace'.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Milian Wolff <milian.wolff@kdab.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-og00yasj276joem6e14l1eas@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/evsel.c        | 4 ++++
 tools/perf/util/evsel.h        | 3 +++
 tools/perf/util/parse-events.c | 8 ++++++++
 tools/perf/util/parse-events.h | 1 +
 tools/perf/util/parse-events.l | 1 +
 5 files changed, 17 insertions(+)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 29d7b97f66fbc..7e95ec1c19a82 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -232,6 +232,7 @@ void perf_evsel__init(struct perf_evsel *evsel,
 	evsel->leader	   = evsel;
 	evsel->unit	   = "";
 	evsel->scale	   = 1.0;
+	evsel->max_events  = ULONG_MAX;
 	evsel->evlist	   = NULL;
 	evsel->bpf_fd	   = -1;
 	INIT_LIST_HEAD(&evsel->node);
@@ -793,6 +794,9 @@ static void apply_config_terms(struct perf_evsel *evsel,
 		case PERF_EVSEL__CONFIG_TERM_MAX_STACK:
 			max_stack = term->val.max_stack;
 			break;
+		case PERF_EVSEL__CONFIG_TERM_MAX_EVENTS:
+			evsel->max_events = term->val.max_events;
+			break;
 		case PERF_EVSEL__CONFIG_TERM_INHERIT:
 			/*
 			 * attr->inherit should has already been set by
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 4107c39f4a54a..ad5d615c6db64 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -46,6 +46,7 @@ enum term_type {
 	PERF_EVSEL__CONFIG_TERM_STACK_USER,
 	PERF_EVSEL__CONFIG_TERM_INHERIT,
 	PERF_EVSEL__CONFIG_TERM_MAX_STACK,
+	PERF_EVSEL__CONFIG_TERM_MAX_EVENTS,
 	PERF_EVSEL__CONFIG_TERM_OVERWRITE,
 	PERF_EVSEL__CONFIG_TERM_DRV_CFG,
 	PERF_EVSEL__CONFIG_TERM_BRANCH,
@@ -65,6 +66,7 @@ struct perf_evsel_config_term {
 		bool	inherit;
 		bool	overwrite;
 		char	*branch;
+		unsigned long max_events;
 	} val;
 	bool weak;
 };
@@ -99,6 +101,7 @@ struct perf_evsel {
 	struct perf_counts	*prev_raw_counts;
 	int			idx;
 	u32			ids;
+	unsigned long		max_events;
 	char			*name;
 	double			scale;
 	const char		*unit;
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index f8cd3e7c91866..59be3466d64d3 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -926,6 +926,7 @@ static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = {
 	[PARSE_EVENTS__TERM_TYPE_NOINHERIT]		= "no-inherit",
 	[PARSE_EVENTS__TERM_TYPE_INHERIT]		= "inherit",
 	[PARSE_EVENTS__TERM_TYPE_MAX_STACK]		= "max-stack",
+	[PARSE_EVENTS__TERM_TYPE_MAX_EVENTS]		= "nr",
 	[PARSE_EVENTS__TERM_TYPE_OVERWRITE]		= "overwrite",
 	[PARSE_EVENTS__TERM_TYPE_NOOVERWRITE]		= "no-overwrite",
 	[PARSE_EVENTS__TERM_TYPE_DRV_CFG]		= "driver-config",
@@ -1037,6 +1038,9 @@ do {									   \
 	case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
 		CHECK_TYPE_VAL(NUM);
 		break;
+	case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS:
+		CHECK_TYPE_VAL(NUM);
+		break;
 	default:
 		err->str = strdup("unknown term");
 		err->idx = term->err_term;
@@ -1084,6 +1088,7 @@ static int config_term_tracepoint(struct perf_event_attr *attr,
 	case PARSE_EVENTS__TERM_TYPE_INHERIT:
 	case PARSE_EVENTS__TERM_TYPE_NOINHERIT:
 	case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
+	case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS:
 	case PARSE_EVENTS__TERM_TYPE_OVERWRITE:
 	case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE:
 		return config_term_common(attr, term, err);
@@ -1162,6 +1167,9 @@ do {								\
 		case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
 			ADD_CONFIG_TERM(MAX_STACK, max_stack, term->val.num);
 			break;
+		case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS:
+			ADD_CONFIG_TERM(MAX_EVENTS, max_events, term->val.num);
+			break;
 		case PARSE_EVENTS__TERM_TYPE_OVERWRITE:
 			ADD_CONFIG_TERM(OVERWRITE, overwrite, term->val.num ? 1 : 0);
 			break;
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 4473dac27aee2..5ed035cbcbb72 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -71,6 +71,7 @@ enum {
 	PARSE_EVENTS__TERM_TYPE_NOINHERIT,
 	PARSE_EVENTS__TERM_TYPE_INHERIT,
 	PARSE_EVENTS__TERM_TYPE_MAX_STACK,
+	PARSE_EVENTS__TERM_TYPE_MAX_EVENTS,
 	PARSE_EVENTS__TERM_TYPE_NOOVERWRITE,
 	PARSE_EVENTS__TERM_TYPE_OVERWRITE,
 	PARSE_EVENTS__TERM_TYPE_DRV_CFG,
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 5f761f3ed0f33..7805c71aaae2e 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -269,6 +269,7 @@ time			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_TIME); }
 call-graph		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CALLGRAPH); }
 stack-size		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_STACKSIZE); }
 max-stack		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_MAX_STACK); }
+nr			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_MAX_EVENTS); }
 inherit			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_INHERIT); }
 no-inherit		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOINHERIT); }
 overwrite		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_OVERWRITE); }
-- 
GitLab


From 0400d65501930e6e99848572b3818914c3b94256 Mon Sep 17 00:00:00 2001
From: Camelia Groza <camelia.groza@nxp.com>
Date: Thu, 20 Sep 2018 14:47:01 +0300
Subject: [PATCH 0084/1890] powerpc/dts/fsl: t2080rdb: reorder the Cortina PHY
 XFI lanes

According to the T2080RDB schematics, for the CS4315 PHY, the XFI 1 lane is
connected to SFP 2 and the XFI 2 lane is connected to SFP 1. Change the
device tree to reflect the correct PHY order and port association.

Signed-off-by: Camelia Groza <camelia.groza@nxp.com>
Signed-off-by: Scott Wood <oss@buserror.net>
---
 arch/powerpc/boot/dts/fsl/t2080rdb.dts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/boot/dts/fsl/t2080rdb.dts b/arch/powerpc/boot/dts/fsl/t2080rdb.dts
index 55c0210a771d1..092a400740f84 100644
--- a/arch/powerpc/boot/dts/fsl/t2080rdb.dts
+++ b/arch/powerpc/boot/dts/fsl/t2080rdb.dts
@@ -77,12 +77,12 @@ ethernet@e6000 {
 		};
 
 		ethernet@f0000 {
-			phy-handle = <&xg_cs4315_phy1>;
+			phy-handle = <&xg_cs4315_phy2>;
 			phy-connection-type = "xgmii";
 		};
 
 		ethernet@f2000 {
-			phy-handle = <&xg_cs4315_phy2>;
+			phy-handle = <&xg_cs4315_phy1>;
 			phy-connection-type = "xgmii";
 		};
 
-- 
GitLab


From b7e8452b860c299f342a012922bdd9ab8f2bb722 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 20 Oct 2018 09:04:41 -0300
Subject: [PATCH 0085/1890] perf evsel: Mark a evsel as disabled when asking
 the kernel do disable it

Because there may be more such events in the ring buffer that should be
discarded when an app decides to stop considering them.

At some point we'll do this with eBPF, this way we stop them at origin,
before they are placed in the ring buffer.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-uzufuxws4hufigx07ue1dpv6@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/evlist.c |  2 +-
 tools/perf/util/evsel.c  | 23 +++++++++++++++++------
 tools/perf/util/evsel.h  |  1 +
 3 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index be440df296150..e88e6f9b1463f 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -358,7 +358,7 @@ void perf_evlist__disable(struct perf_evlist *evlist)
 	struct perf_evsel *pos;
 
 	evlist__for_each_entry(evlist, pos) {
-		if (!perf_evsel__is_group_leader(pos) || !pos->fd)
+		if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->fd)
 			continue;
 		perf_evsel__disable(pos);
 	}
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 7e95ec1c19a82..6d187059a3736 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1207,16 +1207,27 @@ int perf_evsel__append_addr_filter(struct perf_evsel *evsel, const char *filter)
 
 int perf_evsel__enable(struct perf_evsel *evsel)
 {
-	return perf_evsel__run_ioctl(evsel,
-				     PERF_EVENT_IOC_ENABLE,
-				     0);
+	int err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, 0);
+
+	if (!err)
+		evsel->disabled = false;
+
+	return err;
 }
 
 int perf_evsel__disable(struct perf_evsel *evsel)
 {
-	return perf_evsel__run_ioctl(evsel,
-				     PERF_EVENT_IOC_DISABLE,
-				     0);
+	int err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, 0);
+	/*
+	 * We mark it disabled here so that tools that disable a event can
+	 * ignore events after they disable it. I.e. the ring buffer may have
+	 * already a few more events queued up before the kernel got the stop
+	 * request.
+	 */
+	if (!err)
+		evsel->disabled = true;
+
+	return err;
 }
 
 int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index ad5d615c6db64..4ef50f157b50b 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -122,6 +122,7 @@ struct perf_evsel {
 	bool			snapshot;
 	bool 			supported;
 	bool 			needs_swap;
+	bool 			disabled;
 	bool			no_aux_samples;
 	bool			immediate;
 	bool			system_wide;
-- 
GitLab


From 4291bf5cb93918232f88a3a70d8f70a72fbb6ab0 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 20 Oct 2018 09:18:50 -0300
Subject: [PATCH 0086/1890] perf trace: Drop addr_location refcounts

When we use machine__resolve() we grab a reference to
addr_location.thread (and in the future to other elements there) via
machine__findnew_thread(), so we must pair that with
addr_location__put(), else we'll never drop that thread when it exits
and no other remaining data structures have pointers to it. Fix it.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-ivg9hifzeuokb1f5jxc2wob4@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-trace.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 74638034861ca..77b8748ad5edb 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1814,12 +1814,14 @@ static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evse
 	int max_stack = evsel->attr.sample_max_stack ?
 			evsel->attr.sample_max_stack :
 			trace->max_stack;
+	int err;
 
-	if (machine__resolve(trace->host, &al, sample) < 0 ||
-	    thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, max_stack))
+	if (machine__resolve(trace->host, &al, sample) < 0)
 		return -1;
 
-	return 0;
+	err = thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, max_stack);
+	addr_location__put(&al);
+	return err;
 }
 
 static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
-- 
GitLab


From a937c6658b8e77e1f65cde2be9970811752121bb Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 20 Oct 2018 09:27:52 -0300
Subject: [PATCH 0087/1890] perf trace: Drop thread refcount in
 trace__event_handler()

We must pair:

   thread = machine__findnew_thread();

with thread__put(thread). Fix it.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Fixes: c4191e55b874 ("perf trace: Show comm and tid for tracepoint events")
Link: https://lkml.kernel.org/n/tip-dkxsb8cwg87rmkrzrbns1o4z@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-trace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 77b8748ad5edb..589e0412652a5 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -2152,8 +2152,8 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
 		trace__fprintf_callchain(trace, sample);
 	else if (callchain_ret < 0)
 		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
-	thread__put(thread);
 out:
+	thread__put(thread);
 	return 0;
 }
 
-- 
GitLab


From cbb5df7e96070f1f728ff7885443646ebba703d4 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Mon, 22 Oct 2018 11:30:15 +0200
Subject: [PATCH 0088/1890] perf stat: Poll for monitored tasks being alive

Adding the check for tasks we monitor via -p/-t options, and finish stat
if there's no longer task to monitor.

Requested-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Link: http://lkml.kernel.org/r/20181022093015.9106-1-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index b86aba1c8028f..d1028d7755bbc 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -409,6 +409,28 @@ static struct perf_evsel *perf_evsel__reset_weak_group(struct perf_evsel *evsel)
 	return leader;
 }
 
+static bool is_target_alive(struct target *_target,
+			    struct thread_map *threads)
+{
+	struct stat st;
+	int i;
+
+	if (!target__has_task(_target))
+		return true;
+
+	for (i = 0; i < threads->nr; i++) {
+		char path[PATH_MAX];
+
+		scnprintf(path, PATH_MAX, "%s/%d", procfs__mountpoint(),
+			  threads->map[i].pid);
+
+		if (!stat(path, &st))
+			return true;
+	}
+
+	return false;
+}
+
 static int __run_perf_stat(int argc, const char **argv, int run_idx)
 {
 	int interval = stat_config.interval;
@@ -579,6 +601,8 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
 		enable_counters();
 		while (!done) {
 			nanosleep(&ts, NULL);
+			if (!is_target_alive(&target, evsel_list->threads))
+				break;
 			if (timeout)
 				break;
 			if (interval) {
-- 
GitLab


From c1c9b9695cc8868048f45c7e2559f65bc0be7382 Mon Sep 17 00:00:00 2001
From: Milian Wolff <milian.wolff@kdab.com>
Date: Sun, 21 Oct 2018 21:14:23 +0200
Subject: [PATCH 0089/1890] perf script: Allow extended console debug output

The script tool isn't using a browser, yet use_browser wasn't set
explicitly to zero. This in turn lead to confusing output such as:

  ```
  $ perf script -vvv ...
  ...
  overlapping maps in /home/milian/foobar (disable tui for more info)
  ...
  ```

Explicitly set use_browser to 0 now, which gives us the extended
debug information now in perf script as expected.

Signed-off-by: Milian Wolff <milian.wolff@kdab.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Link: http://lkml.kernel.org/r/20181021191424.16183-1-milian.wolff@kdab.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-script.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 4da5e32b9e035..bd468b90801b2 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -3417,8 +3417,10 @@ int cmd_script(int argc, const char **argv)
 		exit(-1);
 	}
 
-	if (!script_name)
+	if (!script_name) {
 		setup_pager();
+		use_browser = 0;
+	}
 
 	session = perf_session__new(&data, false, &script.tool);
 	if (session == NULL)
-- 
GitLab


From 7ee40678af935fb489b0c6cf0f75808175214cd7 Mon Sep 17 00:00:00 2001
From: Milian Wolff <milian.wolff@kdab.com>
Date: Sun, 21 Oct 2018 21:14:24 +0200
Subject: [PATCH 0090/1890] perf script: Flush output stream after events in
 verbose mode

When the perf script output is written to a terminal stream, the normal
output of `perf script` would get buffered, but its debug output would
be written directly. This made it quite hard to figure out where a given
debug output is coming from.

We can improve on this by flushing the output buffer after processing an
event. To see the value, compare the following output for a `perf script
-v` run:

Before this patch:
```
unwind: reg 16, val 7faf7dfdc000
unwind: reg 7, val 7ffc80811e30
unwind: find_proc_info dso /usr/lib/ld-2.28.so
unwind: reg 6, val 0
unwind: _start:ip = 0x7faf7dfdc000 (0x2000)
unwind: reg 16, val 7faf7dfdc000
unwind: reg 7, val 7ffc80811e30
unwind: find_proc_info dso /usr/lib/ld-2.28.so
unwind: reg 6, val 0
unwind: _start:ip = 0x7faf7dfdc000 (0x2000)
unwind: reg 16, val 7faf7dfdc000
unwind: reg 7, val 7ffc80811e30
unwind: find_proc_info dso /usr/lib/ld-2.28.so
unwind: reg 6, val 0
unwind: _start:ip = 0x7faf7dfdc000 (0x2000)
unwind: reg 16, val 7faf7dfdc000
unwind: reg 7, val 7ffc80811e30
... lots and lots of verbose debug output
cpp-inlining 24617 90229.122036534:          1 cycles:uppp:
            7faf7dfdc000 _start+0x0 (/usr/lib/ld-2.28.so)

cpp-inlining 24617 90229.122043974:          1 cycles:uppp:
            7faf7dfdc000 _start+0x0 (/usr/lib/ld-2.28.so)
...
```

After this patch:
```
...
unwind: reg 16, val 7faf7dfdc000
unwind: reg 7, val 7ffc80811e30
unwind: find_proc_info dso /usr/lib/ld-2.28.so
unwind: reg 6, val 0
unwind: _start:ip = 0x7faf7dfdc000 (0x2000)
cpp-inlining 24617 90229.122036534:          1 cycles:uppp:
            7faf7dfdc000 _start+0x0 (/usr/lib/ld-2.28.so)

unwind: reg 16, val 7faf7dfdc000
unwind: reg 7, val 7ffc80811e30
unwind: find_proc_info dso /usr/lib/ld-2.28.so
unwind: reg 6, val 0
unwind: _start:ip = 0x7faf7dfdc000 (0x2000)
cpp-inlining 24617 90229.122043974:          1 cycles:uppp:
            7faf7dfdc000 _start+0x0 (/usr/lib/ld-2.28.so)
...
```

This new output format makes it much easier to use perf script output
for debugging purposes, e.g. to investigate broken dwarf unwinding.

Signed-off-by: Milian Wolff <milian.wolff@kdab.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: http://lkml.kernel.org/r/20181021191424.16183-2-milian.wolff@kdab.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-script.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index bd468b90801b2..ca09b7d2adb7e 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -1737,6 +1737,9 @@ static void process_event(struct perf_script *script,
 
 	if (PRINT_FIELD(METRIC))
 		perf_sample__fprint_metric(script, thread, evsel, sample, fp);
+
+	if (verbose)
+		fflush(fp);
 }
 
 static struct scripting_ops	*scripting_ops;
-- 
GitLab


From a9c5e6c1e9bff42ca5f01ceb3092a27a010755fb Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 22 Oct 2018 14:14:16 -0300
Subject: [PATCH 0091/1890] perf trace: Introduce per-event maximum number of
 events property

Call it 'nr', as in this context it should be expressive enough, i.e.:

  # perf trace -e sched:*waking/nr=8,call-graph=fp/
     0.000 :0/0 sched:sched_waking:comm=rcu_sched pid=10 prio=120 target_cpu=001
                                       try_to_wake_up ([kernel.kallsyms])
                                       sched_clock ([kernel.kallsyms])
     3.933 :0/0 sched:sched_waking:comm=rcu_sched pid=10 prio=120 target_cpu=001
                                       try_to_wake_up ([kernel.kallsyms])
                                       sched_clock ([kernel.kallsyms])
     3.970 IPDL Backgroun/3622 sched:sched_waking:comm=Gecko_IOThread pid=3569 prio=120 target_cpu=003
                                       try_to_wake_up ([kernel.kallsyms])
                                       __libc_write (/usr/lib64/libpthread-2.26.so)
    20.069 IPDL Backgroun/3622 sched:sched_waking:comm=Gecko_IOThread pid=3569 prio=120 target_cpu=003
                                       try_to_wake_up ([kernel.kallsyms])
                                       __libc_write (/usr/lib64/libpthread-2.26.so)
    37.170 IPDL Backgroun/3622 sched:sched_waking:comm=Gecko_IOThread pid=3569 prio=120 target_cpu=003
                                       try_to_wake_up ([kernel.kallsyms])
                                       __libc_write (/usr/lib64/libpthread-2.26.so)
    53.267 IPDL Backgroun/3622 sched:sched_waking:comm=Gecko_IOThread pid=3569 prio=120 target_cpu=003
                                       try_to_wake_up ([kernel.kallsyms])
                                       __libc_write (/usr/lib64/libpthread-2.26.so)
    70.365 IPDL Backgroun/3622 sched:sched_waking:comm=Gecko_IOThread pid=3569 prio=120 target_cpu=003
                                       try_to_wake_up ([kernel.kallsyms])
                                       __libc_write (/usr/lib64/libpthread-2.26.so)
    75.781 Web Content/3649 sched:sched_waking:comm=JS Helper pid=3670 prio=120 target_cpu=000
                                       try_to_wake_up ([kernel.kallsyms])
                                       try_to_wake_up ([kernel.kallsyms])
                                       wake_up_q ([kernel.kallsyms])
                                       futex_wake ([kernel.kallsyms])
                                       do_futex ([kernel.kallsyms])
                                       __x64_sys_futex ([kernel.kallsyms])
                                       do_syscall_64 ([kernel.kallsyms])
                                       entry_SYSCALL_64_after_hwframe ([kernel.kallsyms])
                                       pthread_cond_signal@@GLIBC_2.3.2 (/usr/lib64/libpthread-2.26.so)
  #

  # perf trace -e sched:*switch/nr=2/,block:*_plug/nr=4/,block:*_unplug/nr=1/,net:*dev_queue/nr=3,max-stack=16/
     0.000 :0/0 sched:sched_switch:swapper/0:0 [120] S ==> trace:3367 [120]
     0.046 :0/0 sched:sched_switch:swapper/1:0 [120] S ==> kworker/u16:58:2722 [120]
   570.670 irq/50-iwlwifi/680 net:net_dev_queue:dev=wlp3s0 skbaddr=0xffff93498051ef00 len=66
                                       __dev_queue_xmit ([kernel.kallsyms])
  1106.141 jbd2/dm-0-8/476 block:block_plug:[jbd2/dm-0-8]
  1106.175 jbd2/dm-0-8/476 block:block_unplug:[jbd2/dm-0-8] 1
  1618.088 kworker/u16:30/2694 block:block_plug:[kworker/u16:30]
  1810.000 :0/0 net:net_dev_queue:dev=vnet0 skbaddr=0xffff93498051ef00 len=52
                                       __dev_queue_xmit ([kernel.kallsyms])
  3857.974 :0/0 net:net_dev_queue:dev=vnet0 skbaddr=0xffff93498051f900 len=52
                                       __dev_queue_xmit ([kernel.kallsyms])
  4790.277 jbd2/dm-2-8/748 block:block_plug:[jbd2/dm-2-8]
  4790.448 jbd2/dm-2-8/748 block:block_plug:[jbd2/dm-2-8]
  #

The global --max-events has precendence:

  # trace --max-events 3 -e sched:*switch/nr=2/,block:*_plug/nr=4/,block:*_unplug/nr=1/,net:*dev_queue/nr=3,max-stack=16/
     0.000 :0/0 sched:sched_switch:swapper/0:0 [120] S ==> qemu-system-x86:2252 [120]
     0.029 qemu-system-x8/2252 sched:sched_switch:qemu-system-x86:2252 [120] D ==> swapper/0:0 [120]
    58.047 DNS Res~er #14/31661 net:net_dev_queue:dev=wlp3s0 skbaddr=0xffff9346966af100 len=84
                                       __dev_queue_xmit ([kernel.kallsyms])
                                       __libc_send (/usr/lib64/libpthread-2.26.so)
  #

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-s4jswltvh660ughvg9nwngah@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-trace.txt | 20 ++++++++++++++++++++
 tools/perf/builtin-trace.c              | 19 +++++++++++++++++--
 tools/perf/util/evsel.h                 |  1 +
 3 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index 0d1a1cd4d3281..e113450503d2f 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -285,6 +285,26 @@ Trace the next min page page fault to take place on the first CPU:
                                        [0x18b26e6bc2bd] (/tmp/perf-17136.map)
   #
 
+Trace the next two sched:sched_switch events, four block:*_plug events, the
+next block:*_unplug and the next three net:*dev_queue events, this last one
+with a backtrace of at most 16 entries, system wide:
+
+  # perf trace -e sched:*switch/nr=2/,block:*_plug/nr=4/,block:*_unplug/nr=1/,net:*dev_queue/nr=3,max-stack=16/
+     0.000 :0/0 sched:sched_switch:swapper/2:0 [120] S ==> rcu_sched:10 [120]
+     0.015 rcu_sched/10 sched:sched_switch:rcu_sched:10 [120] R ==> swapper/2:0 [120]
+   254.198 irq/50-iwlwifi/680 net:net_dev_queue:dev=wlp3s0 skbaddr=0xffff93498051f600 len=66
+                                       __dev_queue_xmit ([kernel.kallsyms])
+   273.977 :0/0 net:net_dev_queue:dev=wlp3s0 skbaddr=0xffff93498051f600 len=78
+                                       __dev_queue_xmit ([kernel.kallsyms])
+   274.007 :0/0 net:net_dev_queue:dev=wlp3s0 skbaddr=0xffff93498051ff00 len=78
+                                       __dev_queue_xmit ([kernel.kallsyms])
+  2930.140 kworker/u16:58/2722 block:block_plug:[kworker/u16:58]
+  2930.162 kworker/u16:58/2722 block:block_unplug:[kworker/u16:58] 1
+  4466.094 jbd2/dm-2-8/748 block:block_plug:[jbd2/dm-2-8]
+  8050.123 kworker/u16:30/2694 block:block_plug:[kworker/u16:30]
+  8050.271 kworker/u16:30/2694 block:block_plug:[kworker/u16:30]
+  #
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-script[1]
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 589e0412652a5..7081d7ea12e5d 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -2092,8 +2092,18 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
 				union perf_event *event __maybe_unused,
 				struct perf_sample *sample)
 {
-	struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
+	struct thread *thread;
 	int callchain_ret = 0;
+	/*
+	 * Check if we called perf_evsel__disable(evsel) due to, for instance,
+	 * this event's max_events having been hit and this is an entry coming
+	 * from the ring buffer that we should discard, since the max events
+	 * have already been considered/printed.
+	 */
+	if (evsel->disabled)
+		return 0;
+
+	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
 
 	if (sample->callchain) {
 		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
@@ -2142,6 +2152,11 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
 					      sample->raw_data, sample->raw_size,
 					      trace->output);
 			++trace->nr_events_printed;
+
+			if (evsel->max_events != ULONG_MAX && ++evsel->nr_events_printed == evsel->max_events) {
+				perf_evsel__disable(evsel);
+				perf_evsel__close(evsel);
+			}
 		}
 	}
 
@@ -2726,7 +2741,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 		int timeout = done ? 100 : -1;
 
 		if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
-			if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
+			if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP | POLLNVAL) == 0)
 				draining = true;
 
 			goto again;
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 4ef50f157b50b..3147ca76c6fc2 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -102,6 +102,7 @@ struct perf_evsel {
 	int			idx;
 	u32			ids;
 	unsigned long		max_events;
+	unsigned long		nr_events_printed;
 	char			*name;
 	double			scale;
 	const char		*unit;
-- 
GitLab


From 3e71c70c946b5d5e7b21397c621b14951e5c0fcf Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Mon, 1 Oct 2018 09:28:35 +0300
Subject: [PATCH 0092/1890] perf scripts python: call-graph-from-sql.py: Use
 SPDX license identifier

Use SPDX license identifier in call-graph-from-sql.py.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/20181001062853.28285-2-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/scripts/python/call-graph-from-sql.py | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/tools/perf/scripts/python/call-graph-from-sql.py b/tools/perf/scripts/python/call-graph-from-sql.py
index b494a67a1c679..ce1b91fcd6b88 100644
--- a/tools/perf/scripts/python/call-graph-from-sql.py
+++ b/tools/perf/scripts/python/call-graph-from-sql.py
@@ -1,15 +1,7 @@
 #!/usr/bin/python2
-# call-graph-from-sql.py: create call-graph from sql database
-# Copyright (c) 2014-2017, Intel Corporation.
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms and conditions of the GNU General Public License,
-# version 2, as published by the Free Software Foundation.
-#
-# This program is distributed in the hope it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-# more details.
+# SPDX-License-Identifier: GPL-2.0
+# exported-sql-viewer.py: view data from sql database
+# Copyright (c) 2014-2018, Intel Corporation.
 
 # To use this script you will need to have exported data using either the
 # export-to-sqlite.py or the export-to-postgresql.py script.  Refer to those
-- 
GitLab


From 241dbbb1fb21bfff0c46c6873cee5c7923d05378 Mon Sep 17 00:00:00 2001
From: Evan Quan <evan.quan@amd.com>
Date: Wed, 17 Oct 2018 16:36:02 +0800
Subject: [PATCH 0093/1890] drm/amd/powerplay: error out when force clock level
 under auto dpm mode V2

Forcing clock level is supported under manual dpm mode only. Error out
when trying to set under manual mode. Instead of doing nothing and
reporting success.

V2: update for mclk/pcie clock level settings also

Signed-off-by: Evan Quan <evan.quan@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c        | 15 ++++++++++++---
 drivers/gpu/drm/amd/powerplay/amd_powerplay.c | 11 +++++++----
 2 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 94055a485e013..59cc678de8c15 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -704,7 +704,10 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
 		return ret;
 
 	if (adev->powerplay.pp_funcs->force_clock_level)
-		amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask);
+		ret = amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask);
+
+	if (ret)
+		return -EINVAL;
 
 	return count;
 }
@@ -737,7 +740,10 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
 		return ret;
 
 	if (adev->powerplay.pp_funcs->force_clock_level)
-		amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask);
+		ret = amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask);
+
+	if (ret)
+		return -EINVAL;
 
 	return count;
 }
@@ -770,7 +776,10 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
 		return ret;
 
 	if (adev->powerplay.pp_funcs->force_clock_level)
-		amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask);
+		ret = amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask);
+
+	if (ret)
+		return -EINVAL;
 
 	return count;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
index e8964cae6b93d..da9ff2cc27772 100644
--- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
@@ -723,11 +723,14 @@ static int pp_dpm_force_clock_level(void *handle,
 		pr_info("%s was not implemented.\n", __func__);
 		return 0;
 	}
+
+	if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) {
+		pr_info("force clock level is for dpm manual mode only.\n");
+		return -EINVAL;
+	}
+
 	mutex_lock(&hwmgr->smu_lock);
-	if (hwmgr->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL)
-		ret = hwmgr->hwmgr_func->force_clock_level(hwmgr, type, mask);
-	else
-		ret = -EINVAL;
+	ret = hwmgr->hwmgr_func->force_clock_level(hwmgr, type, mask);
 	mutex_unlock(&hwmgr->smu_lock);
 	return ret;
 }
-- 
GitLab


From 91eec27ebbc4f4e7cf4ee6a589d2f060ba9d0d79 Mon Sep 17 00:00:00 2001
From: Emily Deng <Emily.Deng@amd.com>
Date: Thu, 18 Oct 2018 15:01:05 +0800
Subject: [PATCH 0094/1890] drm/amdgpu: Fix null pointer
 amdgpu_device_fw_loading

Need to check adev->powerplay.pp_funcs.

Signed-off-by: Emily Deng <Emily.Deng@amd.com>
Reviewed-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1e4dd09a50726..d11489e8b3888 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1600,7 +1600,7 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
 		}
 	}
 
-	if (adev->powerplay.pp_funcs->load_firmware) {
+	if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->load_firmware) {
 		r = adev->powerplay.pp_funcs->load_firmware(adev->powerplay.pp_handle);
 		if (r) {
 			pr_err("firmware loading failed\n");
-- 
GitLab


From f191415b24a3ad3fa22088af7cd7fc328a2f469f Mon Sep 17 00:00:00 2001
From: David Francis <David.Francis@amd.com>
Date: Thu, 18 Oct 2018 11:21:15 -0400
Subject: [PATCH 0095/1890] powerplay: Respect units on max dcfclk watermark

In a refactor, the watermark clock inputs to
powerplay from DC were changed from units of 10kHz to
kHz clocks.

One division by 100 was not converted into a division
by 1000.

Signed-off-by: David Francis <David.Francis@amd.com>
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c
index 4714b5b598255..99a33c33a32c9 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c
@@ -718,7 +718,7 @@ int smu_set_watermarks_for_clocks_ranges(void *wt_table,
 		table->WatermarkRow[1][i].MaxClock =
 			cpu_to_le16((uint16_t)
 			(wm_with_clock_ranges->wm_dmif_clocks_ranges[i].wm_max_dcfclk_clk_in_khz) /
-			100);
+			1000);
 		table->WatermarkRow[1][i].MinUclk =
 			cpu_to_le16((uint16_t)
 			(wm_with_clock_ranges->wm_dmif_clocks_ranges[i].wm_min_mem_clk_in_khz) /
-- 
GitLab


From f7becf9a0803030ae125189823328e2d62b90f7b Mon Sep 17 00:00:00 2001
From: Joseph Greathouse <Joseph.Greathouse@amd.com>
Date: Thu, 18 Oct 2018 14:57:45 -0500
Subject: [PATCH 0096/1890] drm/amd/pp: enable power limit increase in OD mode

OverDrive mode allows users to increase the maximum SCLK and MCLK
frequencies beyond the default on the GPU. However, this may not
results in large performance gains if the GPU then runs into its TDP
power limit. This patch adds the capability to increase the power
limit of a GPU above its default maximum.

This is only allowed when overdrive is enabled in the ppfeaturemask,
since this is an overdrive feature. The TDPODLimit value from the
VBIOS describes how how much higher the TDP should be allowed to go
over its default, in percentage.

v2: Moved dereference of hwmgr to after its validity check

Signed-off-by: Joseph Greathouse <Joseph.Greathouse@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/amd_powerplay.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
index da9ff2cc27772..bf09735ea3ac7 100644
--- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
@@ -966,6 +966,7 @@ static int pp_dpm_switch_power_profile(void *handle,
 static int pp_set_power_limit(void *handle, uint32_t limit)
 {
 	struct pp_hwmgr *hwmgr = handle;
+	uint32_t max_power_limit;
 
 	if (!hwmgr || !hwmgr->pm_en)
 		return -EINVAL;
@@ -978,7 +979,13 @@ static int pp_set_power_limit(void *handle, uint32_t limit)
 	if (limit == 0)
 		limit = hwmgr->default_power_limit;
 
-	if (limit > hwmgr->default_power_limit)
+	max_power_limit = hwmgr->default_power_limit;
+	if (hwmgr->od_enabled) {
+		max_power_limit *= (100 + hwmgr->platform_descriptor.TDPODLimit);
+		max_power_limit /= 100;
+	}
+
+	if (limit > max_power_limit)
 		return -EINVAL;
 
 	mutex_lock(&hwmgr->smu_lock);
@@ -997,8 +1004,13 @@ static int pp_get_power_limit(void *handle, uint32_t *limit, bool default_limit)
 
 	mutex_lock(&hwmgr->smu_lock);
 
-	if (default_limit)
+	if (default_limit) {
 		*limit = hwmgr->default_power_limit;
+		if (hwmgr->od_enabled) {
+			*limit *= (100 + hwmgr->platform_descriptor.TDPODLimit);
+			*limit /= 100;
+		}
+	}
 	else
 		*limit = hwmgr->power_limit;
 
-- 
GitLab


From b44ec6a3eb386d398c6c8b8c60d1c8473ff9cb7e Mon Sep 17 00:00:00 2001
From: Evan Quan <evan.quan@amd.com>
Date: Thu, 18 Oct 2018 17:54:06 +0800
Subject: [PATCH 0097/1890] drm/amd/powerplay: drop highest UCLK setting after
 display configuration change

The UCLK is forced to highest at the start of display configuration
change. Downgrade the UCLK from highest after display configuration change.
Otherwise, we may see the UCLK stuck in the highest in some cases.

Signed-off-by: Evan Quan <evan.quan@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
index b4dbbb7c334ce..6e0b2b8df4551 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
@@ -2046,6 +2046,8 @@ static int vega20_notify_smc_display_config_after_ps_adjustment(
 {
 	struct vega20_hwmgr *data =
 			(struct vega20_hwmgr *)(hwmgr->backend);
+	struct vega20_single_dpm_table *dpm_table =
+			&data->dpm_table.mem_table;
 	struct PP_Clocks min_clocks = {0};
 	struct pp_display_clock_request clock_req;
 	int ret = 0;
@@ -2076,6 +2078,15 @@ static int vega20_notify_smc_display_config_after_ps_adjustment(
 		}
 	}
 
+	if (data->smu_features[GNLD_DPM_UCLK].enabled) {
+		dpm_table->dpm_state.hard_min_level = min_clocks.memoryClock / 100;
+		PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(hwmgr,
+				PPSMC_MSG_SetHardMinByFreq,
+				(PPCLK_UCLK << 16 ) | dpm_table->dpm_state.hard_min_level)),
+				"[SetHardMinFreq] Set hard min uclk failed!",
+				return ret);
+	}
+
 	return 0;
 }
 
-- 
GitLab


From 3b2ad16dc4288dab05862aeacf1e124c4662d475 Mon Sep 17 00:00:00 2001
From: Evan Quan <evan.quan@amd.com>
Date: Fri, 19 Oct 2018 15:41:20 +0800
Subject: [PATCH 0098/1890] drm/amd/powerplay: bump the PPtable version
 supported

As the matching VBIOS is already ready. Also drop the
temporary workarounds applied before.

Signed-off-by: Evan Quan <evan.quan@amd.com>
Reviewed-by: Feifei Xu <Feifei.Xu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../powerplay/hwmgr/vega20_processpptables.c  | 46 +++++++------------
 .../drm/amd/powerplay/inc/smu11_driver_if.h   |  2 +-
 2 files changed, 18 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_processpptables.c
index e5f7f82300659..f7e8bbdc20b0b 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_processpptables.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_processpptables.c
@@ -716,10 +716,6 @@ static int append_vbios_pptable(struct pp_hwmgr *hwmgr, PPTable_t *ppsmc_pptable
 		"[appendVbiosPPTable] Failed to retrieve Smc Dpm Table from VBIOS!",
 		return -1);
 
-	memset(ppsmc_pptable->Padding32,
-			0,
-			sizeof(struct atom_smc_dpm_info_v4_4) -
-			sizeof(struct atom_common_table_header));
 	ppsmc_pptable->MaxVoltageStepGfx = smc_dpm_table->maxvoltagestepgfx;
 	ppsmc_pptable->MaxVoltageStepSoc = smc_dpm_table->maxvoltagestepsoc;
 
@@ -778,22 +774,19 @@ static int append_vbios_pptable(struct pp_hwmgr *hwmgr, PPTable_t *ppsmc_pptable
 	ppsmc_pptable->FllGfxclkSpreadPercent = smc_dpm_table->fllgfxclkspreadpercent;
 	ppsmc_pptable->FllGfxclkSpreadFreq = smc_dpm_table->fllgfxclkspreadfreq;
 
-	if ((smc_dpm_table->table_header.format_revision == 4) &&
-	    (smc_dpm_table->table_header.content_revision == 4)) {
-		for (i = 0; i < I2C_CONTROLLER_NAME_COUNT; i++) {
-			ppsmc_pptable->I2cControllers[i].Enabled =
-				smc_dpm_table->i2ccontrollers[i].enabled;
-			ppsmc_pptable->I2cControllers[i].SlaveAddress =
-				smc_dpm_table->i2ccontrollers[i].slaveaddress;
-			ppsmc_pptable->I2cControllers[i].ControllerPort =
-				smc_dpm_table->i2ccontrollers[i].controllerport;
-			ppsmc_pptable->I2cControllers[i].ThermalThrottler =
-				smc_dpm_table->i2ccontrollers[i].thermalthrottler;
-			ppsmc_pptable->I2cControllers[i].I2cProtocol =
-				smc_dpm_table->i2ccontrollers[i].i2cprotocol;
-			ppsmc_pptable->I2cControllers[i].I2cSpeed =
-				smc_dpm_table->i2ccontrollers[i].i2cspeed;
-		}
+	for (i = 0; i < I2C_CONTROLLER_NAME_COUNT; i++) {
+		ppsmc_pptable->I2cControllers[i].Enabled =
+			smc_dpm_table->i2ccontrollers[i].enabled;
+		ppsmc_pptable->I2cControllers[i].SlaveAddress =
+			smc_dpm_table->i2ccontrollers[i].slaveaddress;
+		ppsmc_pptable->I2cControllers[i].ControllerPort =
+			smc_dpm_table->i2ccontrollers[i].controllerport;
+		ppsmc_pptable->I2cControllers[i].ThermalThrottler =
+			smc_dpm_table->i2ccontrollers[i].thermalthrottler;
+		ppsmc_pptable->I2cControllers[i].I2cProtocol =
+			smc_dpm_table->i2ccontrollers[i].i2cprotocol;
+		ppsmc_pptable->I2cControllers[i].I2cSpeed =
+			smc_dpm_table->i2ccontrollers[i].i2cspeed;
 	}
 
 	return 0;
@@ -882,15 +875,10 @@ static int init_powerplay_table_information(
 	if (pptable_information->smc_pptable == NULL)
 		return -ENOMEM;
 
-	if (powerplay_table->smcPPTable.Version <= 2)
-		memcpy(pptable_information->smc_pptable,
-				&(powerplay_table->smcPPTable),
-				sizeof(PPTable_t) -
-				sizeof(I2cControllerConfig_t) * I2C_CONTROLLER_NAME_COUNT);
-	else
-		memcpy(pptable_information->smc_pptable,
-				&(powerplay_table->smcPPTable),
-				sizeof(PPTable_t));
+	memcpy(pptable_information->smc_pptable,
+			&(powerplay_table->smcPPTable),
+			sizeof(PPTable_t));
+
 
 	result = append_vbios_pptable(hwmgr, (pptable_information->smc_pptable));
 
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h b/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h
index 2998a49960ede..63d5cf6915496 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if.h
@@ -29,7 +29,7 @@
 // any structure is changed in this file
 #define SMU11_DRIVER_IF_VERSION 0x12
 
-#define PPTABLE_V20_SMU_VERSION 2
+#define PPTABLE_V20_SMU_VERSION 3
 
 #define NUM_GFXCLK_DPM_LEVELS  16
 #define NUM_VCLK_DPM_LEVELS    8
-- 
GitLab


From e738c5f15562635c02a03365cb62cad35051babe Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Thu, 18 Oct 2018 13:57:22 +0000
Subject: [PATCH 0099/1890] powerpc/8xx: Add DT node for using the SEC engine
 of the MPC885

The MPC885 has SEC engine version 1.2 with the following details:
- Number of Crypto channels: 1
- Exec Units: DEU, MDEU and AESU
- Available descriptors: 00010, 00100, 00110, 01000, 11000, 11010

It is also supposed to have descriptor 00000, but it doesn't work
properly so we keep it out for the moment.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Scott Wood <oss@buserror.net>
---
 arch/powerpc/boot/dts/mpc885ads.dts | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/boot/dts/mpc885ads.dts b/arch/powerpc/boot/dts/mpc885ads.dts
index 5b037f51741df..3aa300afbbca4 100644
--- a/arch/powerpc/boot/dts/mpc885ads.dts
+++ b/arch/powerpc/boot/dts/mpc885ads.dts
@@ -72,7 +72,7 @@ soc@ff000000 {
 		#address-cells = <1>;
 		#size-cells = <1>;
 		device_type = "soc";
-		ranges = <0x0 0xff000000 0x4000>;
+		ranges = <0x0 0xff000000 0x28000>;
 		bus-frequency = <0>;
 
 		// Temporary -- will go away once kernel uses ranges for get_immrbase().
@@ -224,6 +224,17 @@ i2c@860 {
 				#size-cells = <0>;
 			};
 		};
+
+		crypto@20000 {
+			compatible = "fsl,sec1.2", "fsl,sec1.0";
+			reg = <0x20000 0x8000>;
+			interrupts = <1 1>;
+			interrupt-parent = <&PIC>;
+			fsl,num-channels = <1>;
+			fsl,channel-fifo-len = <24>;
+			fsl,exec-units-mask = <0x4c>;
+			fsl,descriptor-types-mask = <0x05000154>;
+		};
 	};
 
 	chosen {
-- 
GitLab


From b6ae3550c8e2ca8f7ce1b7f04585dc12a0eb5cbd Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Thu, 18 Oct 2018 05:22:27 +0000
Subject: [PATCH 0100/1890] powerpc/8xx: add missing header in 8xx_mmu.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

arch/powerpc/mm/8xx_mmu.c:174:6: error: no previous prototype for â€˜set_contextâ€™ [-Werror=missing-prototypes]
 void set_context(unsigned long id, pgd_t *pgd)

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Scott Wood <oss@buserror.net>
---
 arch/powerpc/mm/8xx_mmu.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c
index 36484a2ef9158..64ee7597380ea 100644
--- a/arch/powerpc/mm/8xx_mmu.c
+++ b/arch/powerpc/mm/8xx_mmu.c
@@ -13,6 +13,7 @@
  */
 
 #include <linux/memblock.h>
+#include <linux/mmu_context.h>
 #include <asm/fixmap.h>
 #include <asm/code-patching.h>
 
-- 
GitLab


From 7e5583fd77194b45eef04835576a4fe7f3f235c8 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Fri, 5 Oct 2018 10:29:31 +0200
Subject: [PATCH 0101/1890] ubi: Mark expected switch fall-throughs

In preparation to enabling -Wimplicit-fallthrough, mark switch cases
where we are expecting to fall through.

Addresses-Coverity-ID: 1373884 ("Missing break in switch")
Addresses-Coverity-ID: 114869 ("Missing break in switch")
Addresses-Coverity-ID: 114870 ("Missing break in switch")
Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 drivers/mtd/ubi/attach.c | 1 +
 drivers/mtd/ubi/build.c  | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/drivers/mtd/ubi/attach.c b/drivers/mtd/ubi/attach.c
index 93ceea4f27d57..e294d3986ba96 100644
--- a/drivers/mtd/ubi/attach.c
+++ b/drivers/mtd/ubi/attach.c
@@ -1072,6 +1072,7 @@ static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai,
 			 * be a result of power cut during erasure.
 			 */
 			ai->maybe_bad_peb_count += 1;
+		/* fall through */
 	case UBI_IO_BAD_HDR:
 			/*
 			 * If we're facing a bad VID header we have to drop *all*
diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c
index d2a726654ff11..a4e3454133a47 100644
--- a/drivers/mtd/ubi/build.c
+++ b/drivers/mtd/ubi/build.c
@@ -1334,8 +1334,10 @@ static int bytes_str_to_int(const char *str)
 	switch (*endp) {
 	case 'G':
 		result *= 1024;
+		/* fall through */
 	case 'M':
 		result *= 1024;
+		/* fall through */
 	case 'K':
 		result *= 1024;
 		if (endp[1] == 'i' && endp[2] == 'B')
-- 
GitLab


From c4de6d7e4319ffb52d6db3138057712749540191 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Fri, 7 Sep 2018 14:36:23 +0200
Subject: [PATCH 0102/1890] ubifs: Refactor create_default_filesystem()

create_default_filesystem() allocates memory for a node, writes that
node and frees the memory directly afterwards. With this patch we
allocate memory for all nodes at the beginning of the function and
free the memory at the end. This makes it easier to implement
authentication support since with authentication support we'll need
the contents of some nodes when creating other nodes.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/sb.c | 95 +++++++++++++++++++++++++--------------------------
 1 file changed, 47 insertions(+), 48 deletions(-)

diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index bf17f58908ff9..cf7ee2880c571 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -82,6 +82,7 @@ static int create_default_filesystem(struct ubifs_info *c)
 	int err, tmp, jnl_lebs, log_lebs, max_buds, main_lebs, main_first;
 	int lpt_lebs, lpt_first, orph_lebs, big_lpt, ino_waste, sup_flags = 0;
 	int min_leb_cnt = UBIFS_MIN_LEB_CNT;
+	int idx_node_size;
 	long long tmp64, main_bytes;
 	__le64 tmp_le64;
 	__le32 tmp_le32;
@@ -156,11 +157,19 @@ static int create_default_filesystem(struct ubifs_info *c)
 
 	main_first = c->leb_cnt - main_lebs;
 
+	sup = kzalloc(ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size), GFP_KERNEL);
+	mst = kzalloc(c->mst_node_alsz, GFP_KERNEL);
+	idx_node_size = ubifs_idx_node_sz(c, 1);
+	idx = kzalloc(ALIGN(tmp, c->min_io_size), GFP_KERNEL);
+	ino = kzalloc(ALIGN(UBIFS_INO_NODE_SZ, c->min_io_size), GFP_KERNEL);
+	cs = kzalloc(ALIGN(UBIFS_CS_NODE_SZ, c->min_io_size), GFP_KERNEL);
+
+	if (!sup || !mst || !idx || !ino || !cs) {
+		err = -ENOMEM;
+		goto out;
+	}
+
 	/* Create default superblock */
-	tmp = ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size);
-	sup = kzalloc(tmp, GFP_KERNEL);
-	if (!sup)
-		return -ENOMEM;
 
 	tmp64 = (long long)max_buds * c->leb_size;
 	if (big_lpt)
@@ -197,17 +206,9 @@ static int create_default_filesystem(struct ubifs_info *c)
 	sup->rp_size = cpu_to_le64(tmp64);
 	sup->ro_compat_version = cpu_to_le32(UBIFS_RO_COMPAT_VERSION);
 
-	err = ubifs_write_node(c, sup, UBIFS_SB_NODE_SZ, 0, 0);
-	kfree(sup);
-	if (err)
-		return err;
-
 	dbg_gen("default superblock created at LEB 0:0");
 
 	/* Create default master node */
-	mst = kzalloc(c->mst_node_alsz, GFP_KERNEL);
-	if (!mst)
-		return -ENOMEM;
 
 	mst->ch.node_type = UBIFS_MST_NODE;
 	mst->log_lnum     = cpu_to_le32(UBIFS_LOG_LNUM);
@@ -253,24 +254,9 @@ static int create_default_filesystem(struct ubifs_info *c)
 
 	mst->total_used = cpu_to_le64(UBIFS_INO_NODE_SZ);
 
-	err = ubifs_write_node(c, mst, UBIFS_MST_NODE_SZ, UBIFS_MST_LNUM, 0);
-	if (err) {
-		kfree(mst);
-		return err;
-	}
-	err = ubifs_write_node(c, mst, UBIFS_MST_NODE_SZ, UBIFS_MST_LNUM + 1,
-			       0);
-	kfree(mst);
-	if (err)
-		return err;
-
 	dbg_gen("default master node created at LEB %d:0", UBIFS_MST_LNUM);
 
 	/* Create the root indexing node */
-	tmp = ubifs_idx_node_sz(c, 1);
-	idx = kzalloc(ALIGN(tmp, c->min_io_size), GFP_KERNEL);
-	if (!idx)
-		return -ENOMEM;
 
 	c->key_fmt = UBIFS_SIMPLE_KEY_FMT;
 	c->key_hash = key_r5_hash;
@@ -282,19 +268,11 @@ static int create_default_filesystem(struct ubifs_info *c)
 	key_write_idx(c, &key, &br->key);
 	br->lnum = cpu_to_le32(main_first + DEFAULT_DATA_LEB);
 	br->len  = cpu_to_le32(UBIFS_INO_NODE_SZ);
-	err = ubifs_write_node(c, idx, tmp, main_first + DEFAULT_IDX_LEB, 0);
-	kfree(idx);
-	if (err)
-		return err;
 
 	dbg_gen("default root indexing node created LEB %d:0",
 		main_first + DEFAULT_IDX_LEB);
 
 	/* Create default root inode */
-	tmp = ALIGN(UBIFS_INO_NODE_SZ, c->min_io_size);
-	ino = kzalloc(tmp, GFP_KERNEL);
-	if (!ino)
-		return -ENOMEM;
 
 	ino_key_init_flash(c, &ino->key, UBIFS_ROOT_INO);
 	ino->ch.node_type = UBIFS_INO_NODE;
@@ -317,12 +295,6 @@ static int create_default_filesystem(struct ubifs_info *c)
 	/* Set compression enabled by default */
 	ino->flags = cpu_to_le32(UBIFS_COMPR_FL);
 
-	err = ubifs_write_node(c, ino, UBIFS_INO_NODE_SZ,
-			       main_first + DEFAULT_DATA_LEB, 0);
-	kfree(ino);
-	if (err)
-		return err;
-
 	dbg_gen("root inode created at LEB %d:0",
 		main_first + DEFAULT_DATA_LEB);
 
@@ -331,19 +303,46 @@ static int create_default_filesystem(struct ubifs_info *c)
 	 * always the case during normal file-system operation. Write a fake
 	 * commit start node to the log.
 	 */
-	tmp = ALIGN(UBIFS_CS_NODE_SZ, c->min_io_size);
-	cs = kzalloc(tmp, GFP_KERNEL);
-	if (!cs)
-		return -ENOMEM;
 
 	cs->ch.node_type = UBIFS_CS_NODE;
+
+	err = ubifs_write_node(c, sup, UBIFS_SB_NODE_SZ, 0, 0);
+	if (err)
+		goto out;
+
+	err = ubifs_write_node(c, mst, UBIFS_MST_NODE_SZ, UBIFS_MST_LNUM, 0);
+	if (err)
+		goto out;
+
+	err = ubifs_write_node(c, mst, UBIFS_MST_NODE_SZ, UBIFS_MST_LNUM + 1,
+			       0);
+	if (err)
+		goto out;
+
+	err = ubifs_write_node(c, idx, idx_node_size, main_first + DEFAULT_IDX_LEB, 0);
+	if (err)
+		goto out;
+
+	err = ubifs_write_node(c, ino, UBIFS_INO_NODE_SZ,
+			       main_first + DEFAULT_DATA_LEB, 0);
+	if (err)
+		goto out;
+
 	err = ubifs_write_node(c, cs, UBIFS_CS_NODE_SZ, UBIFS_LOG_LNUM, 0);
-	kfree(cs);
 	if (err)
-		return err;
+		goto out;
 
 	ubifs_msg(c, "default file-system created");
-	return 0;
+
+	err = 0;
+out:
+	kfree(sup);
+	kfree(mst);
+	kfree(idx);
+	kfree(ino);
+	kfree(cs);
+
+	return err;
 }
 
 /**
-- 
GitLab


From 545bc8f6b0c60eee789a91ac1f8c9a16467b2fc5 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Fri, 7 Sep 2018 14:36:24 +0200
Subject: [PATCH 0103/1890] ubifs: Pass ubifs_zbranch to try_read_node()

try_read_node() takes len, lnum and offs arguments which the caller all
extracts from the same struct ubifs_zbranch *. When adding authentication
support we would have to add a pointer to a hash to the arguments which
is also part of struct ubifs_zbranch. Pass the ubifs_zbranch * instead
so that we do not have to add another argument.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/tnc.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index bf416e5127431..8502c07c1e0a1 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -35,7 +35,7 @@
 #include "ubifs.h"
 
 static int try_read_node(const struct ubifs_info *c, void *buf, int type,
-			 int len, int lnum, int offs);
+			 struct ubifs_zbranch *zbr);
 static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key,
 			      struct ubifs_zbranch *zbr, void *node);
 
@@ -433,9 +433,7 @@ static int tnc_read_hashed_node(struct ubifs_info *c, struct ubifs_zbranch *zbr,
  * @c: UBIFS file-system description object
  * @buf: buffer to read to
  * @type: node type
- * @len: node length (not aligned)
- * @lnum: LEB number of node to read
- * @offs: offset of node to read
+ * @zbr: the zbranch describing the node to read
  *
  * This function tries to read a node of known type and length, checks it and
  * stores it in @buf. This function returns %1 if a node is present and %0 if
@@ -453,8 +451,11 @@ static int tnc_read_hashed_node(struct ubifs_info *c, struct ubifs_zbranch *zbr,
  * journal nodes may potentially be corrupted, so checking is required.
  */
 static int try_read_node(const struct ubifs_info *c, void *buf, int type,
-			 int len, int lnum, int offs)
+			 struct ubifs_zbranch *zbr)
 {
+	int len = zbr->len;
+	int lnum = zbr->lnum;
+	int offs = zbr->offs;
 	int err, node_len;
 	struct ubifs_ch *ch = buf;
 	uint32_t crc, node_crc;
@@ -507,8 +508,7 @@ static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key,
 
 	dbg_tnck(key, "LEB %d:%d, key ", zbr->lnum, zbr->offs);
 
-	ret = try_read_node(c, node, key_type(c, key), zbr->len, zbr->lnum,
-			    zbr->offs);
+	ret = try_read_node(c, node, key_type(c, key), zbr);
 	if (ret == 1) {
 		union ubifs_key node_key;
 		struct ubifs_dent_node *dent = node;
-- 
GitLab


From 22ceaa8c688d0c8a6fc6eb7ebf32c01914220d0a Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Fri, 7 Sep 2018 14:36:25 +0200
Subject: [PATCH 0104/1890] ubifs: Pass ubifs_zbranch to read_znode()

read_znode() takes len, lnum and offs arguments which the caller all
extracts from the same struct ubifs_zbranch *. When adding authentication
support we would have to add a pointer to a hash to the arguments which
is also part of struct ubifs_zbranch. Pass the ubifs_zbranch * instead
so that we do not have to add another argument.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/tnc_misc.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/fs/ubifs/tnc_misc.c b/fs/ubifs/tnc_misc.c
index d90ee01076a9e..6ce75999f2733 100644
--- a/fs/ubifs/tnc_misc.c
+++ b/fs/ubifs/tnc_misc.c
@@ -265,9 +265,7 @@ long ubifs_destroy_tnc_subtree(const struct ubifs_info *c,
 /**
  * read_znode - read an indexing node from flash and fill znode.
  * @c: UBIFS file-system description object
- * @lnum: LEB of the indexing node to read
- * @offs: node offset
- * @len: node length
+ * @zzbr: the zbranch describing the node to read
  * @znode: znode to read to
  *
  * This function reads an indexing node from the flash media and fills znode
@@ -276,9 +274,12 @@ long ubifs_destroy_tnc_subtree(const struct ubifs_info *c,
  * is wrong with it, this function prints complaint messages and returns
  * %-EINVAL.
  */
-static int read_znode(struct ubifs_info *c, int lnum, int offs, int len,
+static int read_znode(struct ubifs_info *c, struct ubifs_zbranch *zzbr,
 		      struct ubifs_znode *znode)
 {
+	int lnum = zzbr->lnum;
+	int offs = zzbr->offs;
+	int len = zzbr->len;
 	int i, err, type, cmp;
 	struct ubifs_idx_node *idx;
 
@@ -425,7 +426,7 @@ struct ubifs_znode *ubifs_load_znode(struct ubifs_info *c,
 	if (!znode)
 		return ERR_PTR(-ENOMEM);
 
-	err = read_znode(c, zbr->lnum, zbr->offs, zbr->len, znode);
+	err = read_znode(c, zbr, znode);
 	if (err)
 		goto out;
 
-- 
GitLab


From 0e26b6e2551e21df72c140e46819523e1b686009 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Fri, 7 Sep 2018 14:36:26 +0200
Subject: [PATCH 0105/1890] ubifs: Export pnode_lookup as ubifs_pnode_lookup

ubifs_lpt_lookup could be implemented using pnode_lookup. To make that
possible move pnode_lookup from lpt.c to lpt_commit.c. Rename it to
ubifs_pnode_lookup since it's now exported.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/lpt.c        | 32 ++++++++++++++++++++++++++++++++
 fs/ubifs/lpt_commit.c | 40 ++++------------------------------------
 fs/ubifs/ubifs.h      |  1 +
 3 files changed, 37 insertions(+), 36 deletions(-)

diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
index 31393370e3348..ed0c67fe7500b 100644
--- a/fs/ubifs/lpt.c
+++ b/fs/ubifs/lpt.c
@@ -1438,6 +1438,38 @@ struct ubifs_pnode *ubifs_get_pnode(struct ubifs_info *c,
 	return branch->pnode;
 }
 
+/**
+ * ubifs_pnode_lookup - lookup a pnode in the LPT.
+ * @c: UBIFS file-system description object
+ * @i: pnode number (0 to (main_lebs - 1) / UBIFS_LPT_FANOUT)
+ *
+ * This function returns a pointer to the pnode on success or a negative
+ * error code on failure.
+ */
+struct ubifs_pnode *ubifs_pnode_lookup(struct ubifs_info *c, int i)
+{
+	int err, h, iip, shft;
+	struct ubifs_nnode *nnode;
+
+	if (!c->nroot) {
+		err = ubifs_read_nnode(c, NULL, 0);
+		if (err)
+			return ERR_PTR(err);
+	}
+	i <<= UBIFS_LPT_FANOUT_SHIFT;
+	nnode = c->nroot;
+	shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT;
+	for (h = 1; h < c->lpt_hght; h++) {
+		iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
+		shft -= UBIFS_LPT_FANOUT_SHIFT;
+		nnode = ubifs_get_nnode(c, nnode, iip);
+		if (IS_ERR(nnode))
+			return ERR_CAST(nnode);
+	}
+	iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
+	return ubifs_get_pnode(c, nnode, iip);
+}
+
 /**
  * ubifs_lpt_lookup - lookup LEB properties in the LPT.
  * @c: UBIFS file-system description object
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index 7ce30994bbbac..62d6a87d4f5d0 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -618,38 +618,6 @@ static struct ubifs_pnode *next_pnode_to_dirty(struct ubifs_info *c,
 	return ubifs_get_pnode(c, nnode, iip);
 }
 
-/**
- * pnode_lookup - lookup a pnode in the LPT.
- * @c: UBIFS file-system description object
- * @i: pnode number (0 to (main_lebs - 1) / UBIFS_LPT_FANOUT))
- *
- * This function returns a pointer to the pnode on success or a negative
- * error code on failure.
- */
-static struct ubifs_pnode *pnode_lookup(struct ubifs_info *c, int i)
-{
-	int err, h, iip, shft;
-	struct ubifs_nnode *nnode;
-
-	if (!c->nroot) {
-		err = ubifs_read_nnode(c, NULL, 0);
-		if (err)
-			return ERR_PTR(err);
-	}
-	i <<= UBIFS_LPT_FANOUT_SHIFT;
-	nnode = c->nroot;
-	shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT;
-	for (h = 1; h < c->lpt_hght; h++) {
-		iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
-		shft -= UBIFS_LPT_FANOUT_SHIFT;
-		nnode = ubifs_get_nnode(c, nnode, iip);
-		if (IS_ERR(nnode))
-			return ERR_CAST(nnode);
-	}
-	iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
-	return ubifs_get_pnode(c, nnode, iip);
-}
-
 /**
  * add_pnode_dirt - add dirty space to LPT LEB properties.
  * @c: UBIFS file-system description object
@@ -702,7 +670,7 @@ static int make_tree_dirty(struct ubifs_info *c)
 {
 	struct ubifs_pnode *pnode;
 
-	pnode = pnode_lookup(c, 0);
+	pnode = ubifs_pnode_lookup(c, 0);
 	if (IS_ERR(pnode))
 		return PTR_ERR(pnode);
 
@@ -956,7 +924,7 @@ static int make_pnode_dirty(struct ubifs_info *c, int node_num, int lnum,
 	struct ubifs_pnode *pnode;
 	struct ubifs_nbranch *branch;
 
-	pnode = pnode_lookup(c, node_num);
+	pnode = ubifs_pnode_lookup(c, node_num);
 	if (IS_ERR(pnode))
 		return PTR_ERR(pnode);
 	branch = &pnode->parent->nbranch[pnode->iip];
@@ -1558,7 +1526,7 @@ static int dbg_is_pnode_dirty(struct ubifs_info *c, int lnum, int offs)
 		struct ubifs_nbranch *branch;
 
 		cond_resched();
-		pnode = pnode_lookup(c, i);
+		pnode = ubifs_pnode_lookup(c, i);
 		if (IS_ERR(pnode))
 			return PTR_ERR(pnode);
 		branch = &pnode->parent->nbranch[pnode->iip];
@@ -1710,7 +1678,7 @@ int dbg_check_ltab(struct ubifs_info *c)
 	for (i = 0; i < cnt; i++) {
 		struct ubifs_pnode *pnode;
 
-		pnode = pnode_lookup(c, i);
+		pnode = ubifs_pnode_lookup(c, i);
 		if (IS_ERR(pnode))
 			return PTR_ERR(pnode);
 		cond_resched();
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 4368cde476b0f..306cc7a4f7259 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1712,6 +1712,7 @@ struct ubifs_pnode *ubifs_get_pnode(struct ubifs_info *c,
 				    struct ubifs_nnode *parent, int iip);
 struct ubifs_nnode *ubifs_get_nnode(struct ubifs_info *c,
 				    struct ubifs_nnode *parent, int iip);
+struct ubifs_pnode *ubifs_pnode_lookup(struct ubifs_info *c, int i);
 int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip);
 void ubifs_add_lpt_dirt(struct ubifs_info *c, int lnum, int dirty);
 void ubifs_add_nnode_dirt(struct ubifs_info *c, struct ubifs_nnode *nnode);
-- 
GitLab


From e635cf8c3bbd7b9bd3e282156c6cc4f818b72ee9 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Fri, 7 Sep 2018 14:36:27 +0200
Subject: [PATCH 0106/1890] ubifs: Implement ubifs_lpt_lookup using
 ubifs_pnode_lookup

ubifs_lpt_lookup() starts by looking up the nth pnode in the LPT. We
already have this functionality in ubifs_pnode_lookup(). Use this
function rather than open coding its functionality.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/lpt.c | 20 ++------------------
 1 file changed, 2 insertions(+), 18 deletions(-)

diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
index ed0c67fe7500b..5f57af224b8f9 100644
--- a/fs/ubifs/lpt.c
+++ b/fs/ubifs/lpt.c
@@ -1480,27 +1480,11 @@ struct ubifs_pnode *ubifs_pnode_lookup(struct ubifs_info *c, int i)
  */
 struct ubifs_lprops *ubifs_lpt_lookup(struct ubifs_info *c, int lnum)
 {
-	int err, i, h, iip, shft;
-	struct ubifs_nnode *nnode;
+	int i, iip;
 	struct ubifs_pnode *pnode;
 
-	if (!c->nroot) {
-		err = ubifs_read_nnode(c, NULL, 0);
-		if (err)
-			return ERR_PTR(err);
-	}
-	nnode = c->nroot;
 	i = lnum - c->main_first;
-	shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT;
-	for (h = 1; h < c->lpt_hght; h++) {
-		iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
-		shft -= UBIFS_LPT_FANOUT_SHIFT;
-		nnode = ubifs_get_nnode(c, nnode, iip);
-		if (IS_ERR(nnode))
-			return ERR_CAST(nnode);
-	}
-	iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
-	pnode = ubifs_get_pnode(c, nnode, iip);
+	pnode = ubifs_pnode_lookup(c, i >> UBIFS_LPT_FANOUT_SHIFT);
 	if (IS_ERR(pnode))
 		return ERR_CAST(pnode);
 	iip = (i & (UBIFS_LPT_FANOUT - 1));
-- 
GitLab


From 83407437bbeae39551195861e51608c9638ffda4 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Fri, 7 Sep 2018 14:36:28 +0200
Subject: [PATCH 0107/1890] ubifs: Drop write_node

write_node() is used only once and can easily be replaced with calls
to ubifs_prepare_node()/write_head() which makes the code a bit shorter.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/journal.c | 39 +++++----------------------------------
 1 file changed, 5 insertions(+), 34 deletions(-)

diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index 802565a17733c..de7ac9e7accef 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -228,36 +228,6 @@ static int reserve_space(struct ubifs_info *c, int jhead, int len)
 	return err;
 }
 
-/**
- * write_node - write node to a journal head.
- * @c: UBIFS file-system description object
- * @jhead: journal head
- * @node: node to write
- * @len: node length
- * @lnum: LEB number written is returned here
- * @offs: offset written is returned here
- *
- * This function writes a node to reserved space of journal head @jhead.
- * Returns zero in case of success and a negative error code in case of
- * failure.
- */
-static int write_node(struct ubifs_info *c, int jhead, void *node, int len,
-		      int *lnum, int *offs)
-{
-	struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf;
-
-	ubifs_assert(c, jhead != GCHD);
-
-	*lnum = c->jheads[jhead].wbuf.lnum;
-	*offs = c->jheads[jhead].wbuf.offs + c->jheads[jhead].wbuf.used;
-
-	dbg_jnl("jhead %s, LEB %d:%d, len %d",
-		dbg_jhead(jhead), *lnum, *offs, len);
-	ubifs_prepare_node(c, node, len, 0);
-
-	return ubifs_wbuf_write_nolock(wbuf, node, len);
-}
-
 /**
  * write_head - write data to a journal head.
  * @c: UBIFS file-system description object
@@ -268,9 +238,9 @@ static int write_node(struct ubifs_info *c, int jhead, void *node, int len,
  * @offs: offset written is returned here
  * @sync: non-zero if the write-buffer has to by synchronized
  *
- * This function is the same as 'write_node()' but it does not assume the
- * buffer it is writing is a node, so it does not prepare it (which means
- * initializing common header and calculating CRC).
+ * This function writes data to the reserved space of journal head @jhead.
+ * Returns zero in case of success and a negative error code in case of
+ * failure.
  */
 static int write_head(struct ubifs_info *c, int jhead, void *buf, int len,
 		      int *lnum, int *offs, int sync)
@@ -764,7 +734,8 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
 	if (err)
 		goto out_free;
 
-	err = write_node(c, DATAHD, data, dlen, &lnum, &offs);
+	ubifs_prepare_node(c, data, dlen, 0);
+	err = write_head(c, DATAHD, data, dlen, &lnum, &offs, 0);
 	if (err)
 		goto out_release;
 	ubifs_wbuf_add_ino_nolock(&c->jheads[DATAHD].wbuf, key_inum(c, key));
-- 
GitLab


From fd6150051becd3a9f8039046e3af91fd9ef01e57 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Fri, 7 Sep 2018 14:36:29 +0200
Subject: [PATCH 0108/1890] ubifs: Store read superblock node

The superblock node is read/modified/written several times throughout
the UBIFS code. Instead of reading it from the device each time just
keep a copy in memory and write back the modified copy when necessary.
This patch helps for authentication support, here we not only have to
read the superblock node, but also have to authenticate it, which
is easier if we do it once during initialization.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/sb.c    | 19 +++++--------------
 fs/ubifs/super.c |  8 +-------
 fs/ubifs/ubifs.h |  3 ++-
 3 files changed, 8 insertions(+), 22 deletions(-)

diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index cf7ee2880c571..7e08c81433165 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -497,7 +497,7 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup)
  * code. Note, the user of this function is responsible of kfree()'ing the
  * returned superblock buffer.
  */
-struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c)
+static struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c)
 {
 	struct ubifs_sb_node *sup;
 	int err;
@@ -554,6 +554,8 @@ int ubifs_read_superblock(struct ubifs_info *c)
 	if (IS_ERR(sup))
 		return PTR_ERR(sup);
 
+	c->sup_node = sup;
+
 	c->fmt_version = le32_to_cpu(sup->fmt_version);
 	c->ro_compat_version = le32_to_cpu(sup->ro_compat_version);
 
@@ -685,7 +687,6 @@ int ubifs_read_superblock(struct ubifs_info *c)
 
 	err = validate_sb(c, sup);
 out:
-	kfree(sup);
 	return err;
 }
 
@@ -814,7 +815,7 @@ static int fixup_free_space(struct ubifs_info *c)
 int ubifs_fixup_free_space(struct ubifs_info *c)
 {
 	int err;
-	struct ubifs_sb_node *sup;
+	struct ubifs_sb_node *sup = c->sup_node;
 
 	ubifs_assert(c, c->space_fixup);
 	ubifs_assert(c, !c->ro_mount);
@@ -825,16 +826,11 @@ int ubifs_fixup_free_space(struct ubifs_info *c)
 	if (err)
 		return err;
 
-	sup = ubifs_read_sb_node(c);
-	if (IS_ERR(sup))
-		return PTR_ERR(sup);
-
 	/* Free-space fixup is no longer required */
 	c->space_fixup = 0;
 	sup->flags &= cpu_to_le32(~UBIFS_FLG_SPACE_FIXUP);
 
 	err = ubifs_write_sb_node(c, sup);
-	kfree(sup);
 	if (err)
 		return err;
 
@@ -845,7 +841,7 @@ int ubifs_fixup_free_space(struct ubifs_info *c)
 int ubifs_enable_encryption(struct ubifs_info *c)
 {
 	int err;
-	struct ubifs_sb_node *sup;
+	struct ubifs_sb_node *sup = c->sup_node;
 
 	if (c->encrypted)
 		return 0;
@@ -858,16 +854,11 @@ int ubifs_enable_encryption(struct ubifs_info *c)
 		return -EINVAL;
 	}
 
-	sup = ubifs_read_sb_node(c);
-	if (IS_ERR(sup))
-		return PTR_ERR(sup);
-
 	sup->flags |= cpu_to_le32(UBIFS_FLG_ENCRYPTION);
 
 	err = ubifs_write_sb_node(c, sup);
 	if (!err)
 		c->encrypted = 1;
-	kfree(sup);
 
 	return err;
 }
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index fec62e9dfbe6a..70a64e00f0a83 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1605,16 +1605,10 @@ static int ubifs_remount_rw(struct ubifs_info *c)
 		goto out;
 
 	if (c->old_leb_cnt != c->leb_cnt) {
-		struct ubifs_sb_node *sup;
+		struct ubifs_sb_node *sup = c->sup_node;
 
-		sup = ubifs_read_sb_node(c);
-		if (IS_ERR(sup)) {
-			err = PTR_ERR(sup);
-			goto out;
-		}
 		sup->leb_cnt = cpu_to_le32(c->leb_cnt);
 		err = ubifs_write_sb_node(c, sup);
-		kfree(sup);
 		if (err)
 			goto out;
 	}
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 306cc7a4f7259..93e1c34c097f3 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -983,6 +983,7 @@ struct ubifs_debug_info;
  * struct ubifs_info - UBIFS file-system description data structure
  * (per-superblock).
  * @vfs_sb: VFS @struct super_block object
+ * @sup_node: The super block node as read from the device
  *
  * @highest_inum: highest used inode number
  * @max_sqnum: current global sequence number
@@ -1230,6 +1231,7 @@ struct ubifs_debug_info;
  */
 struct ubifs_info {
 	struct super_block *vfs_sb;
+	struct ubifs_sb_node *sup_node;
 
 	ino_t highest_inum;
 	unsigned long long max_sqnum;
@@ -1664,7 +1666,6 @@ int ubifs_write_master(struct ubifs_info *c);
 
 /* sb.c */
 int ubifs_read_superblock(struct ubifs_info *c);
-struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c);
 int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup);
 int ubifs_fixup_free_space(struct ubifs_info *c);
 int ubifs_enable_encryption(struct ubifs_info *c);
-- 
GitLab


From 5125cfdff13a4da4c34a053cd67e99083aece028 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Fri, 7 Sep 2018 14:36:30 +0200
Subject: [PATCH 0109/1890] ubifs: Format changes for authentication support

This patch adds the changes to the on disk format needed for
authentication support. We'll add:

* a HMAC covering super block node
* a HMAC covering the master node
* a hash over the root index node to the master node
* a hash over the LPT to the master node
* a flag to the filesystem flag indicating the filesystem is
  authenticated
* an authentication node necessary to authenticate the nodes written
  to the journal heads while they are written.
* a HMAC of a well known message to the super block node to be able
  to check if the correct key is provided

And finally, not visible in this patch, nevertheless explained here:

* hashes over the referenced child nodes in each branch of a index node

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/debug.c       |  6 ++++++
 fs/ubifs/super.c       |  1 +
 fs/ubifs/ubifs-media.h | 46 +++++++++++++++++++++++++++++++++++++++---
 3 files changed, 50 insertions(+), 3 deletions(-)

diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 564e330d05b14..c49ff50fdceb1 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -165,6 +165,8 @@ const char *dbg_ntype(int type)
 		return "commit start node";
 	case UBIFS_ORPH_NODE:
 		return "orphan node";
+	case UBIFS_AUTH_NODE:
+		return "auth node";
 	default:
 		return "unknown node";
 	}
@@ -542,6 +544,10 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node)
 			       (unsigned long long)le64_to_cpu(orph->inos[i]));
 		break;
 	}
+	case UBIFS_AUTH_NODE:
+	{
+		break;
+	}
 	default:
 		pr_err("node type %d was not recognized\n",
 		       (int)ch->node_type);
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 70a64e00f0a83..0194e3c0853f6 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -579,6 +579,7 @@ static int init_constants_early(struct ubifs_info *c)
 	c->ranges[UBIFS_REF_NODE].len  = UBIFS_REF_NODE_SZ;
 	c->ranges[UBIFS_TRUN_NODE].len = UBIFS_TRUN_NODE_SZ;
 	c->ranges[UBIFS_CS_NODE].len   = UBIFS_CS_NODE_SZ;
+	c->ranges[UBIFS_AUTH_NODE].len = UBIFS_AUTH_NODE_SZ;
 
 	c->ranges[UBIFS_INO_NODE].min_len  = UBIFS_INO_NODE_SZ;
 	c->ranges[UBIFS_INO_NODE].max_len  = UBIFS_MAX_INO_NODE_SZ;
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h
index e8c23c9d4f4a7..8b7c1844014ff 100644
--- a/fs/ubifs/ubifs-media.h
+++ b/fs/ubifs/ubifs-media.h
@@ -286,6 +286,7 @@ enum {
 #define UBIFS_IDX_NODE_SZ  sizeof(struct ubifs_idx_node)
 #define UBIFS_CS_NODE_SZ   sizeof(struct ubifs_cs_node)
 #define UBIFS_ORPH_NODE_SZ sizeof(struct ubifs_orph_node)
+#define UBIFS_AUTH_NODE_SZ sizeof(struct ubifs_auth_node)
 /* Extended attribute entry nodes are identical to directory entry nodes */
 #define UBIFS_XENT_NODE_SZ UBIFS_DENT_NODE_SZ
 /* Only this does not have to be multiple of 8 bytes */
@@ -300,6 +301,12 @@ enum {
 /* The largest UBIFS node */
 #define UBIFS_MAX_NODE_SZ UBIFS_MAX_INO_NODE_SZ
 
+/* The maxmimum size of a hash, enough for sha512 */
+#define UBIFS_MAX_HASH_LEN 64
+
+/* The maxmimum size of a hmac, enough for hmac(sha512) */
+#define UBIFS_MAX_HMAC_LEN 64
+
 /*
  * xattr name of UBIFS encryption context, we don't use a prefix
  * nor a long name to not waste space on the flash.
@@ -365,6 +372,7 @@ enum {
  * UBIFS_IDX_NODE: index node
  * UBIFS_CS_NODE: commit start node
  * UBIFS_ORPH_NODE: orphan node
+ * UBIFS_AUTH_NODE: authentication node
  * UBIFS_NODE_TYPES_CNT: count of supported node types
  *
  * Note, we index arrays by these numbers, so keep them low and contiguous.
@@ -384,6 +392,7 @@ enum {
 	UBIFS_IDX_NODE,
 	UBIFS_CS_NODE,
 	UBIFS_ORPH_NODE,
+	UBIFS_AUTH_NODE,
 	UBIFS_NODE_TYPES_CNT,
 };
 
@@ -421,15 +430,19 @@ enum {
  * UBIFS_FLG_DOUBLE_HASH: store a 32bit cookie in directory entry nodes to
  *			  support 64bit cookies for lookups by hash
  * UBIFS_FLG_ENCRYPTION: this filesystem contains encrypted files
+ * UBIFS_FLG_AUTHENTICATION: this filesystem contains hashes for authentication
  */
 enum {
 	UBIFS_FLG_BIGLPT = 0x02,
 	UBIFS_FLG_SPACE_FIXUP = 0x04,
 	UBIFS_FLG_DOUBLE_HASH = 0x08,
 	UBIFS_FLG_ENCRYPTION = 0x10,
+	UBIFS_FLG_AUTHENTICATION = 0x20,
 };
 
-#define UBIFS_FLG_MASK (UBIFS_FLG_BIGLPT|UBIFS_FLG_SPACE_FIXUP|UBIFS_FLG_DOUBLE_HASH|UBIFS_FLG_ENCRYPTION)
+#define UBIFS_FLG_MASK (UBIFS_FLG_BIGLPT | UBIFS_FLG_SPACE_FIXUP | \
+		UBIFS_FLG_DOUBLE_HASH | UBIFS_FLG_ENCRYPTION | \
+		UBIFS_FLG_AUTHENTICATION)
 
 /**
  * struct ubifs_ch - common header node.
@@ -633,6 +646,10 @@ struct ubifs_pad_node {
  * @time_gran: time granularity in nanoseconds
  * @uuid: UUID generated when the file system image was created
  * @ro_compat_version: UBIFS R/O compatibility version
+ * @hmac: HMAC to authenticate the superblock node
+ * @hmac_wkm: HMAC of a well known message (the string "UBIFS") as a convenience
+ *            to the user to check if the correct key is passed.
+ * @hash_algo: The hash algo used for this filesystem (one of enum hash_algo)
  */
 struct ubifs_sb_node {
 	struct ubifs_ch ch;
@@ -660,7 +677,10 @@ struct ubifs_sb_node {
 	__le32 time_gran;
 	__u8 uuid[16];
 	__le32 ro_compat_version;
-	__u8 padding2[3968];
+	__u8 hmac[UBIFS_MAX_HMAC_LEN];
+	__u8 hmac_wkm[UBIFS_MAX_HMAC_LEN];
+	__le16 hash_algo;
+	__u8 padding2[3838];
 } __packed;
 
 /**
@@ -695,6 +715,9 @@ struct ubifs_sb_node {
  * @empty_lebs: number of empty logical eraseblocks
  * @idx_lebs: number of indexing logical eraseblocks
  * @leb_cnt: count of LEBs used by file-system
+ * @hash_root_idx: the hash of the root index node
+ * @hash_lpt: the hash of the LPT
+ * @hmac: HMAC to authenticate the master node
  * @padding: reserved for future, zeroes
  */
 struct ubifs_mst_node {
@@ -727,7 +750,10 @@ struct ubifs_mst_node {
 	__le32 empty_lebs;
 	__le32 idx_lebs;
 	__le32 leb_cnt;
-	__u8 padding[344];
+	__u8 hash_root_idx[UBIFS_MAX_HASH_LEN];
+	__u8 hash_lpt[UBIFS_MAX_HASH_LEN];
+	__u8 hmac[UBIFS_MAX_HMAC_LEN];
+	__u8 padding[152];
 } __packed;
 
 /**
@@ -746,12 +772,26 @@ struct ubifs_ref_node {
 	__u8 padding[28];
 } __packed;
 
+/**
+ * struct ubifs_auth_node - node for authenticating other nodes
+ * @ch: common header
+ * @hmac: The HMAC
+ */
+struct ubifs_auth_node {
+	struct ubifs_ch ch;
+	__u8 hmac[];
+} __packed;
+
 /**
  * struct ubifs_branch - key/reference/length branch
  * @lnum: LEB number of the target node
  * @offs: offset within @lnum
  * @len: target node length
  * @key: key
+ *
+ * In an authenticated UBIFS we have the hash of the referenced node after @key.
+ * This can't be added to the struct type definition because @key is a
+ * dynamically sized element already.
  */
 struct ubifs_branch {
 	__le32 lnum;
-- 
GitLab


From dead97266f1034d1cabd9a50641163d909559816 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Fri, 7 Sep 2018 14:36:31 +0200
Subject: [PATCH 0110/1890] ubifs: Add separate functions to init/crc a node

When adding authentication support we will embed a HMAC into some
nodes. To prepare these nodes we have to first initialize the nodes,
then add a HMAC and finally add a CRC. To accomplish this add separate
ubifs_init_node/ubifs_crc_node functions.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/io.c    | 42 +++++++++++++++++++++++++++---------------
 fs/ubifs/ubifs.h |  2 ++
 2 files changed, 29 insertions(+), 15 deletions(-)

diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index 099bec94b8207..4bd61fe146e06 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -365,20 +365,8 @@ static unsigned long long next_sqnum(struct ubifs_info *c)
 	return sqnum;
 }
 
-/**
- * ubifs_prepare_node - prepare node to be written to flash.
- * @c: UBIFS file-system description object
- * @node: the node to pad
- * @len: node length
- * @pad: if the buffer has to be padded
- *
- * This function prepares node at @node to be written to the media - it
- * calculates node CRC, fills the common header, and adds proper padding up to
- * the next minimum I/O unit if @pad is not zero.
- */
-void ubifs_prepare_node(struct ubifs_info *c, void *node, int len, int pad)
+void ubifs_init_node(struct ubifs_info *c, void *node, int len, int pad)
 {
-	uint32_t crc;
 	struct ubifs_ch *ch = node;
 	unsigned long long sqnum = next_sqnum(c);
 
@@ -389,8 +377,6 @@ void ubifs_prepare_node(struct ubifs_info *c, void *node, int len, int pad)
 	ch->group_type = UBIFS_NO_NODE_GROUP;
 	ch->sqnum = cpu_to_le64(sqnum);
 	ch->padding[0] = ch->padding[1] = 0;
-	crc = crc32(UBIFS_CRC32_INIT, node + 8, len - 8);
-	ch->crc = cpu_to_le32(crc);
 
 	if (pad) {
 		len = ALIGN(len, 8);
@@ -399,6 +385,32 @@ void ubifs_prepare_node(struct ubifs_info *c, void *node, int len, int pad)
 	}
 }
 
+void ubifs_crc_node(struct ubifs_info *c, void *node, int len)
+{
+	struct ubifs_ch *ch = node;
+	uint32_t crc;
+
+	crc = crc32(UBIFS_CRC32_INIT, node + 8, len - 8);
+	ch->crc = cpu_to_le32(crc);
+}
+
+/**
+ * ubifs_prepare_node - prepare node to be written to flash.
+ * @c: UBIFS file-system description object
+ * @node: the node to pad
+ * @len: node length
+ * @pad: if the buffer has to be padded
+ *
+ * This function prepares node at @node to be written to the media - it
+ * calculates node CRC, fills the common header, and adds proper padding up to
+ * the next minimum I/O unit if @pad is not zero.
+ */
+void ubifs_prepare_node(struct ubifs_info *c, void *node, int len, int pad)
+{
+	ubifs_init_node(c, node, len, pad);
+	ubifs_crc_node(c, node, len);
+}
+
 /**
  * ubifs_prep_grp_node - prepare node of a group to be written to flash.
  * @c: UBIFS file-system description object
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 93e1c34c097f3..0422865661b11 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1494,6 +1494,8 @@ int ubifs_write_node(struct ubifs_info *c, void *node, int len, int lnum,
 		     int offs);
 int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
 		     int offs, int quiet, int must_chk_crc);
+void ubifs_init_node(struct ubifs_info *c, void *buf, int len, int pad);
+void ubifs_crc_node(struct ubifs_info *c, void *buf, int len);
 void ubifs_prepare_node(struct ubifs_info *c, void *buf, int len, int pad);
 void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last);
 int ubifs_io_init(struct ubifs_info *c);
-- 
GitLab


From 49525e5eecca5e1b4a83ac217868e8d8b843539f Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Fri, 7 Sep 2018 14:36:32 +0200
Subject: [PATCH 0111/1890] ubifs: Add helper functions for authentication
 support

This patch adds the various helper functions needed for authentication
support. We need functions to hash nodes, to embed HMACs into a node and
to compare hashes and HMACs. Most functions first check if this
filesystem is authenticated and bail out early if not, which makes the
functions safe to be called with disabled authentication.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/Kconfig  |   1 +
 fs/ubifs/Makefile |   1 +
 fs/ubifs/auth.c   | 502 ++++++++++++++++++++++++++++++++++++++++++++++
 fs/ubifs/ubifs.h  | 218 ++++++++++++++++++++
 4 files changed, 722 insertions(+)
 create mode 100644 fs/ubifs/auth.c

diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig
index bbc78549be4cc..853c77579b4e9 100644
--- a/fs/ubifs/Kconfig
+++ b/fs/ubifs/Kconfig
@@ -7,6 +7,7 @@ config UBIFS_FS
 	select CRYPTO if UBIFS_FS_ZLIB
 	select CRYPTO_LZO if UBIFS_FS_LZO
 	select CRYPTO_DEFLATE if UBIFS_FS_ZLIB
+	select CRYPTO_HASH_INFO
 	depends on MTD_UBI
 	help
 	  UBIFS is a file system for flash devices which works on top of UBI.
diff --git a/fs/ubifs/Makefile b/fs/ubifs/Makefile
index 6197d7e539e42..5f838319c8d53 100644
--- a/fs/ubifs/Makefile
+++ b/fs/ubifs/Makefile
@@ -8,3 +8,4 @@ ubifs-y += recovery.o ioctl.o lpt_commit.o tnc_misc.o debug.o
 ubifs-y += misc.o
 ubifs-$(CONFIG_UBIFS_FS_ENCRYPTION) += crypto.o
 ubifs-$(CONFIG_UBIFS_FS_XATTR) += xattr.o
+ubifs-$(CONFIG_UBIFS_FS_AUTHENTICATION) += auth.o
diff --git a/fs/ubifs/auth.c b/fs/ubifs/auth.c
new file mode 100644
index 0000000000000..124e965a28b30
--- /dev/null
+++ b/fs/ubifs/auth.c
@@ -0,0 +1,502 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This file is part of UBIFS.
+ *
+ * Copyright (C) 2018 Pengutronix, Sascha Hauer <s.hauer@pengutronix.de>
+ */
+
+/*
+ * This file implements various helper functions for UBIFS authentication support
+ */
+
+#include <linux/crypto.h>
+#include <crypto/hash.h>
+#include <crypto/sha.h>
+#include <crypto/algapi.h>
+#include <keys/user-type.h>
+
+#include "ubifs.h"
+
+/**
+ * ubifs_node_calc_hash - calculate the hash of a UBIFS node
+ * @c: UBIFS file-system description object
+ * @node: the node to calculate a hash for
+ * @hash: the returned hash
+ *
+ * Returns 0 for success or a negative error code otherwise.
+ */
+int __ubifs_node_calc_hash(const struct ubifs_info *c, const void *node,
+			    u8 *hash)
+{
+	const struct ubifs_ch *ch = node;
+	SHASH_DESC_ON_STACK(shash, c->hash_tfm);
+	int err;
+
+	shash->tfm = c->hash_tfm;
+	shash->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	err = crypto_shash_digest(shash, node, le32_to_cpu(ch->len), hash);
+	if (err < 0)
+		return err;
+	return 0;
+}
+
+/**
+ * ubifs_hash_calc_hmac - calculate a HMAC from a hash
+ * @c: UBIFS file-system description object
+ * @hash: the node to calculate a HMAC for
+ * @hmac: the returned HMAC
+ *
+ * Returns 0 for success or a negative error code otherwise.
+ */
+static int ubifs_hash_calc_hmac(const struct ubifs_info *c, const u8 *hash,
+				 u8 *hmac)
+{
+	SHASH_DESC_ON_STACK(shash, c->hmac_tfm);
+	int err;
+
+	shash->tfm = c->hmac_tfm;
+	shash->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	err = crypto_shash_digest(shash, hash, c->hash_len, hmac);
+	if (err < 0)
+		return err;
+	return 0;
+}
+
+/**
+ * ubifs_prepare_auth_node - Prepare an authentication node
+ * @c: UBIFS file-system description object
+ * @node: the node to calculate a hash for
+ * @hash: input hash of previous nodes
+ *
+ * This function prepares an authentication node for writing onto flash.
+ * It creates a HMAC from the given input hash and writes it to the node.
+ *
+ * Returns 0 for success or a negative error code otherwise.
+ */
+int ubifs_prepare_auth_node(struct ubifs_info *c, void *node,
+			     struct shash_desc *inhash)
+{
+	SHASH_DESC_ON_STACK(hash_desc, c->hash_tfm);
+	struct ubifs_auth_node *auth = node;
+	u8 *hash;
+	int err;
+
+	hash = kmalloc(crypto_shash_descsize(c->hash_tfm), GFP_NOFS);
+	if (!hash)
+		return -ENOMEM;
+
+	hash_desc->tfm = c->hash_tfm;
+	hash_desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+	ubifs_shash_copy_state(c, inhash, hash_desc);
+
+	err = crypto_shash_final(hash_desc, hash);
+	if (err)
+		goto out;
+
+	err = ubifs_hash_calc_hmac(c, hash, auth->hmac);
+	if (err)
+		goto out;
+
+	auth->ch.node_type = UBIFS_AUTH_NODE;
+	ubifs_prepare_node(c, auth, ubifs_auth_node_sz(c), 0);
+
+	err = 0;
+out:
+	kfree(hash);
+
+	return err;
+}
+
+static struct shash_desc *ubifs_get_desc(const struct ubifs_info *c,
+					 struct crypto_shash *tfm)
+{
+	struct shash_desc *desc;
+	int err;
+
+	if (!ubifs_authenticated(c))
+		return NULL;
+
+	desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(tfm), GFP_KERNEL);
+	if (!desc)
+		return ERR_PTR(-ENOMEM);
+
+	desc->tfm = tfm;
+	desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	err = crypto_shash_init(desc);
+	if (err) {
+		kfree(desc);
+		return ERR_PTR(err);
+	}
+
+	return desc;
+}
+
+/**
+ * __ubifs_hash_get_desc - get a descriptor suitable for hashing a node
+ * @c: UBIFS file-system description object
+ *
+ * This function returns a descriptor suitable for hashing a node. Free after use
+ * with kfree.
+ */
+struct shash_desc *__ubifs_hash_get_desc(const struct ubifs_info *c)
+{
+	return ubifs_get_desc(c, c->hash_tfm);
+}
+
+/**
+ * __ubifs_shash_final - finalize shash
+ * @c: UBIFS file-system description object
+ * @desc: the descriptor
+ * @out: the output hash
+ *
+ * Simple wrapper around crypto_shash_final(), safe to be called with
+ * disabled authentication.
+ */
+int __ubifs_shash_final(const struct ubifs_info *c, struct shash_desc *desc,
+			u8 *out)
+{
+	if (ubifs_authenticated(c))
+		return crypto_shash_final(desc, out);
+
+	return 0;
+}
+
+/**
+ * ubifs_bad_hash - Report hash mismatches
+ * @c: UBIFS file-system description object
+ * @node: the node
+ * @hash: the expected hash
+ * @lnum: the LEB @node was read from
+ * @offs: offset in LEB @node was read from
+ *
+ * This function reports a hash mismatch when a node has a different hash than
+ * expected.
+ */
+void ubifs_bad_hash(const struct ubifs_info *c, const void *node, const u8 *hash,
+		    int lnum, int offs)
+{
+	int len = min(c->hash_len, 20);
+	int cropped = len != c->hash_len;
+	const char *cont = cropped ? "..." : "";
+
+	u8 calc[UBIFS_HASH_ARR_SZ];
+
+	__ubifs_node_calc_hash(c, node, calc);
+
+	ubifs_err(c, "hash mismatch on node at LEB %d:%d", lnum, offs);
+	ubifs_err(c, "hash expected:   %*ph%s", len, hash, cont);
+	ubifs_err(c, "hash calculated: %*ph%s", len, calc, cont);
+}
+
+/**
+ * __ubifs_node_check_hash - check the hash of a node against given hash
+ * @c: UBIFS file-system description object
+ * @node: the node
+ * @expected: the expected hash
+ *
+ * This function calculates a hash over a node and compares it to the given hash.
+ * Returns 0 if both hashes are equal or authentication is disabled, otherwise a
+ * negative error code is returned.
+ */
+int __ubifs_node_check_hash(const struct ubifs_info *c, const void *node,
+			    const u8 *expected)
+{
+	u8 calc[UBIFS_HASH_ARR_SZ];
+	int err;
+
+	err = __ubifs_node_calc_hash(c, node, calc);
+	if (err)
+		return err;
+
+	if (ubifs_check_hash(c, expected, calc))
+		return -EPERM;
+
+	return 0;
+}
+
+/**
+ * ubifs_init_authentication - initialize UBIFS authentication support
+ * @c: UBIFS file-system description object
+ *
+ * This function returns 0 for success or a negative error code otherwise.
+ */
+int ubifs_init_authentication(struct ubifs_info *c)
+{
+	struct key *keyring_key;
+	const struct user_key_payload *ukp;
+	int err;
+	char hmac_name[CRYPTO_MAX_ALG_NAME];
+
+	if (!c->auth_hash_name) {
+		ubifs_err(c, "authentication hash name needed with authentication");
+		return -EINVAL;
+	}
+
+	c->auth_hash_algo = match_string(hash_algo_name, HASH_ALGO__LAST,
+					 c->auth_hash_name);
+	if ((int)c->auth_hash_algo < 0) {
+		ubifs_err(c, "Unknown hash algo %s specified",
+			  c->auth_hash_name);
+		return -EINVAL;
+	}
+
+	snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)",
+		 c->auth_hash_name);
+
+	keyring_key = request_key(&key_type_logon, c->auth_key_name, NULL);
+
+	if (IS_ERR(keyring_key)) {
+		ubifs_err(c, "Failed to request key: %ld",
+			  PTR_ERR(keyring_key));
+		return PTR_ERR(keyring_key);
+	}
+
+	down_read(&keyring_key->sem);
+
+	if (keyring_key->type != &key_type_logon) {
+		ubifs_err(c, "key type must be logon");
+		err = -ENOKEY;
+		goto out;
+	}
+
+	ukp = user_key_payload_locked(keyring_key);
+	if (!ukp) {
+		/* key was revoked before we acquired its semaphore */
+		err = -EKEYREVOKED;
+		goto out;
+	}
+
+	c->hash_tfm = crypto_alloc_shash(c->auth_hash_name, 0,
+					 CRYPTO_ALG_ASYNC);
+	if (IS_ERR(c->hash_tfm)) {
+		err = PTR_ERR(c->hash_tfm);
+		ubifs_err(c, "Can not allocate %s: %d",
+			  c->auth_hash_name, err);
+		goto out;
+	}
+
+	c->hash_len = crypto_shash_digestsize(c->hash_tfm);
+	if (c->hash_len > UBIFS_HASH_ARR_SZ) {
+		ubifs_err(c, "hash %s is bigger than maximum allowed hash size (%d > %d)",
+			  c->auth_hash_name, c->hash_len, UBIFS_HASH_ARR_SZ);
+		err = -EINVAL;
+		goto out_free_hash;
+	}
+
+	c->hmac_tfm = crypto_alloc_shash(hmac_name, 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(c->hmac_tfm)) {
+		err = PTR_ERR(c->hmac_tfm);
+		ubifs_err(c, "Can not allocate %s: %d", hmac_name, err);
+		goto out_free_hash;
+	}
+
+	c->hmac_desc_len = crypto_shash_digestsize(c->hmac_tfm);
+	if (c->hmac_desc_len > UBIFS_HMAC_ARR_SZ) {
+		ubifs_err(c, "hmac %s is bigger than maximum allowed hmac size (%d > %d)",
+			  hmac_name, c->hmac_desc_len, UBIFS_HMAC_ARR_SZ);
+		err = -EINVAL;
+		goto out_free_hash;
+	}
+
+	err = crypto_shash_setkey(c->hmac_tfm, ukp->data, ukp->datalen);
+	if (err)
+		goto out_free_hmac;
+
+	c->authenticated = true;
+
+	c->log_hash = ubifs_hash_get_desc(c);
+	if (IS_ERR(c->log_hash))
+		goto out_free_hmac;
+
+	err = 0;
+
+out_free_hmac:
+	if (err)
+		crypto_free_shash(c->hmac_tfm);
+out_free_hash:
+	if (err)
+		crypto_free_shash(c->hash_tfm);
+out:
+	up_read(&keyring_key->sem);
+	key_put(keyring_key);
+
+	return err;
+}
+
+/**
+ * __ubifs_exit_authentication - release resource
+ * @c: UBIFS file-system description object
+ *
+ * This function releases the authentication related resources.
+ */
+void __ubifs_exit_authentication(struct ubifs_info *c)
+{
+	if (!ubifs_authenticated(c))
+		return;
+
+	crypto_free_shash(c->hmac_tfm);
+	crypto_free_shash(c->hash_tfm);
+	kfree(c->log_hash);
+}
+
+/**
+ * ubifs_node_calc_hmac - calculate the HMAC of a UBIFS node
+ * @c: UBIFS file-system description object
+ * @node: the node to insert a HMAC into.
+ * @len: the length of the node
+ * @ofs_hmac: the offset in the node where the HMAC is inserted
+ * @hmac: returned HMAC
+ *
+ * This function calculates a HMAC of a UBIFS node. The HMAC is expected to be
+ * embedded into the node, so this area is not covered by the HMAC. Also not
+ * covered is the UBIFS_NODE_MAGIC and the CRC of the node.
+ */
+static int ubifs_node_calc_hmac(const struct ubifs_info *c, const void *node,
+				int len, int ofs_hmac, void *hmac)
+{
+	SHASH_DESC_ON_STACK(shash, c->hmac_tfm);
+	int hmac_len = c->hmac_desc_len;
+	int err;
+
+	ubifs_assert(c, ofs_hmac > 8);
+	ubifs_assert(c, ofs_hmac + hmac_len < len);
+
+	shash->tfm = c->hmac_tfm;
+	shash->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	err = crypto_shash_init(shash);
+	if (err)
+		return err;
+
+	/* behind common node header CRC up to HMAC begin */
+	err = crypto_shash_update(shash, node + 8, ofs_hmac - 8);
+	if (err < 0)
+		return err;
+
+	/* behind HMAC, if any */
+	if (len - ofs_hmac - hmac_len > 0) {
+		err = crypto_shash_update(shash, node + ofs_hmac + hmac_len,
+			    len - ofs_hmac - hmac_len);
+		if (err < 0)
+			return err;
+	}
+
+	return crypto_shash_final(shash, hmac);
+}
+
+/**
+ * __ubifs_node_insert_hmac - insert a HMAC into a UBIFS node
+ * @c: UBIFS file-system description object
+ * @node: the node to insert a HMAC into.
+ * @len: the length of the node
+ * @ofs_hmac: the offset in the node where the HMAC is inserted
+ *
+ * This function inserts a HMAC at offset @ofs_hmac into the node given in
+ * @node.
+ *
+ * This function returns 0 for success or a negative error code otherwise.
+ */
+int __ubifs_node_insert_hmac(const struct ubifs_info *c, void *node, int len,
+			    int ofs_hmac)
+{
+	return ubifs_node_calc_hmac(c, node, len, ofs_hmac, node + ofs_hmac);
+}
+
+/**
+ * __ubifs_node_verify_hmac - verify the HMAC of UBIFS node
+ * @c: UBIFS file-system description object
+ * @node: the node to insert a HMAC into.
+ * @len: the length of the node
+ * @ofs_hmac: the offset in the node where the HMAC is inserted
+ *
+ * This function verifies the HMAC at offset @ofs_hmac of the node given in
+ * @node. Returns 0 if successful or a negative error code otherwise.
+ */
+int __ubifs_node_verify_hmac(const struct ubifs_info *c, const void *node,
+			     int len, int ofs_hmac)
+{
+	int hmac_len = c->hmac_desc_len;
+	u8 *hmac;
+	int err;
+
+	hmac = kmalloc(hmac_len, GFP_NOFS);
+	if (!hmac)
+		return -ENOMEM;
+
+	err = ubifs_node_calc_hmac(c, node, len, ofs_hmac, hmac);
+	if (err)
+		return err;
+
+	err = crypto_memneq(hmac, node + ofs_hmac, hmac_len);
+
+	kfree(hmac);
+
+	if (!err)
+		return 0;
+
+	return -EPERM;
+}
+
+int __ubifs_shash_copy_state(const struct ubifs_info *c, struct shash_desc *src,
+			     struct shash_desc *target)
+{
+	u8 *state;
+	int err;
+
+	state = kmalloc(crypto_shash_descsize(src->tfm), GFP_NOFS);
+	if (!state)
+		return -ENOMEM;
+
+	err = crypto_shash_export(src, state);
+	if (err)
+		goto out;
+
+	err = crypto_shash_import(target, state);
+
+out:
+	kfree(state);
+
+	return err;
+}
+
+/**
+ * ubifs_hmac_wkm - Create a HMAC of the well known message
+ * @c: UBIFS file-system description object
+ * @hmac: The HMAC of the well known message
+ *
+ * This function creates a HMAC of a well known message. This is used
+ * to check if the provided key is suitable to authenticate a UBIFS
+ * image. This is only a convenience to the user to provide a better
+ * error message when the wrong key is provided.
+ *
+ * This function returns 0 for success or a negative error code otherwise.
+ */
+int ubifs_hmac_wkm(struct ubifs_info *c, u8 *hmac)
+{
+	SHASH_DESC_ON_STACK(shash, c->hmac_tfm);
+	int err;
+	const char well_known_message[] = "UBIFS";
+
+	if (!ubifs_authenticated(c))
+		return 0;
+
+	shash->tfm = c->hmac_tfm;
+	shash->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	err = crypto_shash_init(shash);
+	if (err)
+		return err;
+
+	err = crypto_shash_update(shash, well_known_message,
+				  sizeof(well_known_message) - 1);
+	if (err < 0)
+		return err;
+
+	err = crypto_shash_final(shash, hmac);
+	if (err)
+		return err;
+	return 0;
+}
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 0422865661b11..3300f68c40978 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -39,6 +39,9 @@
 #include <linux/security.h>
 #include <linux/xattr.h>
 #include <linux/random.h>
+#include <crypto/hash_info.h>
+#include <crypto/hash.h>
+#include <crypto/algapi.h>
 
 #define __FS_HAS_ENCRYPTION IS_ENABLED(CONFIG_UBIFS_FS_ENCRYPTION)
 #include <linux/fscrypt.h>
@@ -157,6 +160,14 @@
 /* Maximum number of data nodes to bulk-read */
 #define UBIFS_MAX_BULK_READ 32
 
+#ifdef CONFIG_UBIFS_FS_AUTHENTICATION
+#define UBIFS_HASH_ARR_SZ UBIFS_MAX_HASH_LEN
+#define UBIFS_HMAC_ARR_SZ UBIFS_MAX_HMAC_LEN
+#else
+#define UBIFS_HASH_ARR_SZ 0
+#define UBIFS_HMAC_ARR_SZ 0
+#endif
+
 /*
  * Lockdep classes for UBIFS inode @ui_mutex.
  */
@@ -1029,6 +1040,7 @@ struct ubifs_debug_info;
  * @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc)
  * @rw_incompat: the media is not R/W compatible
  * @assert_action: action to take when a ubifs_assert() fails
+ * @authenticated: flag indigating the FS is mounted in authenticated mode
  *
  * @tnc_mutex: protects the Tree Node Cache (TNC), @zroot, @cnext, @enext, and
  *             @calc_idx_sz
@@ -1076,6 +1088,7 @@ struct ubifs_debug_info;
  * @key_hash: direntry key hash function
  * @key_fmt: key format
  * @key_len: key length
+ * @hash_len: The length of the index node hashes
  * @fanout: fanout of the index tree (number of links per indexing node)
  *
  * @min_io_size: minimal input/output unit size
@@ -1211,6 +1224,13 @@ struct ubifs_debug_info;
  * @rp_uid: reserved pool user ID
  * @rp_gid: reserved pool group ID
  *
+ * @hash_tfm: the hash transformation used for hashing nodes
+ * @hmac_tfm: the HMAC transformation for this filesystem
+ * @hmac_desc_len: length of the HMAC used for authentication
+ * @auth_key_name: the authentication key name
+ * @auth_hash_name: the name of the hash algorithm used for authentication
+ * @auth_hash_algo: the authentication hash used for this fs
+ *
  * @empty: %1 if the UBI device is empty
  * @need_recovery: %1 if the file-system needs recovery
  * @replaying: %1 during journal replay
@@ -1272,6 +1292,7 @@ struct ubifs_info {
 	unsigned int default_compr:2;
 	unsigned int rw_incompat:1;
 	unsigned int assert_action:2;
+	unsigned int authenticated:1;
 
 	struct mutex tnc_mutex;
 	struct ubifs_zbranch zroot;
@@ -1316,6 +1337,7 @@ struct ubifs_info {
 	uint32_t (*key_hash)(const char *str, int len);
 	int key_fmt;
 	int key_len;
+	int hash_len;
 	int fanout;
 
 	int min_io_size;
@@ -1443,6 +1465,13 @@ struct ubifs_info {
 	kuid_t rp_uid;
 	kgid_t rp_gid;
 
+	struct crypto_shash *hash_tfm;
+	struct crypto_shash *hmac_tfm;
+	int hmac_desc_len;
+	char *auth_key_name;
+	char *auth_hash_name;
+	enum hash_algo auth_hash_algo;
+
 	/* The below fields are used only during mounting and re-mounting */
 	unsigned int empty:1;
 	unsigned int need_recovery:1;
@@ -1473,6 +1502,195 @@ extern const struct inode_operations ubifs_dir_inode_operations;
 extern const struct inode_operations ubifs_symlink_inode_operations;
 extern struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT];
 
+/* auth.c */
+static inline int ubifs_authenticated(const struct ubifs_info *c)
+{
+	return (IS_ENABLED(CONFIG_UBIFS_FS_AUTHENTICATION)) && c->authenticated;
+}
+
+struct shash_desc *__ubifs_hash_get_desc(const struct ubifs_info *c);
+static inline struct shash_desc *ubifs_hash_get_desc(const struct ubifs_info *c)
+{
+	return ubifs_authenticated(c) ? __ubifs_hash_get_desc(c) : NULL;
+}
+
+static inline int ubifs_shash_init(const struct ubifs_info *c,
+				   struct shash_desc *desc)
+{
+	if (ubifs_authenticated(c))
+		return crypto_shash_init(desc);
+	else
+		return 0;
+}
+
+static inline int ubifs_shash_update(const struct ubifs_info *c,
+				      struct shash_desc *desc, const void *buf,
+				      unsigned int len)
+{
+	int err = 0;
+
+	if (ubifs_authenticated(c)) {
+		err = crypto_shash_update(desc, buf, len);
+		if (err < 0)
+			return err;
+	}
+
+	return 0;
+}
+
+static inline int ubifs_shash_final(const struct ubifs_info *c,
+				    struct shash_desc *desc, u8 *out)
+{
+	return ubifs_authenticated(c) ? crypto_shash_final(desc, out) : 0;
+}
+
+int __ubifs_node_calc_hash(const struct ubifs_info *c, const void *buf,
+			  u8 *hash);
+static inline int ubifs_node_calc_hash(const struct ubifs_info *c,
+					const void *buf, u8 *hash)
+{
+	if (ubifs_authenticated(c))
+		return __ubifs_node_calc_hash(c, buf, hash);
+	else
+		return 0;
+}
+
+int ubifs_prepare_auth_node(struct ubifs_info *c, void *node,
+			     struct shash_desc *inhash);
+
+/**
+ * ubifs_check_hash - compare two hashes
+ * @c: UBIFS file-system description object
+ * @expected: first hash
+ * @got: second hash
+ *
+ * Compare two hashes @expected and @got. Returns 0 when they are equal, a
+ * negative error code otherwise.
+ */
+static inline int ubifs_check_hash(const struct ubifs_info *c,
+				   const u8 *expected, const u8 *got)
+{
+	return crypto_memneq(expected, got, c->hash_len);
+}
+
+/**
+ * ubifs_check_hmac - compare two HMACs
+ * @c: UBIFS file-system description object
+ * @expected: first HMAC
+ * @got: second HMAC
+ *
+ * Compare two hashes @expected and @got. Returns 0 when they are equal, a
+ * negative error code otherwise.
+ */
+static inline int ubifs_check_hmac(const struct ubifs_info *c,
+				   const u8 *expected, const u8 *got)
+{
+	return crypto_memneq(expected, got, c->hmac_desc_len);
+}
+
+void ubifs_bad_hash(const struct ubifs_info *c, const void *node,
+		    const u8 *hash, int lnum, int offs);
+
+int __ubifs_node_check_hash(const struct ubifs_info *c, const void *buf,
+			  const u8 *expected);
+static inline int ubifs_node_check_hash(const struct ubifs_info *c,
+					const void *buf, const u8 *expected)
+{
+	if (ubifs_authenticated(c))
+		return __ubifs_node_check_hash(c, buf, expected);
+	else
+		return 0;
+}
+
+int ubifs_init_authentication(struct ubifs_info *c);
+void __ubifs_exit_authentication(struct ubifs_info *c);
+static inline void ubifs_exit_authentication(struct ubifs_info *c)
+{
+	if (ubifs_authenticated(c))
+		__ubifs_exit_authentication(c);
+}
+
+/**
+ * ubifs_branch_hash - returns a pointer to the hash of a branch
+ * @c: UBIFS file-system description object
+ * @br: branch to get the hash from
+ *
+ * This returns a pointer to the hash of a branch. Since the key already is a
+ * dynamically sized object we cannot use a struct member here.
+ */
+static inline u8 *ubifs_branch_hash(struct ubifs_info *c,
+				    struct ubifs_branch *br)
+{
+	return (void *)br + sizeof(*br) + c->key_len;
+}
+
+/**
+ * ubifs_copy_hash - copy a hash
+ * @c: UBIFS file-system description object
+ * @from: source hash
+ * @to: destination hash
+ *
+ * With authentication this copies a hash, otherwise does nothing.
+ */
+static inline void ubifs_copy_hash(const struct ubifs_info *c, const u8 *from,
+				   u8 *to)
+{
+	if (ubifs_authenticated(c))
+		memcpy(to, from, c->hash_len);
+}
+
+int __ubifs_node_insert_hmac(const struct ubifs_info *c, void *buf,
+			      int len, int ofs_hmac);
+static inline int ubifs_node_insert_hmac(const struct ubifs_info *c, void *buf,
+					  int len, int ofs_hmac)
+{
+	if (ubifs_authenticated(c))
+		return __ubifs_node_insert_hmac(c, buf, len, ofs_hmac);
+	else
+		return 0;
+}
+
+int __ubifs_node_verify_hmac(const struct ubifs_info *c, const void *buf,
+			     int len, int ofs_hmac);
+static inline int ubifs_node_verify_hmac(const struct ubifs_info *c,
+					 const void *buf, int len, int ofs_hmac)
+{
+	if (ubifs_authenticated(c))
+		return __ubifs_node_verify_hmac(c, buf, len, ofs_hmac);
+	else
+		return 0;
+}
+
+/**
+ * ubifs_auth_node_sz - returns the size of an authentication node
+ * @c: UBIFS file-system description object
+ *
+ * This function returns the size of an authentication node which can
+ * be 0 for unauthenticated filesystems or the real size of an auth node
+ * authentication is enabled.
+ */
+static inline int ubifs_auth_node_sz(const struct ubifs_info *c)
+{
+	if (ubifs_authenticated(c))
+		return sizeof(struct ubifs_auth_node) + c->hmac_desc_len;
+	else
+		return 0;
+}
+
+int ubifs_hmac_wkm(struct ubifs_info *c, u8 *hmac);
+
+int __ubifs_shash_copy_state(const struct ubifs_info *c, struct shash_desc *src,
+			     struct shash_desc *target);
+static inline int ubifs_shash_copy_state(const struct ubifs_info *c,
+					   struct shash_desc *src,
+					   struct shash_desc *target)
+{
+	if (ubifs_authenticated(c))
+		return __ubifs_shash_copy_state(c, src, target);
+	else
+		return 0;
+}
+
 /* io.c */
 void ubifs_ro_mode(struct ubifs_info *c, int err);
 int ubifs_leb_read(const struct ubifs_info *c, int lnum, void *buf, int offs,
-- 
GitLab


From a384b47e4954a0f834749fcbe1c096c40ff5eb35 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Fri, 7 Sep 2018 14:36:33 +0200
Subject: [PATCH 0112/1890] ubifs: Create functions to embed a HMAC in a node

With authentication support some nodes (master node, super block node)
get a HMAC embedded into them. This patch adds functions to prepare and
write such a node.
The difficulty is that besides the HMAC the nodes also have a CRC which
must stay valid. This means we first have to initialize all fields in
the node, then calculate the HMAC (not covering the CRC) and finally
calculate the CRC.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/io.c    | 72 ++++++++++++++++++++++++++++++++++++++++++++----
 fs/ubifs/ubifs.h |  4 +++
 2 files changed, 70 insertions(+), 6 deletions(-)

diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index 4bd61fe146e06..d124117efd42d 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -394,6 +394,39 @@ void ubifs_crc_node(struct ubifs_info *c, void *node, int len)
 	ch->crc = cpu_to_le32(crc);
 }
 
+/**
+ * ubifs_prepare_node_hmac - prepare node to be written to flash.
+ * @c: UBIFS file-system description object
+ * @node: the node to pad
+ * @len: node length
+ * @hmac_offs: offset of the HMAC in the node
+ * @pad: if the buffer has to be padded
+ *
+ * This function prepares node at @node to be written to the media - it
+ * calculates node CRC, fills the common header, and adds proper padding up to
+ * the next minimum I/O unit if @pad is not zero. if @hmac_offs is positive then
+ * a HMAC is inserted into the node at the given offset.
+ *
+ * This function returns 0 for success or a negative error code otherwise.
+ */
+int ubifs_prepare_node_hmac(struct ubifs_info *c, void *node, int len,
+			    int hmac_offs, int pad)
+{
+	int err;
+
+	ubifs_init_node(c, node, len, pad);
+
+	if (hmac_offs > 0) {
+		err = ubifs_node_insert_hmac(c, node, len, hmac_offs);
+		if (err)
+			return err;
+	}
+
+	ubifs_crc_node(c, node, len);
+
+	return 0;
+}
+
 /**
  * ubifs_prepare_node - prepare node to be written to flash.
  * @c: UBIFS file-system description object
@@ -407,8 +440,11 @@ void ubifs_crc_node(struct ubifs_info *c, void *node, int len)
  */
 void ubifs_prepare_node(struct ubifs_info *c, void *node, int len, int pad)
 {
-	ubifs_init_node(c, node, len, pad);
-	ubifs_crc_node(c, node, len);
+	/*
+	 * Deliberately ignore return value since this function can only fail
+	 * when a hmac offset is given.
+	 */
+	ubifs_prepare_node_hmac(c, node, len, 0, pad);
 }
 
 /**
@@ -861,12 +897,13 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
 }
 
 /**
- * ubifs_write_node - write node to the media.
+ * ubifs_write_node_hmac - write node to the media.
  * @c: UBIFS file-system description object
  * @buf: the node to write
  * @len: node length
  * @lnum: logical eraseblock number
  * @offs: offset within the logical eraseblock
+ * @hmac_offs: offset of the HMAC within the node
  *
  * This function automatically fills node magic number, assigns sequence
  * number, and calculates node CRC checksum. The length of the @buf buffer has
@@ -874,8 +911,8 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
  * appends padding node and padding bytes if needed. Returns zero in case of
  * success and a negative error code in case of failure.
  */
-int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum,
-		     int offs)
+int ubifs_write_node_hmac(struct ubifs_info *c, void *buf, int len, int lnum,
+			  int offs, int hmac_offs)
 {
 	int err, buf_len = ALIGN(len, c->min_io_size);
 
@@ -890,7 +927,10 @@ int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum,
 	if (c->ro_error)
 		return -EROFS;
 
-	ubifs_prepare_node(c, buf, len, 1);
+	err = ubifs_prepare_node_hmac(c, buf, len, hmac_offs, 1);
+	if (err)
+		return err;
+
 	err = ubifs_leb_write(c, lnum, buf, offs, buf_len);
 	if (err)
 		ubifs_dump_node(c, buf);
@@ -898,6 +938,26 @@ int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum,
 	return err;
 }
 
+/**
+ * ubifs_write_node - write node to the media.
+ * @c: UBIFS file-system description object
+ * @buf: the node to write
+ * @len: node length
+ * @lnum: logical eraseblock number
+ * @offs: offset within the logical eraseblock
+ *
+ * This function automatically fills node magic number, assigns sequence
+ * number, and calculates node CRC checksum. The length of the @buf buffer has
+ * to be aligned to the minimal I/O unit size. This function automatically
+ * appends padding node and padding bytes if needed. Returns zero in case of
+ * success and a negative error code in case of failure.
+ */
+int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum,
+		     int offs)
+{
+	return ubifs_write_node_hmac(c, buf, len, lnum, offs, -1);
+}
+
 /**
  * ubifs_read_node_wbuf - read node from the media or write-buffer.
  * @wbuf: wbuf to check for un-written data
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 3300f68c40978..42e904b060f9b 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1710,11 +1710,15 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len,
 			 int lnum, int offs);
 int ubifs_write_node(struct ubifs_info *c, void *node, int len, int lnum,
 		     int offs);
+int ubifs_write_node_hmac(struct ubifs_info *c, void *buf, int len, int lnum,
+			  int offs, int hmac_offs);
 int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
 		     int offs, int quiet, int must_chk_crc);
 void ubifs_init_node(struct ubifs_info *c, void *buf, int len, int pad);
 void ubifs_crc_node(struct ubifs_info *c, void *buf, int len);
 void ubifs_prepare_node(struct ubifs_info *c, void *buf, int len, int pad);
+int ubifs_prepare_node_hmac(struct ubifs_info *c, void *node, int len,
+			    int hmac_offs, int pad);
 void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last);
 int ubifs_io_init(struct ubifs_info *c);
 void ubifs_pad(const struct ubifs_info *c, void *buf, int pad);
-- 
GitLab


From 823838a486888cf484e739ab37df14cb04dfddb5 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Fri, 7 Sep 2018 14:36:34 +0200
Subject: [PATCH 0113/1890] ubifs: Add hashes to the tree node cache

As part of the UBIFS authentication support every branch in the index
gets a hash covering the referenced node. To make that happen the tree
node cache needs hashes over the nodes. This patch adds a hash argument
to ubifs_tnc_add() and ubifs_tnc_add_nm(). The hashes are calculated
from the callers of these functions which actually prepare the nodes.
With this patch all the leaf nodes of the index tree get hashes, but
currently nothing is done with these hashes, this is left for a later
patch.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/journal.c | 144 +++++++++++++++++++++++++++++++++++++--------
 fs/ubifs/replay.c  |   4 +-
 fs/ubifs/tnc.c     |  10 +++-
 fs/ubifs/ubifs.h   |   7 ++-
 4 files changed, 135 insertions(+), 30 deletions(-)

diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index de7ac9e7accef..7c12bdfa9a7aa 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -518,6 +518,9 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
 	struct ubifs_dent_node *dent;
 	struct ubifs_ino_node *ino;
 	union ubifs_key dent_key, ino_key;
+	u8 hash_dent[UBIFS_HASH_ARR_SZ];
+	u8 hash_ino[UBIFS_HASH_ARR_SZ];
+	u8 hash_ino_host[UBIFS_HASH_ARR_SZ];
 
 	ubifs_assert(c, mutex_is_locked(&host_ui->ui_mutex));
 
@@ -572,11 +575,21 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
 
 	zero_dent_node_unused(dent);
 	ubifs_prep_grp_node(c, dent, dlen, 0);
+	err = ubifs_node_calc_hash(c, dent, hash_dent);
+	if (err)
+		goto out_release;
 
 	ino = (void *)dent + aligned_dlen;
 	pack_inode(c, ino, inode, 0);
+	err = ubifs_node_calc_hash(c, ino, hash_ino);
+	if (err)
+		goto out_release;
+
 	ino = (void *)ino + aligned_ilen;
 	pack_inode(c, ino, dir, 1);
+	err = ubifs_node_calc_hash(c, ino, hash_ino_host);
+	if (err)
+		goto out_release;
 
 	if (last_reference) {
 		err = ubifs_add_orphan(c, inode->i_ino);
@@ -608,7 +621,8 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
 			goto out_ro;
 		err = ubifs_add_dirt(c, lnum, dlen);
 	} else
-		err = ubifs_tnc_add_nm(c, &dent_key, lnum, dent_offs, dlen, nm);
+		err = ubifs_tnc_add_nm(c, &dent_key, lnum, dent_offs, dlen,
+				       hash_dent, nm);
 	if (err)
 		goto out_ro;
 
@@ -620,14 +634,14 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
 	 */
 	ino_key_init(c, &ino_key, inode->i_ino);
 	ino_offs = dent_offs + aligned_dlen;
-	err = ubifs_tnc_add(c, &ino_key, lnum, ino_offs, ilen);
+	err = ubifs_tnc_add(c, &ino_key, lnum, ino_offs, ilen, hash_ino);
 	if (err)
 		goto out_ro;
 
 	ino_key_init(c, &ino_key, dir->i_ino);
 	ino_offs += aligned_ilen;
 	err = ubifs_tnc_add(c, &ino_key, lnum, ino_offs,
-			    UBIFS_INO_NODE_SZ + host_ui->data_len);
+			    UBIFS_INO_NODE_SZ + host_ui->data_len, hash_ino_host);
 	if (err)
 		goto out_ro;
 
@@ -680,6 +694,7 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
 	int dlen = COMPRESSED_DATA_NODE_BUF_SZ, allocated = 1;
 	struct ubifs_inode *ui = ubifs_inode(inode);
 	bool encrypted = ubifs_crypt_is_encrypted(inode);
+	u8 hash[UBIFS_HASH_ARR_SZ];
 
 	dbg_jnlk(key, "ino %lu, blk %u, len %d, key ",
 		(unsigned long)key_inum(c, key), key_block(c, key), len);
@@ -738,10 +753,15 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
 	err = write_head(c, DATAHD, data, dlen, &lnum, &offs, 0);
 	if (err)
 		goto out_release;
+
+	err = ubifs_node_calc_hash(c, data, hash);
+	if (err)
+		goto out_release;
+
 	ubifs_wbuf_add_ino_nolock(&c->jheads[DATAHD].wbuf, key_inum(c, key));
 	release_head(c, DATAHD);
 
-	err = ubifs_tnc_add(c, key, lnum, offs, dlen);
+	err = ubifs_tnc_add(c, key, lnum, offs, dlen, hash);
 	if (err)
 		goto out_ro;
 
@@ -780,6 +800,7 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode)
 	struct ubifs_ino_node *ino;
 	struct ubifs_inode *ui = ubifs_inode(inode);
 	int sync = 0, len = UBIFS_INO_NODE_SZ, last_reference = !inode->i_nlink;
+	u8 hash[UBIFS_HASH_ARR_SZ];
 
 	dbg_jnl("ino %lu, nlink %u", inode->i_ino, inode->i_nlink);
 
@@ -801,6 +822,10 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode)
 		goto out_free;
 
 	pack_inode(c, ino, inode, 1);
+	err = ubifs_node_calc_hash(c, ino, hash);
+	if (err)
+		goto out_release;
+
 	err = write_head(c, BASEHD, ino, len, &lnum, &offs, sync);
 	if (err)
 		goto out_release;
@@ -819,7 +844,7 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode)
 		union ubifs_key key;
 
 		ino_key_init(c, &key, inode->i_ino);
-		err = ubifs_tnc_add(c, &key, lnum, offs, len);
+		err = ubifs_tnc_add(c, &key, lnum, offs, len, hash);
 	}
 	if (err)
 		goto out_ro;
@@ -929,6 +954,10 @@ int ubifs_jnl_xrename(struct ubifs_info *c, const struct inode *fst_dir,
 	int aligned_dlen1, aligned_dlen2;
 	int twoparents = (fst_dir != snd_dir);
 	void *p;
+	u8 hash_dent1[UBIFS_HASH_ARR_SZ];
+	u8 hash_dent2[UBIFS_HASH_ARR_SZ];
+	u8 hash_p1[UBIFS_HASH_ARR_SZ];
+	u8 hash_p2[UBIFS_HASH_ARR_SZ];
 
 	ubifs_assert(c, ubifs_inode(fst_dir)->data_len == 0);
 	ubifs_assert(c, ubifs_inode(snd_dir)->data_len == 0);
@@ -964,6 +993,9 @@ int ubifs_jnl_xrename(struct ubifs_info *c, const struct inode *fst_dir,
 	set_dent_cookie(c, dent1);
 	zero_dent_node_unused(dent1);
 	ubifs_prep_grp_node(c, dent1, dlen1, 0);
+	err = ubifs_node_calc_hash(c, dent1, hash_dent1);
+	if (err)
+		goto out_release;
 
 	/* Make new dent for 2nd entry */
 	dent2 = (void *)dent1 + aligned_dlen1;
@@ -977,14 +1009,26 @@ int ubifs_jnl_xrename(struct ubifs_info *c, const struct inode *fst_dir,
 	set_dent_cookie(c, dent2);
 	zero_dent_node_unused(dent2);
 	ubifs_prep_grp_node(c, dent2, dlen2, 0);
+	err = ubifs_node_calc_hash(c, dent2, hash_dent2);
+	if (err)
+		goto out_release;
 
 	p = (void *)dent2 + aligned_dlen2;
-	if (!twoparents)
+	if (!twoparents) {
 		pack_inode(c, p, fst_dir, 1);
-	else {
+		err = ubifs_node_calc_hash(c, p, hash_p1);
+		if (err)
+			goto out_release;
+	} else {
 		pack_inode(c, p, fst_dir, 0);
+		err = ubifs_node_calc_hash(c, p, hash_p1);
+		if (err)
+			goto out_release;
 		p += ALIGN(plen, 8);
 		pack_inode(c, p, snd_dir, 1);
+		err = ubifs_node_calc_hash(c, p, hash_p2);
+		if (err)
+			goto out_release;
 	}
 
 	err = write_head(c, BASEHD, dent1, len, &lnum, &offs, sync);
@@ -999,27 +1043,27 @@ int ubifs_jnl_xrename(struct ubifs_info *c, const struct inode *fst_dir,
 	release_head(c, BASEHD);
 
 	dent_key_init(c, &key, snd_dir->i_ino, snd_nm);
-	err = ubifs_tnc_add_nm(c, &key, lnum, offs, dlen1, snd_nm);
+	err = ubifs_tnc_add_nm(c, &key, lnum, offs, dlen1, hash_dent1, snd_nm);
 	if (err)
 		goto out_ro;
 
 	offs += aligned_dlen1;
 	dent_key_init(c, &key, fst_dir->i_ino, fst_nm);
-	err = ubifs_tnc_add_nm(c, &key, lnum, offs, dlen2, fst_nm);
+	err = ubifs_tnc_add_nm(c, &key, lnum, offs, dlen2, hash_dent2, fst_nm);
 	if (err)
 		goto out_ro;
 
 	offs += aligned_dlen2;
 
 	ino_key_init(c, &key, fst_dir->i_ino);
-	err = ubifs_tnc_add(c, &key, lnum, offs, plen);
+	err = ubifs_tnc_add(c, &key, lnum, offs, plen, hash_p1);
 	if (err)
 		goto out_ro;
 
 	if (twoparents) {
 		offs += ALIGN(plen, 8);
 		ino_key_init(c, &key, snd_dir->i_ino);
-		err = ubifs_tnc_add(c, &key, lnum, offs, plen);
+		err = ubifs_tnc_add(c, &key, lnum, offs, plen, hash_p2);
 		if (err)
 			goto out_ro;
 	}
@@ -1072,6 +1116,11 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
 	int last_reference = !!(new_inode && new_inode->i_nlink == 0);
 	int move = (old_dir != new_dir);
 	struct ubifs_inode *uninitialized_var(new_ui);
+	u8 hash_old_dir[UBIFS_HASH_ARR_SZ];
+	u8 hash_new_dir[UBIFS_HASH_ARR_SZ];
+	u8 hash_new_inode[UBIFS_HASH_ARR_SZ];
+	u8 hash_dent1[UBIFS_HASH_ARR_SZ];
+	u8 hash_dent2[UBIFS_HASH_ARR_SZ];
 
 	ubifs_assert(c, ubifs_inode(old_dir)->data_len == 0);
 	ubifs_assert(c, ubifs_inode(new_dir)->data_len == 0);
@@ -1114,6 +1163,9 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
 	set_dent_cookie(c, dent);
 	zero_dent_node_unused(dent);
 	ubifs_prep_grp_node(c, dent, dlen1, 0);
+	err = ubifs_node_calc_hash(c, dent, hash_dent1);
+	if (err)
+		goto out_release;
 
 	dent2 = (void *)dent + aligned_dlen1;
 	dent2->ch.node_type = UBIFS_DENT_NODE;
@@ -1133,19 +1185,36 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
 	set_dent_cookie(c, dent2);
 	zero_dent_node_unused(dent2);
 	ubifs_prep_grp_node(c, dent2, dlen2, 0);
+	err = ubifs_node_calc_hash(c, dent2, hash_dent2);
+	if (err)
+		goto out_release;
 
 	p = (void *)dent2 + aligned_dlen2;
 	if (new_inode) {
 		pack_inode(c, p, new_inode, 0);
+		err = ubifs_node_calc_hash(c, p, hash_new_inode);
+		if (err)
+			goto out_release;
+
 		p += ALIGN(ilen, 8);
 	}
 
-	if (!move)
+	if (!move) {
 		pack_inode(c, p, old_dir, 1);
-	else {
+		err = ubifs_node_calc_hash(c, p, hash_old_dir);
+		if (err)
+			goto out_release;
+	} else {
 		pack_inode(c, p, old_dir, 0);
+		err = ubifs_node_calc_hash(c, p, hash_old_dir);
+		if (err)
+			goto out_release;
+
 		p += ALIGN(plen, 8);
 		pack_inode(c, p, new_dir, 1);
+		err = ubifs_node_calc_hash(c, p, hash_new_dir);
+		if (err)
+			goto out_release;
 	}
 
 	if (last_reference) {
@@ -1172,14 +1241,14 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
 	release_head(c, BASEHD);
 
 	dent_key_init(c, &key, new_dir->i_ino, new_nm);
-	err = ubifs_tnc_add_nm(c, &key, lnum, offs, dlen1, new_nm);
+	err = ubifs_tnc_add_nm(c, &key, lnum, offs, dlen1, hash_dent1, new_nm);
 	if (err)
 		goto out_ro;
 
 	offs += aligned_dlen1;
 	if (whiteout) {
 		dent_key_init(c, &key, old_dir->i_ino, old_nm);
-		err = ubifs_tnc_add_nm(c, &key, lnum, offs, dlen2, old_nm);
+		err = ubifs_tnc_add_nm(c, &key, lnum, offs, dlen2, hash_dent2, old_nm);
 		if (err)
 			goto out_ro;
 
@@ -1198,21 +1267,21 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
 	offs += aligned_dlen2;
 	if (new_inode) {
 		ino_key_init(c, &key, new_inode->i_ino);
-		err = ubifs_tnc_add(c, &key, lnum, offs, ilen);
+		err = ubifs_tnc_add(c, &key, lnum, offs, ilen, hash_new_inode);
 		if (err)
 			goto out_ro;
 		offs += ALIGN(ilen, 8);
 	}
 
 	ino_key_init(c, &key, old_dir->i_ino);
-	err = ubifs_tnc_add(c, &key, lnum, offs, plen);
+	err = ubifs_tnc_add(c, &key, lnum, offs, plen, hash_old_dir);
 	if (err)
 		goto out_ro;
 
 	if (move) {
 		offs += ALIGN(plen, 8);
 		ino_key_init(c, &key, new_dir->i_ino);
-		err = ubifs_tnc_add(c, &key, lnum, offs, plen);
+		err = ubifs_tnc_add(c, &key, lnum, offs, plen, hash_new_dir);
 		if (err)
 			goto out_ro;
 	}
@@ -1331,6 +1400,8 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
 	struct ubifs_inode *ui = ubifs_inode(inode);
 	ino_t inum = inode->i_ino;
 	unsigned int blk;
+	u8 hash_ino[UBIFS_HASH_ARR_SZ];
+	u8 hash_dn[UBIFS_HASH_ARR_SZ];
 
 	dbg_jnl("ino %lu, size %lld -> %lld",
 		(unsigned long)inum, old_size, new_size);
@@ -1392,9 +1463,17 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
 		goto out_free;
 
 	pack_inode(c, ino, inode, 0);
+	err = ubifs_node_calc_hash(c, ino, hash_ino);
+	if (err)
+		goto out_release;
+
 	ubifs_prep_grp_node(c, trun, UBIFS_TRUN_NODE_SZ, dlen ? 0 : 1);
-	if (dlen)
+	if (dlen) {
 		ubifs_prep_grp_node(c, dn, dlen, 1);
+		err = ubifs_node_calc_hash(c, dn, hash_dn);
+		if (err)
+			goto out_release;
+	}
 
 	err = write_head(c, BASEHD, ino, len, &lnum, &offs, sync);
 	if (err)
@@ -1405,13 +1484,13 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
 
 	if (dlen) {
 		sz = offs + UBIFS_INO_NODE_SZ + UBIFS_TRUN_NODE_SZ;
-		err = ubifs_tnc_add(c, &key, lnum, sz, dlen);
+		err = ubifs_tnc_add(c, &key, lnum, sz, dlen, hash_dn);
 		if (err)
 			goto out_ro;
 	}
 
 	ino_key_init(c, &key, inum);
-	err = ubifs_tnc_add(c, &key, lnum, offs, UBIFS_INO_NODE_SZ);
+	err = ubifs_tnc_add(c, &key, lnum, offs, UBIFS_INO_NODE_SZ, hash_ino);
 	if (err)
 		goto out_ro;
 
@@ -1472,6 +1551,7 @@ int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host,
 	union ubifs_key xent_key, key1, key2;
 	int sync = IS_DIRSYNC(host);
 	struct ubifs_inode *host_ui = ubifs_inode(host);
+	u8 hash[UBIFS_HASH_ARR_SZ];
 
 	ubifs_assert(c, inode->i_nlink == 0);
 	ubifs_assert(c, mutex_is_locked(&host_ui->ui_mutex));
@@ -1511,6 +1591,9 @@ int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host,
 	pack_inode(c, ino, inode, 0);
 	ino = (void *)ino + UBIFS_INO_NODE_SZ;
 	pack_inode(c, ino, host, 1);
+	err = ubifs_node_calc_hash(c, ino, hash);
+	if (err)
+		goto out_release;
 
 	err = write_head(c, BASEHD, xent, len, &lnum, &xent_offs, sync);
 	if (!sync && !err)
@@ -1543,7 +1626,7 @@ int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host,
 
 	/* And update TNC with the new host inode position */
 	ino_key_init(c, &key1, host->i_ino);
-	err = ubifs_tnc_add(c, &key1, lnum, xent_offs + len - hlen, hlen);
+	err = ubifs_tnc_add(c, &key1, lnum, xent_offs + len - hlen, hlen, hash);
 	if (err)
 		goto out_ro;
 
@@ -1554,6 +1637,9 @@ int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host,
 	mark_inode_clean(c, host_ui);
 	return 0;
 
+out_release:
+	kfree(xent);
+	release_head(c, BASEHD);
 out_ro:
 	ubifs_ro_mode(c, err);
 	finish_reservation(c);
@@ -1581,6 +1667,8 @@ int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode,
 	struct ubifs_ino_node *ino;
 	union ubifs_key key;
 	int sync = IS_DIRSYNC(host);
+	u8 hash_host[UBIFS_HASH_ARR_SZ];
+	u8 hash[UBIFS_HASH_ARR_SZ];
 
 	dbg_jnl("ino %lu, ino %lu", host->i_ino, inode->i_ino);
 	ubifs_assert(c, host->i_nlink > 0);
@@ -1602,7 +1690,13 @@ int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode,
 		goto out_free;
 
 	pack_inode(c, ino, host, 0);
+	err = ubifs_node_calc_hash(c, ino, hash_host);
+	if (err)
+		goto out_release;
 	pack_inode(c, (void *)ino + aligned_len1, inode, 1);
+	err = ubifs_node_calc_hash(c, (void *)ino + aligned_len1, hash);
+	if (err)
+		goto out_release;
 
 	err = write_head(c, BASEHD, ino, aligned_len, &lnum, &offs, 0);
 	if (!sync && !err) {
@@ -1616,12 +1710,12 @@ int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode,
 		goto out_ro;
 
 	ino_key_init(c, &key, host->i_ino);
-	err = ubifs_tnc_add(c, &key, lnum, offs, len1);
+	err = ubifs_tnc_add(c, &key, lnum, offs, len1, hash_host);
 	if (err)
 		goto out_ro;
 
 	ino_key_init(c, &key, inode->i_ino);
-	err = ubifs_tnc_add(c, &key, lnum, offs + aligned_len1, len2);
+	err = ubifs_tnc_add(c, &key, lnum, offs + aligned_len1, len2, hash);
 	if (err)
 		goto out_ro;
 
@@ -1633,6 +1727,8 @@ int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode,
 	kfree(ino);
 	return 0;
 
+out_release:
+	release_head(c, BASEHD);
 out_ro:
 	ubifs_ro_mode(c, err);
 	finish_reservation(c);
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index 4844538eb9262..bccb35f722775 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -228,7 +228,7 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
 			err = ubifs_tnc_remove_nm(c, &r->key, &r->nm);
 		else
 			err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs,
-					       r->len, &r->nm);
+					       r->len, NULL, &r->nm);
 	} else {
 		if (r->deletion)
 			switch (key_type(c, &r->key)) {
@@ -248,7 +248,7 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
 			}
 		else
 			err = ubifs_tnc_add(c, &r->key, r->lnum, r->offs,
-					    r->len);
+					    r->len, NULL);
 		if (err)
 			return err;
 
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index 8502c07c1e0a1..bb5f989a6e067 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -2260,13 +2260,14 @@ static int tnc_insert(struct ubifs_info *c, struct ubifs_znode *znode,
  * @lnum: LEB number of node
  * @offs: node offset
  * @len: node length
+ * @hash: The hash over the node
  *
  * This function adds a node with key @key to TNC. The node may be new or it may
  * obsolete some existing one. Returns %0 on success or negative error code on
  * failure.
  */
 int ubifs_tnc_add(struct ubifs_info *c, const union ubifs_key *key, int lnum,
-		  int offs, int len)
+		  int offs, int len, const u8 *hash)
 {
 	int found, n, err = 0;
 	struct ubifs_znode *znode;
@@ -2281,6 +2282,7 @@ int ubifs_tnc_add(struct ubifs_info *c, const union ubifs_key *key, int lnum,
 		zbr.lnum = lnum;
 		zbr.offs = offs;
 		zbr.len = len;
+		ubifs_copy_hash(c, hash, zbr.hash);
 		key_copy(c, key, &zbr.key);
 		err = tnc_insert(c, znode, &zbr, n + 1);
 	} else if (found == 1) {
@@ -2291,6 +2293,7 @@ int ubifs_tnc_add(struct ubifs_info *c, const union ubifs_key *key, int lnum,
 		zbr->lnum = lnum;
 		zbr->offs = offs;
 		zbr->len = len;
+		ubifs_copy_hash(c, hash, zbr->hash);
 	} else
 		err = found;
 	if (!err)
@@ -2392,13 +2395,14 @@ int ubifs_tnc_replace(struct ubifs_info *c, const union ubifs_key *key,
  * @lnum: LEB number of node
  * @offs: node offset
  * @len: node length
+ * @hash: The hash over the node
  * @nm: node name
  *
  * This is the same as 'ubifs_tnc_add()' but it should be used with keys which
  * may have collisions, like directory entry keys.
  */
 int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key,
-		     int lnum, int offs, int len,
+		     int lnum, int offs, int len, const u8 *hash,
 		     const struct fscrypt_name *nm)
 {
 	int found, n, err = 0;
@@ -2441,6 +2445,7 @@ int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key,
 			zbr->lnum = lnum;
 			zbr->offs = offs;
 			zbr->len = len;
+			ubifs_copy_hash(c, hash, zbr->hash);
 			goto out_unlock;
 		}
 	}
@@ -2452,6 +2457,7 @@ int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key,
 		zbr.lnum = lnum;
 		zbr.offs = offs;
 		zbr.len = len;
+		ubifs_copy_hash(c, hash, zbr.hash);
 		key_copy(c, key, &zbr.key);
 		err = tnc_insert(c, znode, &zbr, n + 1);
 		if (err)
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 42e904b060f9b..a7e423347e6dd 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -747,6 +747,7 @@ struct ubifs_jhead {
  * @lnum: LEB number of the target node (indexing node or data node)
  * @offs: target node offset within @lnum
  * @len: target node length
+ * @hash: the hash of the target node
  */
 struct ubifs_zbranch {
 	union ubifs_key key;
@@ -757,6 +758,7 @@ struct ubifs_zbranch {
 	int lnum;
 	int offs;
 	int len;
+	u8 hash[UBIFS_HASH_ARR_SZ];
 };
 
 /**
@@ -1818,11 +1820,12 @@ int ubifs_tnc_lookup_dh(struct ubifs_info *c, const union ubifs_key *key,
 int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key,
 		     void *node, int *lnum, int *offs);
 int ubifs_tnc_add(struct ubifs_info *c, const union ubifs_key *key, int lnum,
-		  int offs, int len);
+		  int offs, int len, const u8 *hash);
 int ubifs_tnc_replace(struct ubifs_info *c, const union ubifs_key *key,
 		      int old_lnum, int old_offs, int lnum, int offs, int len);
 int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key,
-		     int lnum, int offs, int len, const struct fscrypt_name *nm);
+		     int lnum, int offs, int len, const u8 *hash,
+		     const struct fscrypt_name *nm);
 int ubifs_tnc_remove(struct ubifs_info *c, const union ubifs_key *key);
 int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key,
 			const struct fscrypt_name *nm);
-- 
GitLab


From 16a26b20d2afd0cf063816725b45b12e78d5bb31 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Fri, 7 Sep 2018 14:36:35 +0200
Subject: [PATCH 0114/1890] ubifs: authentication: Add hashes to index nodes

With this patch the hashes over the index nodes stored in the tree node
cache are written to flash and are checked when read back from flash.
The hash of the root index node is stored in the master node.

During journal replay the hashes are regenerated from the read nodes
and stored in the tree node cache. This means the nodes must previously
be authenticated by other means. This is done in a later patch.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/master.c     |  3 +++
 fs/ubifs/misc.h       |  5 +++--
 fs/ubifs/replay.c     | 29 ++++++++++++++++++-----------
 fs/ubifs/tnc.c        | 12 ++++++++++++
 fs/ubifs/tnc_commit.c | 27 +++++++++++++++++++++++++++
 fs/ubifs/tnc_misc.c   | 15 ++++++++++++++-
 fs/ubifs/ubifs.h      |  4 ++++
 7 files changed, 81 insertions(+), 14 deletions(-)

diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c
index 9df4a41bba523..0ca9d3513b4d7 100644
--- a/fs/ubifs/master.c
+++ b/fs/ubifs/master.c
@@ -305,6 +305,8 @@ int ubifs_read_master(struct ubifs_info *c)
 	c->lst.total_dead  = le64_to_cpu(c->mst_node->total_dead);
 	c->lst.total_dark  = le64_to_cpu(c->mst_node->total_dark);
 
+	ubifs_copy_hash(c, c->mst_node->hash_root_idx, c->zroot.hash);
+
 	c->calc_idx_sz = c->bi.old_idx_sz;
 
 	if (c->mst_node->flags & cpu_to_le32(UBIFS_MST_NO_ORPHS))
@@ -378,6 +380,7 @@ int ubifs_write_master(struct ubifs_info *c)
 	c->mst_offs = offs;
 	c->mst_node->highest_inum = cpu_to_le64(c->highest_inum);
 
+	ubifs_copy_hash(c, c->zroot.hash, c->mst_node->hash_root_idx);
 	err = ubifs_write_node(c, c->mst_node, len, lnum, offs);
 	if (err)
 		return err;
diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h
index 21d35d7dd975c..6f87237fdbf43 100644
--- a/fs/ubifs/misc.h
+++ b/fs/ubifs/misc.h
@@ -197,7 +197,8 @@ static inline int ubifs_return_leb(struct ubifs_info *c, int lnum)
  */
 static inline int ubifs_idx_node_sz(const struct ubifs_info *c, int child_cnt)
 {
-	return UBIFS_IDX_NODE_SZ + (UBIFS_BRANCH_SZ + c->key_len) * child_cnt;
+	return UBIFS_IDX_NODE_SZ + (UBIFS_BRANCH_SZ + c->key_len + c->hash_len)
+				   * child_cnt;
 }
 
 /**
@@ -212,7 +213,7 @@ struct ubifs_branch *ubifs_idx_branch(const struct ubifs_info *c,
 				      int bnum)
 {
 	return (struct ubifs_branch *)((void *)idx->branches +
-				       (UBIFS_BRANCH_SZ + c->key_len) * bnum);
+			(UBIFS_BRANCH_SZ + c->key_len + c->hash_len) * bnum);
 }
 
 /**
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index bccb35f722775..1c6ceb6265aa3 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -56,6 +56,7 @@ struct replay_entry {
 	int lnum;
 	int offs;
 	int len;
+	u8 hash[UBIFS_HASH_ARR_SZ];
 	unsigned int deletion:1;
 	unsigned long long sqnum;
 	struct list_head list;
@@ -228,7 +229,7 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
 			err = ubifs_tnc_remove_nm(c, &r->key, &r->nm);
 		else
 			err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs,
-					       r->len, NULL, &r->nm);
+					       r->len, r->hash, &r->nm);
 	} else {
 		if (r->deletion)
 			switch (key_type(c, &r->key)) {
@@ -248,7 +249,7 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
 			}
 		else
 			err = ubifs_tnc_add(c, &r->key, r->lnum, r->offs,
-					    r->len, NULL);
+					    r->len, r->hash);
 		if (err)
 			return err;
 
@@ -352,9 +353,9 @@ static void destroy_replay_list(struct ubifs_info *c)
  * in case of success and a negative error code in case of failure.
  */
 static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
-		       union ubifs_key *key, unsigned long long sqnum,
-		       int deletion, int *used, loff_t old_size,
-		       loff_t new_size)
+		       const u8 *hash, union ubifs_key *key,
+		       unsigned long long sqnum, int deletion, int *used,
+		       loff_t old_size, loff_t new_size)
 {
 	struct replay_entry *r;
 
@@ -372,6 +373,7 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
 	r->lnum = lnum;
 	r->offs = offs;
 	r->len = len;
+	ubifs_copy_hash(c, hash, r->hash);
 	r->deletion = !!deletion;
 	r->sqnum = sqnum;
 	key_copy(c, key, &r->key);
@@ -400,8 +402,9 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
  * negative error code in case of failure.
  */
 static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len,
-		       union ubifs_key *key, const char *name, int nlen,
-		       unsigned long long sqnum, int deletion, int *used)
+		       const u8 *hash, union ubifs_key *key,
+		       const char *name, int nlen, unsigned long long sqnum,
+		       int deletion, int *used)
 {
 	struct replay_entry *r;
 	char *nbuf;
@@ -425,6 +428,7 @@ static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len,
 	r->lnum = lnum;
 	r->offs = offs;
 	r->len = len;
+	ubifs_copy_hash(c, hash, r->hash);
 	r->deletion = !!deletion;
 	r->sqnum = sqnum;
 	key_copy(c, key, &r->key);
@@ -582,6 +586,7 @@ static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
 	 */
 
 	list_for_each_entry(snod, &sleb->nodes, list) {
+		u8 hash[UBIFS_HASH_ARR_SZ];
 		int deletion = 0;
 
 		cond_resched();
@@ -591,6 +596,8 @@ static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
 			goto out_dump;
 		}
 
+		ubifs_node_calc_hash(c, snod->node, hash);
+
 		if (snod->sqnum > c->max_sqnum)
 			c->max_sqnum = snod->sqnum;
 
@@ -602,7 +609,7 @@ static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
 
 			if (le32_to_cpu(ino->nlink) == 0)
 				deletion = 1;
-			err = insert_node(c, lnum, snod->offs, snod->len,
+			err = insert_node(c, lnum, snod->offs, snod->len, hash,
 					  &snod->key, snod->sqnum, deletion,
 					  &used, 0, new_size);
 			break;
@@ -614,7 +621,7 @@ static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
 					  key_block(c, &snod->key) *
 					  UBIFS_BLOCK_SIZE;
 
-			err = insert_node(c, lnum, snod->offs, snod->len,
+			err = insert_node(c, lnum, snod->offs, snod->len, hash,
 					  &snod->key, snod->sqnum, deletion,
 					  &used, 0, new_size);
 			break;
@@ -628,7 +635,7 @@ static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
 			if (err)
 				goto out_dump;
 
-			err = insert_dent(c, lnum, snod->offs, snod->len,
+			err = insert_dent(c, lnum, snod->offs, snod->len, hash,
 					  &snod->key, dent->name,
 					  le16_to_cpu(dent->nlen), snod->sqnum,
 					  !le64_to_cpu(dent->inum), &used);
@@ -654,7 +661,7 @@ static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
 			 * functions which expect nodes to have keys.
 			 */
 			trun_key_init(c, &key, le32_to_cpu(trun->inum));
-			err = insert_node(c, lnum, snod->offs, snod->len,
+			err = insert_node(c, lnum, snod->offs, snod->len, hash,
 					  &key, snod->sqnum, 1, &used,
 					  old_size, new_size);
 			break;
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index bb5f989a6e067..25572ffea1634 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -488,6 +488,12 @@ static int try_read_node(const struct ubifs_info *c, void *buf, int type,
 	if (crc != node_crc)
 		return 0;
 
+	err = ubifs_node_check_hash(c, buf, zbr->hash);
+	if (err) {
+		ubifs_bad_hash(c, buf, zbr->hash, lnum, offs);
+		return 0;
+	}
+
 	return 1;
 }
 
@@ -1713,6 +1719,12 @@ static int validate_data_node(struct ubifs_info *c, void *buf,
 		goto out;
 	}
 
+	err = ubifs_node_check_hash(c, buf, zbr->hash);
+	if (err) {
+		ubifs_bad_hash(c, buf, zbr->hash, zbr->lnum, zbr->offs);
+		return err;
+	}
+
 	len = le32_to_cpu(ch->len);
 	if (len != zbr->len) {
 		ubifs_err(c, "bad node length %d, expected %d", len, zbr->len);
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
index dba87d09b9893..dbcd2c350b652 100644
--- a/fs/ubifs/tnc_commit.c
+++ b/fs/ubifs/tnc_commit.c
@@ -38,6 +38,7 @@ static int make_idx_node(struct ubifs_info *c, struct ubifs_idx_node *idx,
 			 struct ubifs_znode *znode, int lnum, int offs, int len)
 {
 	struct ubifs_znode *zp;
+	u8 hash[UBIFS_HASH_ARR_SZ];
 	int i, err;
 
 	/* Make index node */
@@ -52,6 +53,7 @@ static int make_idx_node(struct ubifs_info *c, struct ubifs_idx_node *idx,
 		br->lnum = cpu_to_le32(zbr->lnum);
 		br->offs = cpu_to_le32(zbr->offs);
 		br->len = cpu_to_le32(zbr->len);
+		ubifs_copy_hash(c, zbr->hash, ubifs_branch_hash(c, br));
 		if (!zbr->lnum || !zbr->len) {
 			ubifs_err(c, "bad ref in znode");
 			ubifs_dump_znode(c, znode);
@@ -62,6 +64,7 @@ static int make_idx_node(struct ubifs_info *c, struct ubifs_idx_node *idx,
 		}
 	}
 	ubifs_prepare_node(c, idx, len, 0);
+	ubifs_node_calc_hash(c, idx, hash);
 
 	znode->lnum = lnum;
 	znode->offs = offs;
@@ -78,10 +81,12 @@ static int make_idx_node(struct ubifs_info *c, struct ubifs_idx_node *idx,
 		zbr->lnum = lnum;
 		zbr->offs = offs;
 		zbr->len = len;
+		ubifs_copy_hash(c, hash, zbr->hash);
 	} else {
 		c->zroot.lnum = lnum;
 		c->zroot.offs = offs;
 		c->zroot.len = len;
+		ubifs_copy_hash(c, hash, c->zroot.hash);
 	}
 	c->calc_idx_sz += ALIGN(len, 8);
 
@@ -647,6 +652,8 @@ static int get_znodes_to_commit(struct ubifs_info *c)
 			znode->cnext = c->cnext;
 			break;
 		}
+		znode->cparent = znode->parent;
+		znode->ciip = znode->iip;
 		znode->cnext = cnext;
 		znode = cnext;
 		cnt += 1;
@@ -840,6 +847,8 @@ static int write_index(struct ubifs_info *c)
 	}
 
 	while (1) {
+		u8 hash[UBIFS_HASH_ARR_SZ];
+
 		cond_resched();
 
 		znode = cnext;
@@ -857,6 +866,7 @@ static int write_index(struct ubifs_info *c)
 			br->lnum = cpu_to_le32(zbr->lnum);
 			br->offs = cpu_to_le32(zbr->offs);
 			br->len = cpu_to_le32(zbr->len);
+			ubifs_copy_hash(c, zbr->hash, ubifs_branch_hash(c, br));
 			if (!zbr->lnum || !zbr->len) {
 				ubifs_err(c, "bad ref in znode");
 				ubifs_dump_znode(c, znode);
@@ -868,6 +878,23 @@ static int write_index(struct ubifs_info *c)
 		}
 		len = ubifs_idx_node_sz(c, znode->child_cnt);
 		ubifs_prepare_node(c, idx, len, 0);
+		ubifs_node_calc_hash(c, idx, hash);
+
+		mutex_lock(&c->tnc_mutex);
+
+		if (znode->cparent)
+			ubifs_copy_hash(c, hash,
+					znode->cparent->zbranch[znode->ciip].hash);
+
+		if (znode->parent) {
+			if (!ubifs_zn_obsolete(znode))
+				ubifs_copy_hash(c, hash,
+					znode->parent->zbranch[znode->iip].hash);
+		} else {
+			ubifs_copy_hash(c, hash, c->zroot.hash);
+		}
+
+		mutex_unlock(&c->tnc_mutex);
 
 		/* Determine the index node position */
 		if (lnum == -1) {
diff --git a/fs/ubifs/tnc_misc.c b/fs/ubifs/tnc_misc.c
index 6ce75999f2733..d1815e9590071 100644
--- a/fs/ubifs/tnc_misc.c
+++ b/fs/ubifs/tnc_misc.c
@@ -293,6 +293,12 @@ static int read_znode(struct ubifs_info *c, struct ubifs_zbranch *zzbr,
 		return err;
 	}
 
+	err = ubifs_node_check_hash(c, idx, zzbr->hash);
+	if (err) {
+		ubifs_bad_hash(c, idx, zzbr->hash, lnum, offs);
+		return err;
+	}
+
 	znode->child_cnt = le16_to_cpu(idx->child_cnt);
 	znode->level = le16_to_cpu(idx->level);
 
@@ -309,13 +315,14 @@ static int read_znode(struct ubifs_info *c, struct ubifs_zbranch *zzbr,
 	}
 
 	for (i = 0; i < znode->child_cnt; i++) {
-		const struct ubifs_branch *br = ubifs_idx_branch(c, idx, i);
+		struct ubifs_branch *br = ubifs_idx_branch(c, idx, i);
 		struct ubifs_zbranch *zbr = &znode->zbranch[i];
 
 		key_read(c, &br->key, &zbr->key);
 		zbr->lnum = le32_to_cpu(br->lnum);
 		zbr->offs = le32_to_cpu(br->offs);
 		zbr->len  = le32_to_cpu(br->len);
+		ubifs_copy_hash(c, ubifs_branch_hash(c, br), zbr->hash);
 		zbr->znode = NULL;
 
 		/* Validate branch */
@@ -497,5 +504,11 @@ int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr,
 		return -EINVAL;
 	}
 
+	err = ubifs_node_check_hash(c, node, zbr->hash);
+	if (err) {
+		ubifs_bad_hash(c, node, zbr->hash, zbr->lnum, zbr->offs);
+		return err;
+	}
+
 	return 0;
 }
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index a7e423347e6dd..67bfd58d28d46 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -765,6 +765,8 @@ struct ubifs_zbranch {
  * struct ubifs_znode - in-memory representation of an indexing node.
  * @parent: parent znode or NULL if it is the root
  * @cnext: next znode to commit
+ * @cparent: parent node for this commit
+ * @ciip: index in cparent's zbranch array
  * @flags: znode flags (%DIRTY_ZNODE, %COW_ZNODE or %OBSOLETE_ZNODE)
  * @time: last access time (seconds)
  * @level: level of the entry in the TNC tree
@@ -782,6 +784,8 @@ struct ubifs_zbranch {
 struct ubifs_znode {
 	struct ubifs_znode *parent;
 	struct ubifs_znode *cnext;
+	struct ubifs_znode *cparent;
+	int ciip;
 	unsigned long flags;
 	time64_t time;
 	int level;
-- 
GitLab


From 6a98bc4614de8fac8c6d520a6b20b194e23c9936 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Fri, 7 Sep 2018 14:36:36 +0200
Subject: [PATCH 0115/1890] ubifs: Add authentication nodes to journal

Nodes that are written to flash can only be authenticated through the
index after the next commit. When a journal replay is necessary the
nodes are not yet referenced by the index and thus can't be
authenticated.

This patch overcomes this situation by creating a hash over all nodes
beginning from the commit start node over the reference node(s) and
the buds themselves. From
time to time we insert authentication nodes. Authentication nodes
contain a HMAC from the current hash state, so that they can be
used to authenticate a journal replay up to the point where the
authentication node is. The hash is continued afterwards
so that theoretically we would only have to check the HMAC of
the last authentication node we find.

Overall we get this picture:

,,,,,,,,
,......,...........................................
,. CS  ,               hash1.----.           hash2.----.
,.  |  ,                    .    |hmac            .    |hmac
,.  v  ,                    .    v                .    v
,.REF#0,-> bud -> bud -> bud.-> auth -> bud -> bud.-> auth ...
,..|...,...........................................
,  |   ,
,  |   ,,,,,,,,,,,,,,,
.  |            hash3,----.
,  |                 ,    |hmac
,  v                 ,    v
, REF#1 -> bud -> bud,-> auth ...
,,,|,,,,,,,,,,,,,,,,,,
   v
  REF#2 -> ...
   |
   V
  ...

Note how hash3 covers CS, REF#0 and REF#1 so that it is not possible to
exchange or skip any reference nodes. Unlike the picture suggests the
auth nodes themselves are not hashed.

With this it is possible for an offline attacker to cut each journal
head or to drop the last reference node(s), but not to skip any journal
heads or to reorder any operations.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/gc.c      |   3 +-
 fs/ubifs/journal.c | 124 ++++++++++++++++++++++++++++++++++++++-------
 fs/ubifs/log.c     |  24 +++++++++
 fs/ubifs/replay.c  |   2 +
 fs/ubifs/super.c   |  10 ++++
 fs/ubifs/ubifs.h   |   8 +++
 6 files changed, 153 insertions(+), 18 deletions(-)

diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index d2680e0b4a36f..399d764f83cb1 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -254,7 +254,8 @@ static int sort_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
 			     snod->type == UBIFS_DATA_NODE ||
 			     snod->type == UBIFS_DENT_NODE ||
 			     snod->type == UBIFS_XENT_NODE ||
-			     snod->type == UBIFS_TRUN_NODE);
+			     snod->type == UBIFS_TRUN_NODE ||
+			     snod->type == UBIFS_AUTH_NODE);
 
 		if (snod->type != UBIFS_INO_NODE  &&
 		    snod->type != UBIFS_DATA_NODE &&
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index 7c12bdfa9a7aa..729dc76c83dff 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -90,6 +90,12 @@ static inline void zero_trun_node_unused(struct ubifs_trun_node *trun)
 	memset(trun->padding, 0, 12);
 }
 
+static void ubifs_add_auth_dirt(struct ubifs_info *c, int lnum)
+{
+	if (ubifs_authenticated(c))
+		ubifs_add_dirt(c, lnum, ubifs_auth_node_sz(c));
+}
+
 /**
  * reserve_space - reserve space in the journal.
  * @c: UBIFS file-system description object
@@ -228,6 +234,35 @@ static int reserve_space(struct ubifs_info *c, int jhead, int len)
 	return err;
 }
 
+static int ubifs_hash_nodes(struct ubifs_info *c, void *node,
+			     int len, struct shash_desc *hash)
+{
+	int auth_node_size = ubifs_auth_node_sz(c);
+	int err;
+
+	while (1) {
+		const struct ubifs_ch *ch = node;
+		int nodelen = le32_to_cpu(ch->len);
+
+		ubifs_assert(c, len >= auth_node_size);
+
+		if (len == auth_node_size)
+			break;
+
+		ubifs_assert(c, len > nodelen);
+		ubifs_assert(c, ch->magic == cpu_to_le32(UBIFS_NODE_MAGIC));
+
+		err = ubifs_shash_update(c, hash, (void *)node, nodelen);
+		if (err)
+			return err;
+
+		node += ALIGN(nodelen, 8);
+		len -= ALIGN(nodelen, 8);
+	}
+
+	return ubifs_prepare_auth_node(c, node, hash);
+}
+
 /**
  * write_head - write data to a journal head.
  * @c: UBIFS file-system description object
@@ -255,6 +290,12 @@ static int write_head(struct ubifs_info *c, int jhead, void *buf, int len,
 	dbg_jnl("jhead %s, LEB %d:%d, len %d",
 		dbg_jhead(jhead), *lnum, *offs, len);
 
+	if (ubifs_authenticated(c)) {
+		err = ubifs_hash_nodes(c, buf, len, c->jheads[jhead].log_hash);
+		if (err)
+			return err;
+	}
+
 	err = ubifs_wbuf_write_nolock(wbuf, buf, len);
 	if (err)
 		return err;
@@ -543,7 +584,10 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
 
 	len = aligned_dlen + aligned_ilen + UBIFS_INO_NODE_SZ;
 	/* Make sure to also account for extended attributes */
-	len += host_ui->data_len;
+	if (ubifs_authenticated(c))
+		len += ALIGN(host_ui->data_len, 8) + ubifs_auth_node_sz(c);
+	else
+		len += host_ui->data_len;
 
 	dent = kzalloc(len, GFP_NOFS);
 	if (!dent)
@@ -611,6 +655,7 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
 	}
 	release_head(c, BASEHD);
 	kfree(dent);
+	ubifs_add_auth_dirt(c, lnum);
 
 	if (deletion) {
 		if (nm->hash)
@@ -690,8 +735,9 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
 			 const union ubifs_key *key, const void *buf, int len)
 {
 	struct ubifs_data_node *data;
-	int err, lnum, offs, compr_type, out_len, compr_len;
+	int err, lnum, offs, compr_type, out_len, compr_len, auth_len;
 	int dlen = COMPRESSED_DATA_NODE_BUF_SZ, allocated = 1;
+	int write_len;
 	struct ubifs_inode *ui = ubifs_inode(inode);
 	bool encrypted = ubifs_crypt_is_encrypted(inode);
 	u8 hash[UBIFS_HASH_ARR_SZ];
@@ -703,7 +749,9 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
 	if (encrypted)
 		dlen += UBIFS_CIPHER_BLOCK_SIZE;
 
-	data = kmalloc(dlen, GFP_NOFS | __GFP_NOWARN);
+	auth_len = ubifs_auth_node_sz(c);
+
+	data = kmalloc(dlen + auth_len, GFP_NOFS | __GFP_NOWARN);
 	if (!data) {
 		/*
 		 * Fall-back to the write reserve buffer. Note, we might be
@@ -742,15 +790,20 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
 	}
 
 	dlen = UBIFS_DATA_NODE_SZ + out_len;
+	if (ubifs_authenticated(c))
+		write_len = ALIGN(dlen, 8) + auth_len;
+	else
+		write_len = dlen;
+
 	data->compr_type = cpu_to_le16(compr_type);
 
 	/* Make reservation before allocating sequence numbers */
-	err = make_reservation(c, DATAHD, dlen);
+	err = make_reservation(c, DATAHD, write_len);
 	if (err)
 		goto out_free;
 
 	ubifs_prepare_node(c, data, dlen, 0);
-	err = write_head(c, DATAHD, data, dlen, &lnum, &offs, 0);
+	err = write_head(c, DATAHD, data, write_len, &lnum, &offs, 0);
 	if (err)
 		goto out_release;
 
@@ -761,6 +814,8 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
 	ubifs_wbuf_add_ino_nolock(&c->jheads[DATAHD].wbuf, key_inum(c, key));
 	release_head(c, DATAHD);
 
+	ubifs_add_auth_dirt(c, lnum);
+
 	err = ubifs_tnc_add(c, key, lnum, offs, dlen, hash);
 	if (err)
 		goto out_ro;
@@ -799,7 +854,8 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode)
 	int err, lnum, offs;
 	struct ubifs_ino_node *ino;
 	struct ubifs_inode *ui = ubifs_inode(inode);
-	int sync = 0, len = UBIFS_INO_NODE_SZ, last_reference = !inode->i_nlink;
+	int sync = 0, write_len, ilen = UBIFS_INO_NODE_SZ;
+	int last_reference = !inode->i_nlink;
 	u8 hash[UBIFS_HASH_ARR_SZ];
 
 	dbg_jnl("ino %lu, nlink %u", inode->i_ino, inode->i_nlink);
@@ -809,15 +865,21 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode)
 	 * need to synchronize the write-buffer either.
 	 */
 	if (!last_reference) {
-		len += ui->data_len;
+		ilen += ui->data_len;
 		sync = IS_SYNC(inode);
 	}
-	ino = kmalloc(len, GFP_NOFS);
+
+	if (ubifs_authenticated(c))
+		write_len = ALIGN(ilen, 8) + ubifs_auth_node_sz(c);
+	else
+		write_len = ilen;
+
+	ino = kmalloc(write_len, GFP_NOFS);
 	if (!ino)
 		return -ENOMEM;
 
 	/* Make reservation before allocating sequence numbers */
-	err = make_reservation(c, BASEHD, len);
+	err = make_reservation(c, BASEHD, write_len);
 	if (err)
 		goto out_free;
 
@@ -826,7 +888,7 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode)
 	if (err)
 		goto out_release;
 
-	err = write_head(c, BASEHD, ino, len, &lnum, &offs, sync);
+	err = write_head(c, BASEHD, ino, write_len, &lnum, &offs, sync);
 	if (err)
 		goto out_release;
 	if (!sync)
@@ -834,17 +896,19 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode)
 					  inode->i_ino);
 	release_head(c, BASEHD);
 
+	ubifs_add_auth_dirt(c, lnum);
+
 	if (last_reference) {
 		err = ubifs_tnc_remove_ino(c, inode->i_ino);
 		if (err)
 			goto out_ro;
 		ubifs_delete_orphan(c, inode->i_ino);
-		err = ubifs_add_dirt(c, lnum, len);
+		err = ubifs_add_dirt(c, lnum, ilen);
 	} else {
 		union ubifs_key key;
 
 		ino_key_init(c, &key, inode->i_ino);
-		err = ubifs_tnc_add(c, &key, lnum, offs, len, hash);
+		err = ubifs_tnc_add(c, &key, lnum, offs, ilen, hash);
 	}
 	if (err)
 		goto out_ro;
@@ -973,6 +1037,8 @@ int ubifs_jnl_xrename(struct ubifs_info *c, const struct inode *fst_dir,
 	if (twoparents)
 		len += plen;
 
+	len += ubifs_auth_node_sz(c);
+
 	dent1 = kzalloc(len, GFP_NOFS);
 	if (!dent1)
 		return -ENOMEM;
@@ -1042,6 +1108,8 @@ int ubifs_jnl_xrename(struct ubifs_info *c, const struct inode *fst_dir,
 	}
 	release_head(c, BASEHD);
 
+	ubifs_add_auth_dirt(c, lnum);
+
 	dent_key_init(c, &key, snd_dir->i_ino, snd_nm);
 	err = ubifs_tnc_add_nm(c, &key, lnum, offs, dlen1, hash_dent1, snd_nm);
 	if (err)
@@ -1143,6 +1211,9 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
 	len = aligned_dlen1 + aligned_dlen2 + ALIGN(ilen, 8) + ALIGN(plen, 8);
 	if (move)
 		len += plen;
+
+	len += ubifs_auth_node_sz(c);
+
 	dent = kzalloc(len, GFP_NOFS);
 	if (!dent)
 		return -ENOMEM;
@@ -1240,6 +1311,8 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
 	}
 	release_head(c, BASEHD);
 
+	ubifs_add_auth_dirt(c, lnum);
+
 	dent_key_init(c, &key, new_dir->i_ino, new_nm);
 	err = ubifs_tnc_add_nm(c, &key, lnum, offs, dlen1, hash_dent1, new_nm);
 	if (err)
@@ -1411,6 +1484,9 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
 
 	sz = UBIFS_TRUN_NODE_SZ + UBIFS_INO_NODE_SZ +
 	     UBIFS_MAX_DATA_NODE_SZ * WORST_COMPR_FACTOR;
+
+	sz += ubifs_auth_node_sz(c);
+
 	ino = kmalloc(sz, GFP_NOFS);
 	if (!ino)
 		return -ENOMEM;
@@ -1456,8 +1532,12 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
 
 	/* Must make reservation before allocating sequence numbers */
 	len = UBIFS_TRUN_NODE_SZ + UBIFS_INO_NODE_SZ;
-	if (dlen)
+
+	if (ubifs_authenticated(c))
+		len += ALIGN(dlen, 8) + ubifs_auth_node_sz(c);
+	else
 		len += dlen;
+
 	err = make_reservation(c, BASEHD, len);
 	if (err)
 		goto out_free;
@@ -1482,6 +1562,8 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
 		ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf, inum);
 	release_head(c, BASEHD);
 
+	ubifs_add_auth_dirt(c, lnum);
+
 	if (dlen) {
 		sz = offs + UBIFS_INO_NODE_SZ + UBIFS_TRUN_NODE_SZ;
 		err = ubifs_tnc_add(c, &key, lnum, sz, dlen, hash_dn);
@@ -1545,7 +1627,7 @@ int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host,
 			   const struct inode *inode,
 			   const struct fscrypt_name *nm)
 {
-	int err, xlen, hlen, len, lnum, xent_offs, aligned_xlen;
+	int err, xlen, hlen, len, lnum, xent_offs, aligned_xlen, write_len;
 	struct ubifs_dent_node *xent;
 	struct ubifs_ino_node *ino;
 	union ubifs_key xent_key, key1, key2;
@@ -1565,12 +1647,14 @@ int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host,
 	hlen = host_ui->data_len + UBIFS_INO_NODE_SZ;
 	len = aligned_xlen + UBIFS_INO_NODE_SZ + ALIGN(hlen, 8);
 
-	xent = kzalloc(len, GFP_NOFS);
+	write_len = len + ubifs_auth_node_sz(c);
+
+	xent = kzalloc(write_len, GFP_NOFS);
 	if (!xent)
 		return -ENOMEM;
 
 	/* Make reservation before allocating sequence numbers */
-	err = make_reservation(c, BASEHD, len);
+	err = make_reservation(c, BASEHD, write_len);
 	if (err) {
 		kfree(xent);
 		return err;
@@ -1595,10 +1679,12 @@ int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host,
 	if (err)
 		goto out_release;
 
-	err = write_head(c, BASEHD, xent, len, &lnum, &xent_offs, sync);
+	err = write_head(c, BASEHD, xent, write_len, &lnum, &xent_offs, sync);
 	if (!sync && !err)
 		ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf, host->i_ino);
 	release_head(c, BASEHD);
+
+	ubifs_add_auth_dirt(c, lnum);
 	kfree(xent);
 	if (err)
 		goto out_ro;
@@ -1680,6 +1766,8 @@ int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode,
 	aligned_len1 = ALIGN(len1, 8);
 	aligned_len = aligned_len1 + ALIGN(len2, 8);
 
+	aligned_len += ubifs_auth_node_sz(c);
+
 	ino = kzalloc(aligned_len, GFP_NOFS);
 	if (!ino)
 		return -ENOMEM;
@@ -1709,6 +1797,8 @@ int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode,
 	if (err)
 		goto out_ro;
 
+	ubifs_add_auth_dirt(c, lnum);
+
 	ino_key_init(c, &key, host->i_ino);
 	err = ubifs_tnc_add(c, &key, lnum, offs, len1, hash_host);
 	if (err)
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c
index 86b0828f54991..15fd854149bbf 100644
--- a/fs/ubifs/log.c
+++ b/fs/ubifs/log.c
@@ -236,6 +236,7 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs)
 	bud->lnum = lnum;
 	bud->start = offs;
 	bud->jhead = jhead;
+	bud->log_hash = NULL;
 
 	ref->ch.node_type = UBIFS_REF_NODE;
 	ref->lnum = cpu_to_le32(bud->lnum);
@@ -275,6 +276,14 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs)
 	if (err)
 		goto out_unlock;
 
+	err = ubifs_shash_update(c, c->log_hash, ref, UBIFS_REF_NODE_SZ);
+	if (err)
+		goto out_unlock;
+
+	err = ubifs_shash_copy_state(c, c->log_hash, c->jheads[jhead].log_hash);
+	if (err)
+		goto out_unlock;
+
 	c->lhead_offs += c->ref_node_alsz;
 
 	ubifs_add_bud(c, bud);
@@ -377,6 +386,14 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum)
 	cs->cmt_no = cpu_to_le64(c->cmt_no);
 	ubifs_prepare_node(c, cs, UBIFS_CS_NODE_SZ, 0);
 
+	err = ubifs_shash_init(c, c->log_hash);
+	if (err)
+		goto out;
+
+	err = ubifs_shash_update(c, c->log_hash, cs, UBIFS_CS_NODE_SZ);
+	if (err < 0)
+		goto out;
+
 	/*
 	 * Note, we do not lock 'c->log_mutex' because this is the commit start
 	 * phase and we are exclusively using the log. And we do not lock
@@ -402,6 +419,12 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum)
 
 		ubifs_prepare_node(c, ref, UBIFS_REF_NODE_SZ, 0);
 		len += UBIFS_REF_NODE_SZ;
+
+		err = ubifs_shash_update(c, c->log_hash, ref,
+					 UBIFS_REF_NODE_SZ);
+		if (err)
+			goto out;
+		ubifs_shash_copy_state(c, c->log_hash, c->jheads[i].log_hash);
 	}
 
 	ubifs_pad(c, buf + len, ALIGN(len, c->min_io_size) - len);
@@ -516,6 +539,7 @@ int ubifs_log_post_commit(struct ubifs_info *c, int old_ltail_lnum)
 		if (err)
 			return err;
 		list_del(&bud->list);
+		kfree(bud->log_hash);
 		kfree(bud);
 	}
 	mutex_lock(&c->log_mutex);
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index 1c6ceb6265aa3..db489d93439c1 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -666,6 +666,8 @@ static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
 					  old_size, new_size);
 			break;
 		}
+		case UBIFS_AUTH_NODE:
+			break;
 		default:
 			ubifs_err(c, "unexpected node type %d in bud LEB %d:%d",
 				  snod->type, lnum, snod->offs);
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 0194e3c0853f6..2722ca077d234 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -817,6 +817,9 @@ static int alloc_wbufs(struct ubifs_info *c)
 		c->jheads[i].wbuf.sync_callback = &bud_wbuf_callback;
 		c->jheads[i].wbuf.jhead = i;
 		c->jheads[i].grouped = 1;
+		c->jheads[i].log_hash = ubifs_hash_get_desc(c);
+		if (IS_ERR(c->jheads[i].log_hash))
+			goto out;
 	}
 
 	/*
@@ -827,6 +830,12 @@ static int alloc_wbufs(struct ubifs_info *c)
 	c->jheads[GCHD].grouped = 0;
 
 	return 0;
+
+out:
+	while (i--)
+		kfree(c->jheads[i].log_hash);
+
+	return err;
 }
 
 /**
@@ -841,6 +850,7 @@ static void free_wbufs(struct ubifs_info *c)
 		for (i = 0; i < c->jhead_cnt; i++) {
 			kfree(c->jheads[i].wbuf.buf);
 			kfree(c->jheads[i].wbuf.inodes);
+			kfree(c->jheads[i].log_hash);
 		}
 		kfree(c->jheads);
 		c->jheads = NULL;
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 67bfd58d28d46..600a25b93a809 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -717,6 +717,7 @@ struct ubifs_wbuf {
  * @jhead: journal head number this bud belongs to
  * @list: link in the list buds belonging to the same journal head
  * @rb: link in the tree of all buds
+ * @log_hash: the log hash from the commit start node up to this bud
  */
 struct ubifs_bud {
 	int lnum;
@@ -724,6 +725,7 @@ struct ubifs_bud {
 	int jhead;
 	struct list_head list;
 	struct rb_node rb;
+	struct shash_desc *log_hash;
 };
 
 /**
@@ -731,6 +733,7 @@ struct ubifs_bud {
  * @wbuf: head's write-buffer
  * @buds_list: list of bud LEBs belonging to this journal head
  * @grouped: non-zero if UBIFS groups nodes when writing to this journal head
+ * @log_hash: the log hash from the commit start node up to this journal head
  *
  * Note, the @buds list is protected by the @c->buds_lock.
  */
@@ -738,6 +741,7 @@ struct ubifs_jhead {
 	struct ubifs_wbuf wbuf;
 	struct list_head buds_list;
 	unsigned int grouped:1;
+	struct shash_desc *log_hash;
 };
 
 /**
@@ -1236,6 +1240,8 @@ struct ubifs_debug_info;
  * @auth_key_name: the authentication key name
  * @auth_hash_name: the name of the hash algorithm used for authentication
  * @auth_hash_algo: the authentication hash used for this fs
+ * @log_hash: the log hash from the commit start node up to the latest reference
+ *            node.
  *
  * @empty: %1 if the UBI device is empty
  * @need_recovery: %1 if the file-system needs recovery
@@ -1478,6 +1484,8 @@ struct ubifs_info {
 	char *auth_hash_name;
 	enum hash_algo auth_hash_algo;
 
+	struct shash_desc *log_hash;
+
 	/* The below fields are used only during mounting and re-mounting */
 	unsigned int empty:1;
 	unsigned int need_recovery:1;
-- 
GitLab


From 6f06d96fdf624be3e1d65c6100704a1fd79a30b7 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Fri, 7 Sep 2018 14:36:37 +0200
Subject: [PATCH 0116/1890] ubifs: Add auth nodes to garbage collector journal
 head

To be able to authenticate the garbage collector journal head add
authentication nodes to the buds the garbage collector creates.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/gc.c | 46 +++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 43 insertions(+), 3 deletions(-)

diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index 399d764f83cb1..bf75fdc76fc35 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -365,12 +365,13 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
 
 	/* Write nodes to their new location. Use the first-fit strategy */
 	while (1) {
-		int avail;
+		int avail, moved = 0;
 		struct ubifs_scan_node *snod, *tmp;
 
 		/* Move data nodes */
 		list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) {
-			avail = c->leb_size - wbuf->offs - wbuf->used;
+			avail = c->leb_size - wbuf->offs - wbuf->used -
+					ubifs_auth_node_sz(c);
 			if  (snod->len > avail)
 				/*
 				 * Do not skip data nodes in order to optimize
@@ -378,14 +379,21 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
 				 */
 				break;
 
+			err = ubifs_shash_update(c, c->jheads[GCHD].log_hash,
+						 snod->node, snod->len);
+			if (err)
+				goto out;
+
 			err = move_node(c, sleb, snod, wbuf);
 			if (err)
 				goto out;
+			moved = 1;
 		}
 
 		/* Move non-data nodes */
 		list_for_each_entry_safe(snod, tmp, &nondata, list) {
-			avail = c->leb_size - wbuf->offs - wbuf->used;
+			avail = c->leb_size - wbuf->offs - wbuf->used -
+					ubifs_auth_node_sz(c);
 			if (avail < min)
 				break;
 
@@ -403,9 +411,41 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
 				continue;
 			}
 
+			err = ubifs_shash_update(c, c->jheads[GCHD].log_hash,
+						 snod->node, snod->len);
+			if (err)
+				goto out;
+
 			err = move_node(c, sleb, snod, wbuf);
 			if (err)
 				goto out;
+			moved = 1;
+		}
+
+		if (ubifs_authenticated(c) && moved) {
+			struct ubifs_auth_node *auth;
+
+			auth = kmalloc(ubifs_auth_node_sz(c), GFP_NOFS);
+			if (!auth) {
+				err = -ENOMEM;
+				goto out;
+			}
+
+			err = ubifs_prepare_auth_node(c, auth,
+						c->jheads[GCHD].log_hash);
+			if (err) {
+				kfree(auth);
+				goto out;
+			}
+
+			err = ubifs_wbuf_write_nolock(wbuf, auth,
+						      ubifs_auth_node_sz(c));
+			if (err) {
+				kfree(auth);
+				goto out;
+			}
+
+			ubifs_add_dirt(c, wbuf->lnum, ubifs_auth_node_sz(c));
 		}
 
 		if (list_empty(&sleb->nodes) && list_empty(&nondata))
-- 
GitLab


From da8ef65f9573952c717d86f8f501773daf29bd10 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Fri, 7 Sep 2018 14:36:38 +0200
Subject: [PATCH 0117/1890] ubifs: Authenticate replayed journal

Make sure that during replay all buds can be authenticated. To do
this we calculate the hash chain until we find an authentication
node and check the HMAC in that node against the current status
of the hash chain.

After a power cut it can happen that some nodes have been written, but
not yet the authentication node for them. These nodes have to be
discarded during replay.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/replay.c | 146 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 144 insertions(+), 2 deletions(-)

diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index db489d93439c1..75f961c4c0449 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -34,6 +34,8 @@
 
 #include "ubifs.h"
 #include <linux/list_sort.h>
+#include <crypto/hash.h>
+#include <crypto/algapi.h>
 
 /**
  * struct replay_entry - replay list entry.
@@ -531,6 +533,105 @@ static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud)
 	return data == 0xFFFFFFFF;
 }
 
+/**
+ * authenticate_sleb - authenticate one scan LEB
+ * @c: UBIFS file-system description object
+ * @sleb: the scan LEB to authenticate
+ * @log_hash:
+ * @is_last: if true, this is is the last LEB
+ *
+ * This function iterates over the buds of a single LEB authenticating all buds
+ * with the authentication nodes on this LEB. Authentication nodes are written
+ * after some buds and contain a HMAC covering the authentication node itself
+ * and the buds between the last authentication node and the current
+ * authentication node. It can happen that the last buds cannot be authenticated
+ * because a powercut happened when some nodes were written but not the
+ * corresponding authentication node. This function returns the number of nodes
+ * that could be authenticated or a negative error code.
+ */
+static int authenticate_sleb(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
+			     struct shash_desc *log_hash, int is_last)
+{
+	int n_not_auth = 0;
+	struct ubifs_scan_node *snod;
+	int n_nodes = 0;
+	int err;
+	u8 *hash, *hmac;
+
+	if (!ubifs_authenticated(c))
+		return sleb->nodes_cnt;
+
+	hash = kmalloc(crypto_shash_descsize(c->hash_tfm), GFP_NOFS);
+	hmac = kmalloc(c->hmac_desc_len, GFP_NOFS);
+	if (!hash || !hmac) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	list_for_each_entry(snod, &sleb->nodes, list) {
+
+		n_nodes++;
+
+		if (snod->type == UBIFS_AUTH_NODE) {
+			struct ubifs_auth_node *auth = snod->node;
+			SHASH_DESC_ON_STACK(hash_desc, c->hash_tfm);
+			SHASH_DESC_ON_STACK(hmac_desc, c->hmac_tfm);
+
+			hash_desc->tfm = c->hash_tfm;
+			hash_desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+
+			ubifs_shash_copy_state(c, log_hash, hash_desc);
+			err = crypto_shash_final(hash_desc, hash);
+			if (err)
+				goto out;
+
+			hmac_desc->tfm = c->hmac_tfm;
+			hmac_desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+			err = crypto_shash_digest(hmac_desc, hash, c->hash_len,
+						  hmac);
+			if (err)
+				goto out;
+
+			err = ubifs_check_hmac(c, auth->hmac, hmac);
+			if (err) {
+				err = -EPERM;
+				goto out;
+			}
+			n_not_auth = 0;
+		} else {
+			err = crypto_shash_update(log_hash, snod->node,
+						  snod->len);
+			if (err)
+				goto out;
+			n_not_auth++;
+		}
+	}
+
+	/*
+	 * A powercut can happen when some nodes were written, but not yet
+	 * the corresponding authentication node. This may only happen on
+	 * the last bud though.
+	 */
+	if (n_not_auth) {
+		if (is_last) {
+			dbg_mnt("%d unauthenticated nodes found on LEB %d, Ignoring them",
+				n_not_auth, sleb->lnum);
+			err = 0;
+		} else {
+			dbg_mnt("%d unauthenticated nodes found on non-last LEB %d",
+				n_not_auth, sleb->lnum);
+			err = -EPERM;
+		}
+	} else {
+		err = 0;
+	}
+out:
+	kfree(hash);
+	kfree(hmac);
+
+	return err ? err : n_nodes - n_not_auth;
+}
+
 /**
  * replay_bud - replay a bud logical eraseblock.
  * @c: UBIFS file-system description object
@@ -544,6 +645,7 @@ static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
 {
 	int is_last = is_last_bud(c, b->bud);
 	int err = 0, used = 0, lnum = b->bud->lnum, offs = b->bud->start;
+	int n_nodes, n = 0;
 	struct ubifs_scan_leb *sleb;
 	struct ubifs_scan_node *snod;
 
@@ -563,6 +665,15 @@ static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
 	if (IS_ERR(sleb))
 		return PTR_ERR(sleb);
 
+	n_nodes = authenticate_sleb(c, sleb, b->bud->log_hash, is_last);
+	if (n_nodes < 0) {
+		err = n_nodes;
+		goto out;
+	}
+
+	ubifs_shash_copy_state(c, b->bud->log_hash,
+			       c->jheads[b->bud->jhead].log_hash);
+
 	/*
 	 * The bud does not have to start from offset zero - the beginning of
 	 * the 'lnum' LEB may contain previously committed data. One of the
@@ -676,6 +787,10 @@ static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
 		}
 		if (err)
 			goto out;
+
+		n++;
+		if (n == n_nodes)
+			break;
 	}
 
 	ubifs_assert(c, ubifs_search_bud(c, lnum));
@@ -754,6 +869,7 @@ static int add_replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
 {
 	struct ubifs_bud *bud;
 	struct bud_entry *b;
+	int err;
 
 	dbg_mnt("add replay bud LEB %d:%d, head %d", lnum, offs, jhead);
 
@@ -763,13 +879,21 @@ static int add_replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
 
 	b = kmalloc(sizeof(struct bud_entry), GFP_KERNEL);
 	if (!b) {
-		kfree(bud);
-		return -ENOMEM;
+		err = -ENOMEM;
+		goto out;
 	}
 
 	bud->lnum = lnum;
 	bud->start = offs;
 	bud->jhead = jhead;
+	bud->log_hash = ubifs_hash_get_desc(c);
+	if (IS_ERR(bud->log_hash)) {
+		err = PTR_ERR(bud->log_hash);
+		goto out;
+	}
+
+	ubifs_shash_copy_state(c, c->log_hash, bud->log_hash);
+
 	ubifs_add_bud(c, bud);
 
 	b->bud = bud;
@@ -777,6 +901,11 @@ static int add_replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
 	list_add_tail(&b->list, &c->replay_buds);
 
 	return 0;
+out:
+	kfree(bud);
+	kfree(b);
+
+	return err;
 }
 
 /**
@@ -882,6 +1011,14 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
 
 		c->cs_sqnum = le64_to_cpu(node->ch.sqnum);
 		dbg_mnt("commit start sqnum %llu", c->cs_sqnum);
+
+		err = ubifs_shash_init(c, c->log_hash);
+		if (err)
+			goto out;
+
+		err = ubifs_shash_update(c, c->log_hash, node, UBIFS_CS_NODE_SZ);
+		if (err < 0)
+			goto out;
 	}
 
 	if (snod->sqnum < c->cs_sqnum) {
@@ -929,6 +1066,11 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
 			if (err)
 				goto out_dump;
 
+			err = ubifs_shash_update(c, c->log_hash, ref,
+						 UBIFS_REF_NODE_SZ);
+			if (err)
+				goto out;
+
 			err = add_replay_bud(c, le32_to_cpu(ref->lnum),
 					     le32_to_cpu(ref->offs),
 					     le32_to_cpu(ref->jhead),
-- 
GitLab


From a1dc58140f7e63e3b23050eb43b4e5581cb28c88 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Fri, 7 Sep 2018 14:36:39 +0200
Subject: [PATCH 0118/1890] ubifs: authentication: Authenticate LPT

The LPT needs to be authenticated aswell. Since the LPT is only written
during commit it is enough to authenticate the whole LPT with a single
hash which is stored in the master node. Only the leaf nodes (pnodes)
are hashed which makes the implementation much simpler than it would be
to hash the complete LPT.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/lpt.c        | 129 ++++++++++++++++++++++++++++++++++++++++++
 fs/ubifs/lpt_commit.c |   4 ++
 fs/ubifs/ubifs.h      |   1 +
 3 files changed, 134 insertions(+)

diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
index 5f57af224b8f9..ee18305a6152d 100644
--- a/fs/ubifs/lpt.c
+++ b/fs/ubifs/lpt.c
@@ -1635,6 +1635,131 @@ struct ubifs_lprops *ubifs_lpt_lookup_dirty(struct ubifs_info *c, int lnum)
 	return &pnode->lprops[iip];
 }
 
+/**
+ * ubifs_lpt_calc_hash - Calculate hash of the LPT pnodes
+ * @c: UBIFS file-system description object
+ * @hash: the returned hash of the LPT pnodes
+ *
+ * This function iterates over the LPT pnodes and creates a hash over them.
+ * Returns 0 for success or a negative error code otherwise.
+ */
+int ubifs_lpt_calc_hash(struct ubifs_info *c, u8 *hash)
+{
+	struct ubifs_nnode *nnode, *nn;
+	struct ubifs_cnode *cnode;
+	struct shash_desc *desc;
+	int iip = 0, i;
+	int bufsiz = max_t(int, c->nnode_sz, c->pnode_sz);
+	void *buf;
+	int err;
+
+	if (!ubifs_authenticated(c))
+		return 0;
+
+	desc = ubifs_hash_get_desc(c);
+	if (IS_ERR(desc))
+		return PTR_ERR(desc);
+
+	buf = kmalloc(bufsiz, GFP_NOFS);
+	if (!buf) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	if (!c->nroot) {
+		err = ubifs_read_nnode(c, NULL, 0);
+		if (err)
+			return err;
+	}
+
+	cnode = (struct ubifs_cnode *)c->nroot;
+
+	while (cnode) {
+		nnode = cnode->parent;
+		nn = (struct ubifs_nnode *)cnode;
+		if (cnode->level > 1) {
+			while (iip < UBIFS_LPT_FANOUT) {
+				if (nn->nbranch[iip].lnum == 0) {
+					/* Go right */
+					iip++;
+					continue;
+				}
+
+				nnode = ubifs_get_nnode(c, nn, iip);
+				if (IS_ERR(nnode)) {
+					err = PTR_ERR(nnode);
+					goto out;
+				}
+
+				/* Go down */
+				iip = 0;
+				cnode = (struct ubifs_cnode *)nnode;
+				break;
+			}
+			if (iip < UBIFS_LPT_FANOUT)
+				continue;
+		} else {
+			struct ubifs_pnode *pnode;
+
+			for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
+				if (nn->nbranch[i].lnum == 0)
+					continue;
+				pnode = ubifs_get_pnode(c, nn, i);
+				if (IS_ERR(pnode)) {
+					err = PTR_ERR(pnode);
+					goto out;
+				}
+
+				ubifs_pack_pnode(c, buf, pnode);
+				err = ubifs_shash_update(c, desc, buf,
+							 c->pnode_sz);
+				if (err)
+					goto out;
+			}
+		}
+		/* Go up and to the right */
+		iip = cnode->iip + 1;
+		cnode = (struct ubifs_cnode *)nnode;
+	}
+
+	err = ubifs_shash_final(c, desc, hash);
+out:
+	kfree(desc);
+	kfree(buf);
+
+	return err;
+}
+
+/**
+ * lpt_check_hash - check the hash of the LPT.
+ * @c: UBIFS file-system description object
+ *
+ * This function calculates a hash over all pnodes in the LPT and compares it with
+ * the hash stored in the master node. Returns %0 on success and a negative error
+ * code on failure.
+ */
+static int lpt_check_hash(struct ubifs_info *c)
+{
+	int err;
+	u8 hash[UBIFS_HASH_ARR_SZ];
+
+	if (!ubifs_authenticated(c))
+		return 0;
+
+	err = ubifs_lpt_calc_hash(c, hash);
+	if (err)
+		return err;
+
+	if (ubifs_check_hash(c, c->mst_node->hash_lpt, hash)) {
+		err = -EPERM;
+		ubifs_err(c, "Failed to authenticate LPT");
+	} else {
+		err = 0;
+	}
+
+	return err;
+}
+
 /**
  * lpt_init_rd - initialize the LPT for reading.
  * @c: UBIFS file-system description object
@@ -1676,6 +1801,10 @@ static int lpt_init_rd(struct ubifs_info *c)
 	if (err)
 		return err;
 
+	err = lpt_check_hash(c);
+	if (err)
+		return err;
+
 	dbg_lp("space_bits %d", c->space_bits);
 	dbg_lp("lpt_lnum_bits %d", c->lpt_lnum_bits);
 	dbg_lp("lpt_offs_bits %d", c->lpt_offs_bits);
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index 62d6a87d4f5d0..1f88caffdf2ac 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -1247,6 +1247,10 @@ int ubifs_lpt_start_commit(struct ubifs_info *c)
 	if (err)
 		goto out;
 
+	err = ubifs_lpt_calc_hash(c, c->mst_node->hash_lpt);
+	if (err)
+		goto out;
+
 	/* Copy the LPT's own lprops for end commit to write */
 	memcpy(c->ltab_cmt, c->ltab,
 	       sizeof(struct ubifs_lpt_lprops) * c->lpt_lebs);
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 600a25b93a809..7e519a4885a8c 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1961,6 +1961,7 @@ struct ubifs_nnode *ubifs_first_nnode(struct ubifs_info *c, int *hght);
 /* Needed only in debugging code in lpt_commit.c */
 int ubifs_unpack_nnode(const struct ubifs_info *c, void *buf,
 		       struct ubifs_nnode *nnode);
+int ubifs_lpt_calc_hash(struct ubifs_info *c, u8 *hash);
 
 /* lpt_commit.c */
 int ubifs_lpt_start_commit(struct ubifs_info *c);
-- 
GitLab


From 625700ccb5069ec81d15aae3b47282ecc59d63b5 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Fri, 7 Sep 2018 14:36:40 +0200
Subject: [PATCH 0119/1890] ubfis: authentication: Authenticate master node

The master node contains hashes over the root index node and the LPT.
This patch adds a HMAC to authenticate the master node itself.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/master.c   | 61 ++++++++++++++++++++++++++++++++++++++++-----
 fs/ubifs/recovery.c |  9 ++++---
 fs/ubifs/ubifs.h    |  1 +
 3 files changed, 61 insertions(+), 10 deletions(-)

diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c
index 0ca9d3513b4d7..5ea51bbd14c7f 100644
--- a/fs/ubifs/master.c
+++ b/fs/ubifs/master.c
@@ -24,6 +24,42 @@
 
 #include "ubifs.h"
 
+/**
+ * ubifs_compare_master_node - compare two UBIFS master nodes
+ * @c: UBIFS file-system description object
+ * @m1: the first node
+ * @m2: the second node
+ *
+ * This function compares two UBIFS master nodes. Returns 0 if they are equal
+ * and nonzero if not.
+ */
+int ubifs_compare_master_node(struct ubifs_info *c, void *m1, void *m2)
+{
+	int ret;
+	int behind;
+	int hmac_offs = offsetof(struct ubifs_mst_node, hmac);
+
+	/*
+	 * Do not compare the common node header since the sequence number and
+	 * hence the CRC are different.
+	 */
+	ret = memcmp(m1 + UBIFS_CH_SZ, m2 + UBIFS_CH_SZ,
+		     hmac_offs - UBIFS_CH_SZ);
+	if (ret)
+		return ret;
+
+	/*
+	 * Do not compare the embedded HMAC aswell which also must be different
+	 * due to the different common node header.
+	 */
+	behind = hmac_offs + UBIFS_MAX_HMAC_LEN;
+
+	if (UBIFS_MST_NODE_SZ > behind)
+		return memcmp(m1 + behind, m2 + behind, UBIFS_MST_NODE_SZ - behind);
+
+	return 0;
+}
+
 /**
  * scan_for_master - search the valid master node.
  * @c: UBIFS file-system description object
@@ -37,7 +73,7 @@ static int scan_for_master(struct ubifs_info *c)
 {
 	struct ubifs_scan_leb *sleb;
 	struct ubifs_scan_node *snod;
-	int lnum, offs = 0, nodes_cnt;
+	int lnum, offs = 0, nodes_cnt, err;
 
 	lnum = UBIFS_MST_LNUM;
 
@@ -69,12 +105,23 @@ static int scan_for_master(struct ubifs_info *c)
 		goto out_dump;
 	if (snod->offs != offs)
 		goto out;
-	if (memcmp((void *)c->mst_node + UBIFS_CH_SZ,
-		   (void *)snod->node + UBIFS_CH_SZ,
-		   UBIFS_MST_NODE_SZ - UBIFS_CH_SZ))
+	if (ubifs_compare_master_node(c, c->mst_node, snod->node))
 		goto out;
+
 	c->mst_offs = offs;
 	ubifs_scan_destroy(sleb);
+
+	if (!ubifs_authenticated(c))
+		return 0;
+
+	err = ubifs_node_verify_hmac(c, c->mst_node,
+				     sizeof(struct ubifs_mst_node),
+				     offsetof(struct ubifs_mst_node, hmac));
+	if (err) {
+		ubifs_err(c, "Failed to verify master node HMAC");
+		return -EPERM;
+	}
+
 	return 0;
 
 out:
@@ -381,7 +428,8 @@ int ubifs_write_master(struct ubifs_info *c)
 	c->mst_node->highest_inum = cpu_to_le64(c->highest_inum);
 
 	ubifs_copy_hash(c, c->zroot.hash, c->mst_node->hash_root_idx);
-	err = ubifs_write_node(c, c->mst_node, len, lnum, offs);
+	err = ubifs_write_node_hmac(c, c->mst_node, len, lnum, offs,
+				    offsetof(struct ubifs_mst_node, hmac));
 	if (err)
 		return err;
 
@@ -392,7 +440,8 @@ int ubifs_write_master(struct ubifs_info *c)
 		if (err)
 			return err;
 	}
-	err = ubifs_write_node(c, c->mst_node, len, lnum, offs);
+	err = ubifs_write_node_hmac(c, c->mst_node, len, lnum, offs,
+				    offsetof(struct ubifs_mst_node, hmac));
 
 	return err;
 }
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index 984e30e83c0b3..5c1334e6bc81f 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -212,7 +212,10 @@ static int write_rcvrd_mst_node(struct ubifs_info *c,
 	save_flags = mst->flags;
 	mst->flags |= cpu_to_le32(UBIFS_MST_RCVRY);
 
-	ubifs_prepare_node(c, mst, UBIFS_MST_NODE_SZ, 1);
+	err = ubifs_prepare_node_hmac(c, mst, UBIFS_MST_NODE_SZ,
+				      offsetof(struct ubifs_mst_node, hmac), 1);
+	if (err)
+		goto out;
 	err = ubifs_leb_change(c, lnum, mst, sz);
 	if (err)
 		goto out;
@@ -264,9 +267,7 @@ int ubifs_recover_master_node(struct ubifs_info *c)
 			offs2 = (void *)mst2 - buf2;
 			if (offs1 == offs2) {
 				/* Same offset, so must be the same */
-				if (memcmp((void *)mst1 + UBIFS_CH_SZ,
-					   (void *)mst2 + UBIFS_CH_SZ,
-					   UBIFS_MST_NODE_SZ - UBIFS_CH_SZ))
+				if (ubifs_compare_master_node(c, mst1, mst2))
 					goto out_err;
 				mst = mst1;
 			} else if (offs2 + sz == offs1) {
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 7e519a4885a8c..c26d3c600e4d4 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1900,6 +1900,7 @@ int ubifs_gc_should_commit(struct ubifs_info *c);
 void ubifs_wait_for_commit(struct ubifs_info *c);
 
 /* master.c */
+int ubifs_compare_master_node(struct ubifs_info *c, void *m1, void *m2);
 int ubifs_read_master(struct ubifs_info *c);
 int ubifs_write_master(struct ubifs_info *c);
 
-- 
GitLab


From b5b1f08369222394540fb4b3b947fe26a2557d1d Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Fri, 7 Sep 2018 14:36:41 +0200
Subject: [PATCH 0120/1890] ubifs: Create hash for default LPT

During creation of the default filesystem on an empty flash the default
LPT is created. With this patch a hash over the default LPT is
calculated which can be added to the default filesystems master node.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/lpt.c   | 21 ++++++++++++++++++++-
 fs/ubifs/sb.c    |  3 ++-
 fs/ubifs/ubifs.h |  2 +-
 3 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
index ee18305a6152d..d1d5e96350ddb 100644
--- a/fs/ubifs/lpt.c
+++ b/fs/ubifs/lpt.c
@@ -604,11 +604,12 @@ static int calc_pnode_num_from_parent(const struct ubifs_info *c,
  * @lpt_first: LEB number of first LPT LEB
  * @lpt_lebs: number of LEBs for LPT is passed and returned here
  * @big_lpt: use big LPT model is passed and returned here
+ * @hash: hash of the LPT is returned here
  *
  * This function returns %0 on success and a negative error code on failure.
  */
 int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first,
-			  int *lpt_lebs, int *big_lpt)
+			  int *lpt_lebs, int *big_lpt, u8 *hash)
 {
 	int lnum, err = 0, node_sz, iopos, i, j, cnt, len, alen, row;
 	int blnum, boffs, bsz, bcnt;
@@ -617,6 +618,7 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first,
 	void *buf = NULL, *p;
 	struct ubifs_lpt_lprops *ltab = NULL;
 	int *lsave = NULL;
+	struct shash_desc *desc;
 
 	err = calc_dflt_lpt_geom(c, main_lebs, big_lpt);
 	if (err)
@@ -630,6 +632,10 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first,
 	/* Needed by 'ubifs_pack_lsave()' */
 	c->main_first = c->leb_cnt - *main_lebs;
 
+	desc = ubifs_hash_get_desc(c);
+	if (IS_ERR(desc))
+		return PTR_ERR(desc);
+
 	lsave = kmalloc_array(c->lsave_cnt, sizeof(int), GFP_KERNEL);
 	pnode = kzalloc(sizeof(struct ubifs_pnode), GFP_KERNEL);
 	nnode = kzalloc(sizeof(struct ubifs_nnode), GFP_KERNEL);
@@ -677,6 +683,10 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first,
 
 	/* Add first pnode */
 	ubifs_pack_pnode(c, p, pnode);
+	err = ubifs_shash_update(c, desc, p, c->pnode_sz);
+	if (err)
+		goto out;
+
 	p += c->pnode_sz;
 	len = c->pnode_sz;
 	pnode->num += 1;
@@ -711,6 +721,10 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first,
 			len = 0;
 		}
 		ubifs_pack_pnode(c, p, pnode);
+		err = ubifs_shash_update(c, desc, p, c->pnode_sz);
+		if (err)
+			goto out;
+
 		p += c->pnode_sz;
 		len += c->pnode_sz;
 		/*
@@ -830,6 +844,10 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first,
 	if (err)
 		goto out;
 
+	err = ubifs_shash_final(c, desc, hash);
+	if (err)
+		goto out;
+
 	c->nhead_lnum = lnum;
 	c->nhead_offs = ALIGN(len, c->min_io_size);
 
@@ -853,6 +871,7 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first,
 		dbg_lp("LPT lsave is at %d:%d", c->lsave_lnum, c->lsave_offs);
 out:
 	c->ltab = NULL;
+	kfree(desc);
 	kfree(lsave);
 	vfree(ltab);
 	vfree(buf);
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index 7e08c81433165..5727565795327 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -87,6 +87,7 @@ static int create_default_filesystem(struct ubifs_info *c)
 	__le64 tmp_le64;
 	__le32 tmp_le32;
 	struct timespec64 ts;
+	u8 hash_lpt[UBIFS_HASH_ARR_SZ];
 
 	/* Some functions called from here depend on the @c->key_len filed */
 	c->key_len = UBIFS_SK_LEN;
@@ -148,7 +149,7 @@ static int create_default_filesystem(struct ubifs_info *c)
 	c->lsave_cnt = DEFAULT_LSAVE_CNT;
 	c->max_leb_cnt = c->leb_cnt;
 	err = ubifs_create_dflt_lpt(c, &main_lebs, lpt_first, &lpt_lebs,
-				    &big_lpt);
+				    &big_lpt, hash_lpt);
 	if (err)
 		return err;
 
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index c26d3c600e4d4..504b651b78f10 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1934,7 +1934,7 @@ int ubifs_clear_orphans(struct ubifs_info *c);
 /* lpt.c */
 int ubifs_calc_lpt_geom(struct ubifs_info *c);
 int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first,
-			  int *lpt_lebs, int *big_lpt);
+			  int *lpt_lebs, int *big_lpt, u8 *hash);
 int ubifs_lpt_init(struct ubifs_info *c, int rd, int wr);
 struct ubifs_lprops *ubifs_lpt_lookup(struct ubifs_info *c, int lnum);
 struct ubifs_lprops *ubifs_lpt_lookup_dirty(struct ubifs_info *c, int lnum);
-- 
GitLab


From e158e02ff70038936d374928a5484f16daa4c7d4 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Fri, 7 Sep 2018 14:36:42 +0200
Subject: [PATCH 0121/1890] ubifs: authentication: Authenticate super block
 node

This adds a HMAC covering the super block node and adds the logic that
decides if a filesystem shall be mounted unauthenticated or
authenticated.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/sb.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 69 insertions(+), 1 deletion(-)

diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index 5727565795327..7f0967771c9f7 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -517,6 +517,65 @@ static struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c)
 	return sup;
 }
 
+static int authenticate_sb_node(struct ubifs_info *c,
+				const struct ubifs_sb_node *sup)
+{
+	unsigned int sup_flags = le32_to_cpu(sup->flags);
+	u8 hmac_wkm[UBIFS_HMAC_ARR_SZ];
+	int authenticated = !!(sup_flags & UBIFS_FLG_AUTHENTICATION);
+	int hash_algo;
+	int err;
+
+	if (c->authenticated && !authenticated) {
+		ubifs_err(c, "authenticated FS forced, but found FS without authentication");
+		return -EINVAL;
+	}
+
+	if (!c->authenticated && authenticated) {
+		ubifs_err(c, "authenticated FS found, but no key given");
+		return -EINVAL;
+	}
+
+	ubifs_msg(c, "Mounting in %sauthenticated mode",
+		  c->authenticated ? "" : "un");
+
+	if (!c->authenticated)
+		return 0;
+
+	if (!IS_ENABLED(CONFIG_UBIFS_FS_AUTHENTICATION))
+		return -EOPNOTSUPP;
+
+	hash_algo = le16_to_cpu(sup->hash_algo);
+	if (hash_algo >= HASH_ALGO__LAST) {
+		ubifs_err(c, "superblock uses unknown hash algo %d",
+			  hash_algo);
+		return -EINVAL;
+	}
+
+	if (strcmp(hash_algo_name[hash_algo], c->auth_hash_name)) {
+		ubifs_err(c, "This filesystem uses %s for hashing,"
+			     " but %s is specified", hash_algo_name[hash_algo],
+			     c->auth_hash_name);
+		return -EINVAL;
+	}
+
+	err = ubifs_hmac_wkm(c, hmac_wkm);
+	if (err)
+		return err;
+
+	if (ubifs_check_hmac(c, hmac_wkm, sup->hmac_wkm)) {
+		ubifs_err(c, "provided key does not fit");
+		return -ENOKEY;
+	}
+
+	err = ubifs_node_verify_hmac(c, sup, sizeof(*sup),
+				     offsetof(struct ubifs_sb_node, hmac));
+	if (err)
+		ubifs_err(c, "Failed to authenticate superblock: %d", err);
+
+	return err;
+}
+
 /**
  * ubifs_write_sb_node - write superblock node.
  * @c: UBIFS file-system description object
@@ -527,8 +586,13 @@ static struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c)
 int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup)
 {
 	int len = ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size);
+	int err;
+
+	err = ubifs_prepare_node_hmac(c, sup, UBIFS_SB_NODE_SZ,
+				      offsetof(struct ubifs_sb_node, hmac), 1);
+	if (err)
+		return err;
 
-	ubifs_prepare_node(c, sup, UBIFS_SB_NODE_SZ, 1);
 	return ubifs_leb_change(c, UBIFS_SB_LNUM, sup, len);
 }
 
@@ -642,6 +706,10 @@ int ubifs_read_superblock(struct ubifs_info *c)
 	c->double_hash = !!(sup_flags & UBIFS_FLG_DOUBLE_HASH);
 	c->encrypted = !!(sup_flags & UBIFS_FLG_ENCRYPTION);
 
+	err = authenticate_sb_node(c, sup);
+	if (err)
+		goto out;
+
 	if ((sup_flags & ~UBIFS_FLG_MASK) != 0) {
 		ubifs_err(c, "Unknown feature flags found: %#x",
 			  sup_flags & ~UBIFS_FLG_MASK);
-- 
GitLab


From 104115a3eb54e7e804cd4ef1d6426c0b8aaaeb60 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Fri, 7 Sep 2018 14:36:43 +0200
Subject: [PATCH 0122/1890] ubifs: Add hashes and HMACs to default filesystem

This patch calculates the necessary hashes and HMACs for the default
filesystem so that the dynamically created default fs can be
authenticated.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/sb.c | 34 +++++++++++++++++++++++++++-------
 1 file changed, 27 insertions(+), 7 deletions(-)

diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index 7f0967771c9f7..ac1ed5ad126db 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -87,6 +87,7 @@ static int create_default_filesystem(struct ubifs_info *c)
 	__le64 tmp_le64;
 	__le32 tmp_le32;
 	struct timespec64 ts;
+	u8 hash[UBIFS_HASH_ARR_SZ];
 	u8 hash_lpt[UBIFS_HASH_ARR_SZ];
 
 	/* Some functions called from here depend on the @c->key_len filed */
@@ -177,6 +178,16 @@ static int create_default_filesystem(struct ubifs_info *c)
 		sup_flags |= UBIFS_FLG_BIGLPT;
 	sup_flags |= UBIFS_FLG_DOUBLE_HASH;
 
+	if (ubifs_authenticated(c)) {
+		sup_flags |= UBIFS_FLG_AUTHENTICATION;
+		sup->hash_algo = cpu_to_le16(c->auth_hash_algo);
+		err = ubifs_hmac_wkm(c, sup->hmac_wkm);
+		if (err)
+			goto out;
+	} else {
+		sup->hash_algo = 0xffff;
+	}
+
 	sup->ch.node_type  = UBIFS_SB_NODE;
 	sup->key_hash      = UBIFS_KEY_HASH_R5;
 	sup->flags         = cpu_to_le32(sup_flags);
@@ -235,6 +246,7 @@ static int create_default_filesystem(struct ubifs_info *c)
 	mst->empty_lebs   = cpu_to_le32(main_lebs - 2);
 	mst->idx_lebs     = cpu_to_le32(1);
 	mst->leb_cnt      = cpu_to_le32(c->leb_cnt);
+	ubifs_copy_hash(c, hash_lpt, mst->hash_lpt);
 
 	/* Calculate lprops statistics */
 	tmp64 = main_bytes;
@@ -307,25 +319,33 @@ static int create_default_filesystem(struct ubifs_info *c)
 
 	cs->ch.node_type = UBIFS_CS_NODE;
 
-	err = ubifs_write_node(c, sup, UBIFS_SB_NODE_SZ, 0, 0);
+	err = ubifs_write_node_hmac(c, sup, UBIFS_SB_NODE_SZ, 0, 0,
+				    offsetof(struct ubifs_sb_node, hmac));
 	if (err)
 		goto out;
 
-	err = ubifs_write_node(c, mst, UBIFS_MST_NODE_SZ, UBIFS_MST_LNUM, 0);
+	err = ubifs_write_node(c, ino, UBIFS_INO_NODE_SZ,
+			       main_first + DEFAULT_DATA_LEB, 0);
 	if (err)
 		goto out;
 
-	err = ubifs_write_node(c, mst, UBIFS_MST_NODE_SZ, UBIFS_MST_LNUM + 1,
-			       0);
+	ubifs_node_calc_hash(c, ino, hash);
+	ubifs_copy_hash(c, hash, ubifs_branch_hash(c, br));
+
+	err = ubifs_write_node(c, idx, idx_node_size, main_first + DEFAULT_IDX_LEB, 0);
 	if (err)
 		goto out;
 
-	err = ubifs_write_node(c, idx, idx_node_size, main_first + DEFAULT_IDX_LEB, 0);
+	ubifs_node_calc_hash(c, idx, hash);
+	ubifs_copy_hash(c, hash, mst->hash_root_idx);
+
+	err = ubifs_write_node_hmac(c, mst, UBIFS_MST_NODE_SZ, UBIFS_MST_LNUM, 0,
+		offsetof(struct ubifs_mst_node, hmac));
 	if (err)
 		goto out;
 
-	err = ubifs_write_node(c, ino, UBIFS_INO_NODE_SZ,
-			       main_first + DEFAULT_DATA_LEB, 0);
+	err = ubifs_write_node_hmac(c, mst, UBIFS_MST_NODE_SZ, UBIFS_MST_LNUM + 1,
+			       0, offsetof(struct ubifs_mst_node, hmac));
 	if (err)
 		goto out;
 
-- 
GitLab


From 1e76592f2c3208ac635c2758aa8326d82fa64a72 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Fri, 7 Sep 2018 14:36:44 +0200
Subject: [PATCH 0123/1890] ubifs: Do not update inode size in-place in
 authenticated mode

In authenticated mode we cannot fixup the inode sizes in-place
during recovery as this would invalidate the hashes and HMACs
we stored for this inode.

Instead, we just write the updated inodes to the journal. We can
only do this after ubifs_rcvry_gc_commit() is done though, so for
authenticated mode call ubifs_recover_size() after
ubifs_rcvry_gc_commit() and not vice versa as normally done.

Calling ubifs_recover_size() after ubifs_rcvry_gc_commit() has the
drawback that after a commit the size fixup information is gone, so
when a powercut happens while recovering from another powercut
we may lose some data written right before the first powercut.
This is why we only do this in authenticated mode and leave the
behaviour for unauthenticated mode untouched.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/recovery.c | 111 +++++++++++++++++++++++++++++++++-----------
 fs/ubifs/super.c    |  38 +++++++++++----
 fs/ubifs/ubifs.h    |   2 +-
 3 files changed, 113 insertions(+), 38 deletions(-)

diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index 5c1334e6bc81f..8526b7ec47077 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -1462,16 +1462,82 @@ static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e)
 	return err;
 }
 
+/**
+ * inode_fix_size - fix inode size
+ * @c: UBIFS file-system description object
+ * @e: inode size information for recovery
+ */
+static int inode_fix_size(struct ubifs_info *c, struct size_entry *e)
+{
+	struct inode *inode;
+	struct ubifs_inode *ui;
+	int err;
+
+	if (c->ro_mount)
+		ubifs_assert(c, !e->inode);
+
+	if (e->inode) {
+		/* Remounting rw, pick up inode we stored earlier */
+		inode = e->inode;
+	} else {
+		inode = ubifs_iget(c->vfs_sb, e->inum);
+		if (IS_ERR(inode))
+			return PTR_ERR(inode);
+
+		if (inode->i_size >= e->d_size) {
+			/*
+			 * The original inode in the index already has a size
+			 * big enough, nothing to do
+			 */
+			iput(inode);
+			return 0;
+		}
+
+		dbg_rcvry("ino %lu size %lld -> %lld",
+			  (unsigned long)e->inum,
+			  inode->i_size, e->d_size);
+
+		ui = ubifs_inode(inode);
+
+		inode->i_size = e->d_size;
+		ui->ui_size = e->d_size;
+		ui->synced_i_size = e->d_size;
+
+		e->inode = inode;
+	}
+
+	/*
+	 * In readonly mode just keep the inode pinned in memory until we go
+	 * readwrite. In readwrite mode write the inode to the journal with the
+	 * fixed size.
+	 */
+	if (c->ro_mount)
+		return 0;
+
+	err = ubifs_jnl_write_inode(c, inode);
+
+	iput(inode);
+
+	if (err)
+		return err;
+
+	rb_erase(&e->rb, &c->size_tree);
+	kfree(e);
+
+	return 0;
+}
+
 /**
  * ubifs_recover_size - recover inode size.
  * @c: UBIFS file-system description object
+ * @in_place: If true, do a in-place size fixup
  *
  * This function attempts to fix inode size discrepancies identified by the
  * 'ubifs_recover_size_accum()' function.
  *
  * This functions returns %0 on success and a negative error code on failure.
  */
-int ubifs_recover_size(struct ubifs_info *c)
+int ubifs_recover_size(struct ubifs_info *c, bool in_place)
 {
 	struct rb_node *this = rb_first(&c->size_tree);
 
@@ -1480,6 +1546,9 @@ int ubifs_recover_size(struct ubifs_info *c)
 		int err;
 
 		e = rb_entry(this, struct size_entry, rb);
+
+		this = rb_next(this);
+
 		if (!e->exists) {
 			union ubifs_key key;
 
@@ -1503,40 +1572,26 @@ int ubifs_recover_size(struct ubifs_info *c)
 		}
 
 		if (e->exists && e->i_size < e->d_size) {
-			if (c->ro_mount) {
-				/* Fix the inode size and pin it in memory */
-				struct inode *inode;
-				struct ubifs_inode *ui;
-
-				ubifs_assert(c, !e->inode);
-
-				inode = ubifs_iget(c->vfs_sb, e->inum);
-				if (IS_ERR(inode))
-					return PTR_ERR(inode);
-
-				ui = ubifs_inode(inode);
-				if (inode->i_size < e->d_size) {
-					dbg_rcvry("ino %lu size %lld -> %lld",
-						  (unsigned long)e->inum,
-						  inode->i_size, e->d_size);
-					inode->i_size = e->d_size;
-					ui->ui_size = e->d_size;
-					ui->synced_i_size = e->d_size;
-					e->inode = inode;
-					this = rb_next(this);
-					continue;
-				}
-				iput(inode);
-			} else {
-				/* Fix the size in place */
+			ubifs_assert(c, !(c->ro_mount && in_place));
+
+			/*
+			 * We found data that is outside the found inode size,
+			 * fixup the inode size
+			 */
+
+			if (in_place) {
 				err = fix_size_in_place(c, e);
 				if (err)
 					return err;
 				iput(e->inode);
+			} else {
+				err = inode_fix_size(c, e);
+				if (err)
+					return err;
+				continue;
 			}
 		}
 
-		this = rb_next(this);
 		rb_erase(&e->rb, &c->size_tree);
 		kfree(e);
 	}
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 2722ca077d234..e2964ce81dee8 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1378,12 +1378,21 @@ static int mount_ubifs(struct ubifs_info *c)
 		}
 
 		if (c->need_recovery) {
-			err = ubifs_recover_size(c);
-			if (err)
-				goto out_orphans;
+			if (!ubifs_authenticated(c)) {
+				err = ubifs_recover_size(c, true);
+				if (err)
+					goto out_orphans;
+			}
+
 			err = ubifs_rcvry_gc_commit(c);
 			if (err)
 				goto out_orphans;
+
+			if (ubifs_authenticated(c)) {
+				err = ubifs_recover_size(c, false);
+				if (err)
+					goto out_orphans;
+			}
 		} else {
 			err = take_gc_lnum(c);
 			if (err)
@@ -1402,7 +1411,7 @@ static int mount_ubifs(struct ubifs_info *c)
 		if (err)
 			goto out_orphans;
 	} else if (c->need_recovery) {
-		err = ubifs_recover_size(c);
+		err = ubifs_recover_size(c, false);
 		if (err)
 			goto out_orphans;
 	} else {
@@ -1629,9 +1638,11 @@ static int ubifs_remount_rw(struct ubifs_info *c)
 		err = ubifs_write_rcvrd_mst_node(c);
 		if (err)
 			goto out;
-		err = ubifs_recover_size(c);
-		if (err)
-			goto out;
+		if (!ubifs_authenticated(c)) {
+			err = ubifs_recover_size(c, true);
+			if (err)
+				goto out;
+		}
 		err = ubifs_clean_lebs(c, c->sbuf);
 		if (err)
 			goto out;
@@ -1697,10 +1708,19 @@ static int ubifs_remount_rw(struct ubifs_info *c)
 			goto out;
 	}
 
-	if (c->need_recovery)
+	if (c->need_recovery) {
 		err = ubifs_rcvry_gc_commit(c);
-	else
+		if (err)
+			goto out;
+
+		if (ubifs_authenticated(c)) {
+			err = ubifs_recover_size(c, false);
+			if (err)
+				goto out;
+		}
+	} else {
 		err = ubifs_leb_unmap(c, c->gc_lnum);
+	}
 	if (err)
 		goto out;
 
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 504b651b78f10..38401adaa00d6 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -2050,7 +2050,7 @@ int ubifs_clean_lebs(struct ubifs_info *c, void *sbuf);
 int ubifs_rcvry_gc_commit(struct ubifs_info *c);
 int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key,
 			     int deletion, loff_t new_size);
-int ubifs_recover_size(struct ubifs_info *c);
+int ubifs_recover_size(struct ubifs_info *c, bool in_place);
 void ubifs_destroy_size_tree(struct ubifs_info *c);
 
 /* ioctl.c */
-- 
GitLab


From d8a22773a12c6d78ee758c9e530f3a488bb7cb29 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Fri, 7 Sep 2018 14:36:45 +0200
Subject: [PATCH 0124/1890] ubifs: Enable authentication support

With the preparations all being done this patch now enables authentication
support for UBIFS. Authentication is enabled when the newly introduced
auth_key and auth_hash_name mount options are passed. auth_key provides
the key which is used for authentication whereas auth_hash_name provides
the hashing algorithm used for this FS. Passing these options make
authentication mandatory and only UBIFS images that can be authenticated
with the given key are allowed.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 Documentation/filesystems/ubifs.txt |  7 ++++++
 fs/ubifs/Kconfig                    | 10 ++++++++
 fs/ubifs/super.c                    | 36 ++++++++++++++++++++++++++++-
 3 files changed, 52 insertions(+), 1 deletion(-)

diff --git a/Documentation/filesystems/ubifs.txt b/Documentation/filesystems/ubifs.txt
index a0a61d2f389f4..acc80442a3bbe 100644
--- a/Documentation/filesystems/ubifs.txt
+++ b/Documentation/filesystems/ubifs.txt
@@ -91,6 +91,13 @@ chk_data_crc		do not skip checking CRCs on data nodes
 compr=none              override default compressor and set it to "none"
 compr=lzo               override default compressor and set it to "lzo"
 compr=zlib              override default compressor and set it to "zlib"
+auth_key=		specify the key used for authenticating the filesystem.
+			Passing this option makes authentication mandatory.
+			The passed key must be present in the kernel keyring
+			and must be of type 'logon'
+auth_hash_name=		The hash algorithm used for authentication. Used for
+			both hashing and for creating HMACs. Typical values
+			include "sha256" or "sha512"
 
 
 Quick usage instructions
diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig
index 853c77579b4e9..529856fbccd0e 100644
--- a/fs/ubifs/Kconfig
+++ b/fs/ubifs/Kconfig
@@ -86,3 +86,13 @@ config UBIFS_FS_SECURITY
 	  the extended attribute support in advance.
 
 	  If you are not using a security module, say N.
+
+config UBIFS_FS_AUTHENTICATION
+	bool "UBIFS authentication support"
+	select CRYPTO_HMAC
+	help
+	  Enable authentication support for UBIFS. This feature offers protection
+	  against offline changes for both data and metadata of the filesystem.
+	  If you say yes here you should also select a hashing algorithm such as
+	  sha256, these are not selected automatically since there are many
+	  different options.
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index e2964ce81dee8..1fac1133dadd2 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -579,7 +579,9 @@ static int init_constants_early(struct ubifs_info *c)
 	c->ranges[UBIFS_REF_NODE].len  = UBIFS_REF_NODE_SZ;
 	c->ranges[UBIFS_TRUN_NODE].len = UBIFS_TRUN_NODE_SZ;
 	c->ranges[UBIFS_CS_NODE].len   = UBIFS_CS_NODE_SZ;
-	c->ranges[UBIFS_AUTH_NODE].len = UBIFS_AUTH_NODE_SZ;
+	c->ranges[UBIFS_AUTH_NODE].min_len = UBIFS_AUTH_NODE_SZ;
+	c->ranges[UBIFS_AUTH_NODE].max_len = UBIFS_AUTH_NODE_SZ +
+				UBIFS_MAX_HMAC_LEN;
 
 	c->ranges[UBIFS_INO_NODE].min_len  = UBIFS_INO_NODE_SZ;
 	c->ranges[UBIFS_INO_NODE].max_len  = UBIFS_MAX_INO_NODE_SZ;
@@ -935,6 +937,8 @@ static int check_volume_empty(struct ubifs_info *c)
  * Opt_no_chk_data_crc: do not check CRCs when reading data nodes
  * Opt_override_compr: override default compressor
  * Opt_assert: set ubifs_assert() action
+ * Opt_auth_key: The key name used for authentication
+ * Opt_auth_hash_name: The hash type used for authentication
  * Opt_err: just end of array marker
  */
 enum {
@@ -946,6 +950,8 @@ enum {
 	Opt_no_chk_data_crc,
 	Opt_override_compr,
 	Opt_assert,
+	Opt_auth_key,
+	Opt_auth_hash_name,
 	Opt_ignore,
 	Opt_err,
 };
@@ -958,6 +964,8 @@ static const match_table_t tokens = {
 	{Opt_chk_data_crc, "chk_data_crc"},
 	{Opt_no_chk_data_crc, "no_chk_data_crc"},
 	{Opt_override_compr, "compr=%s"},
+	{Opt_auth_key, "auth_key=%s"},
+	{Opt_auth_hash_name, "auth_hash_name=%s"},
 	{Opt_ignore, "ubi=%s"},
 	{Opt_ignore, "vol=%s"},
 	{Opt_assert, "assert=%s"},
@@ -1081,6 +1089,16 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options,
 			kfree(act);
 			break;
 		}
+		case Opt_auth_key:
+			c->auth_key_name = kstrdup(args[0].from, GFP_KERNEL);
+			if (!c->auth_key_name)
+				return -ENOMEM;
+			break;
+		case Opt_auth_hash_name:
+			c->auth_hash_name = kstrdup(args[0].from, GFP_KERNEL);
+			if (!c->auth_hash_name)
+				return -ENOMEM;
+			break;
 		case Opt_ignore:
 			break;
 		default:
@@ -1260,6 +1278,19 @@ static int mount_ubifs(struct ubifs_info *c)
 
 	c->mounting = 1;
 
+	if (c->auth_key_name) {
+		if (IS_ENABLED(CONFIG_UBIFS_FS_AUTHENTICATION)) {
+			err = ubifs_init_authentication(c);
+			if (err)
+				goto out_free;
+		} else {
+			ubifs_err(c, "auth_key_name, but UBIFS is built without"
+				  " authentication support");
+			err = -EINVAL;
+			goto out_free;
+		}
+	}
+
 	err = ubifs_read_superblock(c);
 	if (err)
 		goto out_free;
@@ -1577,7 +1608,10 @@ static void ubifs_umount(struct ubifs_info *c)
 	free_wbufs(c);
 	free_orphans(c);
 	ubifs_lpt_free(c, 0);
+	ubifs_exit_authentication(c);
 
+	kfree(c->auth_key_name);
+	kfree(c->auth_hash_name);
 	kfree(c->cbuf);
 	kfree(c->rcvrd_mst_node);
 	kfree(c->mst_node);
-- 
GitLab


From e453fa60e086786fe89ba15ee8fef80bc2e6ecc3 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Fri, 7 Sep 2018 14:36:46 +0200
Subject: [PATCH 0125/1890] Documentation: ubifs: Add authentication whitepaper

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 .../filesystems/ubifs-authentication.md       | 426 ++++++++++++++++++
 1 file changed, 426 insertions(+)
 create mode 100644 Documentation/filesystems/ubifs-authentication.md

diff --git a/Documentation/filesystems/ubifs-authentication.md b/Documentation/filesystems/ubifs-authentication.md
new file mode 100644
index 0000000000000..028b3e2e25f97
--- /dev/null
+++ b/Documentation/filesystems/ubifs-authentication.md
@@ -0,0 +1,426 @@
+% UBIFS Authentication
+% sigma star gmbh
+% 2018
+
+# Introduction
+
+UBIFS utilizes the fscrypt framework to provide confidentiality for file
+contents and file names. This prevents attacks where an attacker is able to
+read contents of the filesystem on a single point in time. A classic example
+is a lost smartphone where the attacker is unable to read personal data stored
+on the device without the filesystem decryption key.
+
+At the current state, UBIFS encryption however does not prevent attacks where
+the attacker is able to modify the filesystem contents and the user uses the
+device afterwards. In such a scenario an attacker can modify filesystem
+contents arbitrarily without the user noticing. One example is to modify a
+binary to perform a malicious action when executed [DMC-CBC-ATTACK]. Since
+most of the filesystem metadata of UBIFS is stored in plain, this makes it
+fairly easy to swap files and replace their contents.
+
+Other full disk encryption systems like dm-crypt cover all filesystem metadata,
+which makes such kinds of attacks more complicated, but not impossible.
+Especially, if the attacker is given access to the device multiple points in
+time. For dm-crypt and other filesystems that build upon the Linux block IO
+layer, the dm-integrity or dm-verity subsystems [DM-INTEGRITY, DM-VERITY]
+can be used to get full data authentication at the block layer.
+These can also be combined with dm-crypt [CRYPTSETUP2].
+
+This document describes an approach to get file contents _and_ full metadata
+authentication for UBIFS. Since UBIFS uses fscrypt for file contents and file
+name encryption, the authentication system could be tied into fscrypt such that
+existing features like key derivation can be utilized. It should however also
+be possible to use UBIFS authentication without using encryption.
+
+
+## MTD, UBI & UBIFS
+
+On Linux, the MTD (Memory Technology Devices) subsystem provides a uniform
+interface to access raw flash devices. One of the more prominent subsystems that
+work on top of MTD is UBI (Unsorted Block Images). It provides volume management
+for flash devices and is thus somewhat similar to LVM for block devices. In
+addition, it deals with flash-specific wear-leveling and transparent I/O error
+handling. UBI offers logical erase blocks (LEBs) to the layers on top of it
+and maps them transparently to physical erase blocks (PEBs) on the flash.
+
+UBIFS is a filesystem for raw flash which operates on top of UBI. Thus, wear
+leveling and some flash specifics are left to UBI, while UBIFS focuses on
+scalability, performance and recoverability.
+
+
+
+	+------------+ +*******+ +-----------+ +-----+
+	|            | * UBIFS * | UBI-BLOCK | | ... |
+	| JFFS/JFFS2 | +*******+ +-----------+ +-----+
+	|            | +-----------------------------+ +-----------+ +-----+
+	|            | |              UBI            | | MTD-BLOCK | | ... |
+	+------------+ +-----------------------------+ +-----------+ +-----+
+	+------------------------------------------------------------------+
+	|                  MEMORY TECHNOLOGY DEVICES (MTD)                 |
+	+------------------------------------------------------------------+
+	+-----------------------------+ +--------------------------+ +-----+
+	|         NAND DRIVERS        | |        NOR DRIVERS       | | ... |
+	+-----------------------------+ +--------------------------+ +-----+
+
+            Figure 1: Linux kernel subsystems for dealing with raw flash
+
+
+
+Internally, UBIFS maintains multiple data structures which are persisted on
+the flash:
+
+- *Index*: an on-flash B+ tree where the leaf nodes contain filesystem data
+- *Journal*: an additional data structure to collect FS changes before updating
+  the on-flash index and reduce flash wear.
+- *Tree Node Cache (TNC)*: an in-memory B+ tree that reflects the current FS
+  state to avoid frequent flash reads. It is basically the in-memory
+  representation of the index, but contains additional attributes.
+- *LEB property tree (LPT)*: an on-flash B+ tree for free space accounting per
+  UBI LEB.
+
+In the remainder of this section we will cover the on-flash UBIFS data
+structures in more detail. The TNC is of less importance here since it is never
+persisted onto the flash directly. More details on UBIFS can also be found in
+[UBIFS-WP].
+
+
+### UBIFS Index & Tree Node Cache
+
+Basic on-flash UBIFS entities are called *nodes*. UBIFS knows different types
+of nodes. Eg. data nodes (`struct ubifs_data_node`) which store chunks of file
+contents or inode nodes (`struct ubifs_ino_node`) which represent VFS inodes.
+Almost all types of nodes share a common header (`ubifs_ch`) containing basic
+information like node type, node length, a sequence number, etc. (see
+`fs/ubifs/ubifs-media.h`in kernel source). Exceptions are entries of the LPT
+and some less important node types like padding nodes which are used to pad
+unusable content at the end of LEBs.
+
+To avoid re-writing the whole B+ tree on every single change, it is implemented
+as *wandering tree*, where only the changed nodes are re-written and previous
+versions of them are obsoleted without erasing them right away. As a result,
+the index is not stored in a single place on the flash, but *wanders* around
+and there are obsolete parts on the flash as long as the LEB containing them is
+not reused by UBIFS. To find the most recent version of the index, UBIFS stores
+a special node called *master node* into UBI LEB 1 which always points to the
+most recent root node of the UBIFS index. For recoverability, the master node
+is additionally duplicated to LEB 2. Mounting UBIFS is thus a simple read of
+LEB 1 and 2 to get the current master node and from there get the location of
+the most recent on-flash index.
+
+The TNC is the in-memory representation of the on-flash index. It contains some
+additional runtime attributes per node which are not persisted. One of these is
+a dirty-flag which marks nodes that have to be persisted the next time the
+index is written onto the flash. The TNC acts as a write-back cache and all
+modifications of the on-flash index are done through the TNC. Like other caches,
+the TNC does not have to mirror the full index into memory, but reads parts of
+it from flash whenever needed. A *commit* is the UBIFS operation of updating the
+on-flash filesystem structures like the index. On every commit, the TNC nodes
+marked as dirty are written to the flash to update the persisted index.
+
+
+### Journal
+
+To avoid wearing out the flash, the index is only persisted (*commited*) when
+certain conditions are met (eg. `fsync(2)`). The journal is used to record
+any changes (in form of inode nodes, data nodes etc.) between commits
+of the index. During mount, the journal is read from the flash and replayed
+onto the TNC (which will be created on-demand from the on-flash index).
+
+UBIFS reserves a bunch of LEBs just for the journal called *log area*. The
+amount of log area LEBs is configured on filesystem creation (using
+`mkfs.ubifs`) and stored in the superblock node. The log area contains only
+two types of nodes: *reference nodes* and *commit start nodes*. A commit start
+node is written whenever an index commit is performed. Reference nodes are
+written on every journal update. Each reference node points to the position of
+other nodes (inode nodes, data nodes etc.) on the flash that are part of this
+journal entry. These nodes are called *buds* and describe the actual filesystem
+changes including their data.
+
+The log area is maintained as a ring. Whenever the journal is almost full,
+a commit is initiated. This also writes a commit start node so that during
+mount, UBIFS will seek for the most recent commit start node and just replay
+every reference node after that. Every reference node before the commit start
+node will be ignored as they are already part of the on-flash index.
+
+When writing a journal entry, UBIFS first ensures that enough space is
+available to write the reference node and buds part of this entry. Then, the
+reference node is written and afterwards the buds describing the file changes.
+On replay, UBIFS will record every reference node and inspect the location of
+the referenced LEBs to discover the buds. If these are corrupt or missing,
+UBIFS will attempt to recover them by re-reading the LEB. This is however only
+done for the last referenced LEB of the journal. Only this can become corrupt
+because of a power cut. If the recovery fails, UBIFS will not mount. An error
+for every other LEB will directly cause UBIFS to fail the mount operation.
+
+
+       | ----    LOG AREA     ---- | ----------    MAIN AREA    ------------ |
+
+        -----+------+-----+--------+----   ------+-----+-----+---------------
+        \    |      |     |        |   /  /      |     |     |               \
+        / CS |  REF | REF |        |   \  \ DENT | INO | INO |               /
+        \    |      |     |        |   /  /      |     |     |               \
+         ----+------+-----+--------+---   -------+-----+-----+----------------
+                 |     |                  ^            ^
+                 |     |                  |            |
+                 +------------------------+            |
+                       |                               |
+                       +-------------------------------+
+
+
+                Figure 2: UBIFS flash layout of log area with commit start nodes
+                          (CS) and reference nodes (REF) pointing to main area
+                          containing their buds
+
+
+### LEB Property Tree/Table
+
+The LEB property tree is used to store per-LEB information. This includes the
+LEB type and amount of free and *dirty* (old, obsolete content) space [1] on
+the LEB. The type is important, because UBIFS never mixes index nodes with data
+nodes on a single LEB and thus each LEB has a specific purpose. This again is
+useful for free space calculations. See [UBIFS-WP] for more details.
+
+The LEB property tree again is a B+ tree, but it is much smaller than the
+index. Due to its smaller size it is always written as one chunk on every
+commit. Thus, saving the LPT is an atomic operation.
+
+
+[1] Since LEBs can only be appended and never overwritten, there is a
+difference between free space ie. the remaining space left on the LEB to be
+written to without erasing it and previously written content that is obsolete
+but can't be overwritten without erasing the full LEB.
+
+
+# UBIFS Authentication
+
+This chapter introduces UBIFS authentication which enables UBIFS to verify
+the authenticity and integrity of metadata and file contents stored on flash.
+
+
+## Threat Model
+
+UBIFS authentication enables detection of offline data modification. While it
+does not prevent it, it enables (trusted) code to check the integrity and
+authenticity of on-flash file contents and filesystem metadata. This covers
+attacks where file contents are swapped.
+
+UBIFS authentication will not protect against rollback of full flash contents.
+Ie. an attacker can still dump the flash and restore it at a later time without
+detection. It will also not protect against partial rollback of individual
+index commits. That means that an attacker is able to partially undo changes.
+This is possible because UBIFS does not immediately overwrites obsolete
+versions of the index tree or the journal, but instead marks them as obsolete
+and garbage collection erases them at a later time. An attacker can use this by
+erasing parts of the current tree and restoring old versions that are still on
+the flash and have not yet been erased. This is possible, because every commit
+will always write a new version of the index root node and the master node
+without overwriting the previous version. This is further helped by the
+wear-leveling operations of UBI which copies contents from one physical
+eraseblock to another and does not atomically erase the first eraseblock.
+
+UBIFS authentication does not cover attacks where an attacker is able to
+execute code on the device after the authentication key was provided.
+Additional measures like secure boot and trusted boot have to be taken to
+ensure that only trusted code is executed on a device.
+
+
+## Authentication
+
+To be able to fully trust data read from flash, all UBIFS data structures
+stored on flash are authenticated. That is:
+
+- The index which includes file contents, file metadata like extended
+  attributes, file length etc.
+- The journal which also contains file contents and metadata by recording changes
+  to the filesystem
+- The LPT which stores UBI LEB metadata which UBIFS uses for free space accounting
+
+
+### Index Authentication
+
+Through UBIFS' concept of a wandering tree, it already takes care of only
+updating and persisting changed parts from leaf node up to the root node
+of the full B+ tree. This enables us to augment the index nodes of the tree
+with a hash over each node's child nodes. As a result, the index basically also
+a Merkle tree. Since the leaf nodes of the index contain the actual filesystem
+data, the hashes of their parent index nodes thus cover all the file contents
+and file metadata. When a file changes, the UBIFS index is updated accordingly
+from the leaf nodes up to the root node including the master node. This process
+can be hooked to recompute the hash only for each changed node at the same time.
+Whenever a file is read, UBIFS can verify the hashes from each leaf node up to
+the root node to ensure the node's integrity.
+
+To ensure the authenticity of the whole index, the UBIFS master node stores a
+keyed hash (HMAC) over its own contents and a hash of the root node of the index
+tree. As mentioned above, the master node is always written to the flash whenever
+the index is persisted (ie. on index commit).
+
+Using this approach only UBIFS index nodes and the master node are changed to
+include a hash. All other types of nodes will remain unchanged. This reduces
+the storage overhead which is precious for users of UBIFS (ie. embedded
+devices).
+
+
+                             +---------------+
+                             |  Master Node  |
+                             |    (hash)     |
+                             +---------------+
+                                     |
+                                     v
+                            +-------------------+
+                            |  Index Node #1    |
+                            |                   |
+                            | branch0   branchn |
+                            | (hash)    (hash)  |
+                            +-------------------+
+                               |    ...   |  (fanout: 8)
+                               |          |
+                       +-------+          +------+
+                       |                         |
+                       v                         v
+            +-------------------+       +-------------------+
+            |  Index Node #2    |       |  Index Node #3    |
+            |                   |       |                   |
+            | branch0   branchn |       | branch0   branchn |
+            | (hash)    (hash)  |       | (hash)    (hash)  |
+            +-------------------+       +-------------------+
+                 |   ...                     |   ...   |
+                 v                           v         v
+               +-----------+         +----------+  +-----------+
+               | Data Node |         | INO Node |  | DENT Node |
+               +-----------+         +----------+  +-----------+
+
+
+           Figure 3: Coverage areas of index node hash and master node HMAC
+
+
+
+The most important part for robustness and power-cut safety is to atomically
+persist the hash and file contents. Here the existing UBIFS logic for how
+changed nodes are persisted is already designed for this purpose such that
+UBIFS can safely recover if a power-cut occurs while persisting. Adding
+hashes to index nodes does not change this since each hash will be persisted
+atomically together with its respective node.
+
+
+### Journal Authentication
+
+The journal is authenticated too. Since the journal is continuously written
+it is necessary to also add authentication information frequently to the
+journal so that in case of a powercut not too much data can't be authenticated.
+This is done by creating a continuous hash beginning from the commit start node
+over the previous reference nodes, the current reference node, and the bud
+nodes. From time to time whenever it is suitable authentication nodes are added
+between the bud nodes. This new node type contains a HMAC over the current state
+of the hash chain. That way a journal can be authenticated up to the last
+authentication node. The tail of the journal which may not have a authentication
+node cannot be authenticated and is skipped during journal replay.
+
+We get this picture for journal authentication:
+
+    ,,,,,,,,
+    ,......,...........................................
+    ,. CS  ,               hash1.----.           hash2.----.
+    ,.  |  ,                    .    |hmac            .    |hmac
+    ,.  v  ,                    .    v                .    v
+    ,.REF#0,-> bud -> bud -> bud.-> auth -> bud -> bud.-> auth ...
+    ,..|...,...........................................
+    ,  |   ,
+    ,  |   ,,,,,,,,,,,,,,,
+    .  |            hash3,----.
+    ,  |                 ,    |hmac
+    ,  v                 ,    v
+    , REF#1 -> bud -> bud,-> auth ...
+    ,,,|,,,,,,,,,,,,,,,,,,
+       v
+      REF#2 -> ...
+       |
+       V
+      ...
+
+Since the hash also includes the reference nodes an attacker cannot reorder or
+skip any journal heads for replay. An attacker can only remove bud nodes or
+reference nodes from the end of the journal, effectively rewinding the
+filesystem at maximum back to the last commit.
+
+The location of the log area is stored in the master node. Since the master
+node is authenticated with a HMAC as described above, it is not possible to
+tamper with that without detection. The size of the log area is specified when
+the filesystem is created using `mkfs.ubifs` and stored in the superblock node.
+To avoid tampering with this and other values stored there, a HMAC is added to
+the superblock struct. The superblock node is stored in LEB 0 and is only
+modified on feature flag or similar changes, but never on file changes.
+
+
+### LPT Authentication
+
+The location of the LPT root node on the flash is stored in the UBIFS master
+node. Since the LPT is written and read atomically on every commit, there is
+no need to authenticate individual nodes of the tree. It suffices to
+protect the integrity of the full LPT by a simple hash stored in the master
+node. Since the master node itself is authenticated, the LPTs authenticity can
+be verified by verifying the authenticity of the master node and comparing the
+LTP hash stored there with the hash computed from the read on-flash LPT.
+
+
+## Key Management
+
+For simplicity, UBIFS authentication uses a single key to compute the HMACs
+of superblock, master, commit start and reference nodes. This key has to be
+available on creation of the filesystem (`mkfs.ubifs`) to authenticate the
+superblock node. Further, it has to be available on mount of the filesystem
+to verify authenticated nodes and generate new HMACs for changes.
+
+UBIFS authentication is intended to operate side-by-side with UBIFS encryption
+(fscrypt) to provide confidentiality and authenticity. Since UBIFS encryption
+has a different approach of encryption policies per directory, there can be
+multiple fscrypt master keys and there might be folders without encryption.
+UBIFS authentication on the other hand has an all-or-nothing approach in the
+sense that it either authenticates everything of the filesystem or nothing.
+Because of this and because UBIFS authentication should also be usable without
+encryption, it does not share the same master key with fscrypt, but manages
+a dedicated authentication key.
+
+The API for providing the authentication key has yet to be defined, but the
+key can eg. be provided by userspace through a keyring similar to the way it
+is currently done in fscrypt. It should however be noted that the current
+fscrypt approach has shown its flaws and the userspace API will eventually
+change [FSCRYPT-POLICY2].
+
+Nevertheless, it will be possible for a user to provide a single passphrase
+or key in userspace that covers UBIFS authentication and encryption. This can
+be solved by the corresponding userspace tools which derive a second key for
+authentication in addition to the derived fscrypt master key used for
+encryption.
+
+To be able to check if the proper key is available on mount, the UBIFS
+superblock node will additionally store a hash of the authentication key. This
+approach is similar to the approach proposed for fscrypt encryption policy v2
+[FSCRYPT-POLICY2].
+
+
+# Future Extensions
+
+In certain cases where a vendor wants to provide an authenticated filesystem
+image to customers, it should be possible to do so without sharing the secret
+UBIFS authentication key. Instead, in addition the each HMAC a digital
+signature could be stored where the vendor shares the public key alongside the
+filesystem image. In case this filesystem has to be modified afterwards,
+UBIFS can exchange all digital signatures with HMACs on first mount similar
+to the way the IMA/EVM subsystem deals with such situations. The HMAC key
+will then have to be provided beforehand in the normal way.
+
+
+# References
+
+[CRYPTSETUP2]        http://www.saout.de/pipermail/dm-crypt/2017-November/005745.html
+
+[DMC-CBC-ATTACK]     http://www.jakoblell.com/blog/2013/12/22/practical-malleability-attack-against-cbc-encrypted-luks-partitions/
+
+[DM-INTEGRITY]       https://www.kernel.org/doc/Documentation/device-mapper/dm-integrity.txt
+
+[DM-VERITY]          https://www.kernel.org/doc/Documentation/device-mapper/verity.txt
+
+[FSCRYPT-POLICY2]    https://www.spinics.net/lists/linux-ext4/msg58710.html
+
+[UBIFS-WP]           http://www.linux-mtd.infradead.org/doc/ubifs_whitepaper.pdf
-- 
GitLab


From 84db119f5a83e1bf9cffbc6d9cf16487eda1c056 Mon Sep 17 00:00:00 2001
From: Ding Xiang <dingxiang@cmss.chinamobile.com>
Date: Fri, 24 Aug 2018 01:15:05 -0400
Subject: [PATCH 0126/1890] ubifs: Remove unneeded semicolon

delete redundant semicolon

Signed-off-by: Ding Xiang <dingxiang@cmss.chinamobile.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 fs/ubifs/sb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index ac1ed5ad126db..75a69dd26d6ea 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -689,7 +689,7 @@ int ubifs_read_superblock(struct ubifs_info *c)
 		c->key_hash = key_test_hash;
 		c->key_hash_type = UBIFS_KEY_HASH_TEST;
 		break;
-	};
+	}
 
 	c->key_fmt = sup->key_fmt;
 
-- 
GitLab


From 1d865c06f5715df94528f76d6bb7f6f98975e04e Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Mon, 1 Oct 2018 09:28:36 +0300
Subject: [PATCH 0127/1890] perf scripts python: call-graph-from-sql.py:
 Provide better default column sizes

Set initial column sizes to improve initial display.

Committer testing:

Extended instructions on testing this, using the sqlite variant:

Make sure you have the SQLite glue for python+Qt installed, on fedora 27
I used:

  # dnf install python-pyside

Collect some PT samples, say 5-secs worth, system wide:

  # perf record -r 10 -e intel_pt//u -a sleep 5
  [ perf record: Woken up 49 times to write data ]
  [ perf record: Captured and wrote 96.131 MB perf.data ]

This results in this perf.data file:

  # ls -larth perf.data
  -rw-------. 1 root root 97M Oct 23 10:11 perf.data

With the following attributes:

  # perf evlist -v
  intel_pt//u: type: 8, size: 112, config: 0x300e601, { sample_period, sample_freq }: 1, sample_type: IP|TID|TIME|CPU|IDENTIFIER, read_format: ID, disabled: 1, inherit: 1, exclude_kernel: 1, exclude_hv: 1, sample_id_all: 1
  dummy:u: type: 1, size: 112, config: 0x9, { sample_period, sample_freq }: 1, sample_type: IP|TID|TIME|CPU|IDENTIFIER, read_format: ID, inherit: 1, exclude_kernel: 1, exclude_hv: 1, mmap: 1, comm: 1, task: 1, sample_id_all: 1, mmap2: 1, comm_exec: 1, context_switch: 1
  #

Then generate the "pt_example" tables using:

  # perf script -s ~/libexec/perf-core/scripts/python/export-to-sqlite.py pt_example branches calls
  2018-10-23 10:56:59.177711 Creating database...
  2018-10-23 10:56:59.195842 Writing records...
   instruction trace error type 1 cpu 2 pid 1644 tid 1644 ip 0x263984516750 code 5: Failed to get instruction
   instruction trace error type 1 cpu 2 pid 1644 tid 1644 ip 0x7f26e116fd20 code 6: Trace doesn't match instruction
   instruction trace error type 1 cpu 2 pid 1644 tid 1644 ip 0x7f26e162c9ee code 6: Trace doesn't match instruction
   instruction trace error type 1 cpu 2 pid 1644 tid 1644 ip 0x7f26e9ce831a code 6: Trace doesn't match instruction
  <SNIP>
   instruction trace error type 1 cpu 0 pid 1644 tid 1644 ip 0x7f26e13d07b4 code 6: Trace doesn't match instruction
  Warning:
  132 instruction trace errors
  2018-10-23 11:25:25.015717 Adding indexes
  2018-10-23 11:25:28.788061 Done
  #

In my example, that perf.data file generated this db:

  # file pt_example
  pt_example: SQLite 3.x database, last written using SQLite version 3020001
  [root@seventh perf]# ls -lah pt_example
  -rw-r--r--. 1 root root 6.6G Oct 23 11:25 pt_example
  #

Then use this python script to use that db and provide a GUI:

  $ python tools/perf/scripts/python/call-graph-from-sql.py pt_example branches calls

I compared the column widths before this patch and after applying it,
the visual results match the patch intent.

The following patches will refer to this set of instructions in the "Committer
Testing" section.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/20181001062853.28285-3-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/scripts/python/call-graph-from-sql.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/perf/scripts/python/call-graph-from-sql.py b/tools/perf/scripts/python/call-graph-from-sql.py
index ce1b91fcd6b88..e1014f2628a7d 100644
--- a/tools/perf/scripts/python/call-graph-from-sql.py
+++ b/tools/perf/scripts/python/call-graph-from-sql.py
@@ -280,6 +280,9 @@ class MainWindow(QMainWindow):
 		self.view = QTreeView()
 		self.view.setModel(self.model)
 
+		for c, w in ((0, 250), (1, 100), (2, 60), (3, 70), (4, 70), (5, 100)):
+			self.view.setColumnWidth(c, w)
+
 		self.setCentralWidget(self.view)
 
 if __name__ == '__main__':
-- 
GitLab


From 3c4ef451506897a15aff76ff141c995c6cd32f4d Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Mon, 1 Oct 2018 09:28:37 +0300
Subject: [PATCH 0128/1890] perf scripts python: call-graph-from-sql.py: Set a
 minimum window size

Prevent weirdly small window size.

Committer testing:

Seems to work, but even before this patch, on my system, it always
started with:

xwininfo: Window id: 0x1e00002 "Call Graph: pt_example"
<SNIP>
  Width: 800
  Height: 600
<SNIP>

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/20181001062853.28285-4-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/scripts/python/call-graph-from-sql.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/perf/scripts/python/call-graph-from-sql.py b/tools/perf/scripts/python/call-graph-from-sql.py
index e1014f2628a7d..68153fa1b4d12 100644
--- a/tools/perf/scripts/python/call-graph-from-sql.py
+++ b/tools/perf/scripts/python/call-graph-from-sql.py
@@ -274,6 +274,7 @@ class MainWindow(QMainWindow):
 		style = self.style()
 		icon = style.standardIcon(QStyle.SP_MessageBoxInformation)
 		self.setWindowIcon(icon);
+		self.setMinimumSize(200, 100)
 
 		self.model = TreeModel(db)
 
-- 
GitLab


From 99a097c987c26c8c82293fcb92908d07009c925e Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Mon, 1 Oct 2018 09:28:38 +0300
Subject: [PATCH 0129/1890] perf scripts python: call-graph-from-sql.py: Change
 icon

There are not many standard icons, but the computer icon looks slightly
better than the information icon.

Committer testing:

Noticed the change on the icon on the gnome menu right next to the
"Activities" menu, looks nicer indeed.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/20181001062853.28285-5-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/scripts/python/call-graph-from-sql.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tools/perf/scripts/python/call-graph-from-sql.py b/tools/perf/scripts/python/call-graph-from-sql.py
index 68153fa1b4d12..2e33540f3de01 100644
--- a/tools/perf/scripts/python/call-graph-from-sql.py
+++ b/tools/perf/scripts/python/call-graph-from-sql.py
@@ -271,9 +271,7 @@ class MainWindow(QMainWindow):
 		self.setWindowTitle("Call Graph: " + dbname)
 		self.move(100, 100)
 		self.resize(800, 600)
-		style = self.style()
-		icon = style.standardIcon(QStyle.SP_MessageBoxInformation)
-		self.setWindowIcon(icon);
+		self.setWindowIcon(self.style().standardIcon(QStyle.SP_ComputerIcon))
 		self.setMinimumSize(200, 100)
 
 		self.model = TreeModel(db)
-- 
GitLab


From 7e4fc93e2ade2b0c453a97e307203ffe3f930c98 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Mon, 1 Oct 2018 09:28:39 +0300
Subject: [PATCH 0130/1890] perf scripts python: call-graph-from-sql.py: Make a
 "Main" function

Make a "Main" function so that the variables used do not pollute the global
namespace.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/20181001062853.28285-6-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/scripts/python/call-graph-from-sql.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tools/perf/scripts/python/call-graph-from-sql.py b/tools/perf/scripts/python/call-graph-from-sql.py
index 2e33540f3de01..2b74b94eecccd 100644
--- a/tools/perf/scripts/python/call-graph-from-sql.py
+++ b/tools/perf/scripts/python/call-graph-from-sql.py
@@ -284,7 +284,9 @@ class MainWindow(QMainWindow):
 
 		self.setCentralWidget(self.view)
 
-if __name__ == '__main__':
+# Main
+
+def Main():
 	if (len(sys.argv) < 2):
 		print >> sys.stderr, "Usage is: call-graph-from-sql.py <database name>"
 		raise Exception("Too few arguments")
@@ -331,3 +333,6 @@ if __name__ == '__main__':
 	err = app.exec_()
 	db.close()
 	sys.exit(err)
+
+if __name__ == "__main__":
+	Main()
-- 
GitLab


From b2556c46a69b4c0e6bbf690ac4ca2913cbe90e1e Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Mon, 1 Oct 2018 09:28:40 +0300
Subject: [PATCH 0131/1890] perf scripts python: call-graph-from-sql.py:
 Separate the database details into a class

Separate the database details into a class that can provide different
connections using the same connection information.  That paves the way
for sub-processes that require their own connection.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/20181001062853.28285-7-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../scripts/python/call-graph-from-sql.py     | 63 +++++++++++--------
 1 file changed, 38 insertions(+), 25 deletions(-)

diff --git a/tools/perf/scripts/python/call-graph-from-sql.py b/tools/perf/scripts/python/call-graph-from-sql.py
index 2b74b94eecccd..9d056deab2b19 100644
--- a/tools/perf/scripts/python/call-graph-from-sql.py
+++ b/tools/perf/scripts/python/call-graph-from-sql.py
@@ -284,6 +284,42 @@ class MainWindow(QMainWindow):
 
 		self.setCentralWidget(self.view)
 
+# Database reference
+
+class DBRef():
+
+	def __init__(self, is_sqlite3, dbname):
+		self.is_sqlite3 = is_sqlite3
+		self.dbname = dbname
+
+	def Open(self, connection_name):
+		dbname = self.dbname
+		if self.is_sqlite3:
+			db = QSqlDatabase.addDatabase("QSQLITE", connection_name)
+		else:
+			db = QSqlDatabase.addDatabase("QPSQL", connection_name)
+			opts = dbname.split()
+			for opt in opts:
+				if "=" in opt:
+					opt = opt.split("=")
+					if opt[0] == "hostname":
+						db.setHostName(opt[1])
+					elif opt[0] == "port":
+						db.setPort(int(opt[1]))
+					elif opt[0] == "username":
+						db.setUserName(opt[1])
+					elif opt[0] == "password":
+						db.setPassword(opt[1])
+					elif opt[0] == "dbname":
+						dbname = opt[1]
+				else:
+					dbname = opt
+
+		db.setDatabaseName(dbname)
+		if not db.open():
+			raise Exception("Failed to open database " + dbname + " error: " + db.lastError().text())
+		return db, dbname
+
 # Main
 
 def Main():
@@ -302,31 +338,8 @@ def Main():
 	except:
 		pass
 
-	if is_sqlite3:
-		db = QSqlDatabase.addDatabase('QSQLITE')
-	else:
-		db = QSqlDatabase.addDatabase('QPSQL')
-		opts = dbname.split()
-		for opt in opts:
-			if '=' in opt:
-				opt = opt.split('=')
-				if opt[0] == 'hostname':
-					db.setHostName(opt[1])
-				elif opt[0] == 'port':
-					db.setPort(int(opt[1]))
-				elif opt[0] == 'username':
-					db.setUserName(opt[1])
-				elif opt[0] == 'password':
-					db.setPassword(opt[1])
-				elif opt[0] == 'dbname':
-					dbname = opt[1]
-			else:
-				dbname = opt
-
-	db.setDatabaseName(dbname)
-	if not db.open():
-		raise Exception("Failed to open database " + dbname + " error: " + db.lastError().text())
-
+	dbref = DBRef(is_sqlite3, dbname)
+	db, dbname = dbref.Open("main")
 	app = QApplication(sys.argv)
 	window = MainWindow(db, dbname)
 	window.show()
-- 
GitLab


From 5f9dfef1bb7fadfb2d001244ef23359982fedd06 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Mon, 1 Oct 2018 09:28:41 +0300
Subject: [PATCH 0132/1890] perf scripts python: call-graph-from-sql.py: Add a
 class for global data

Keep global data in a single object that is easy to pass around as
needed, without polluting the global namespace.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/20181001062853.28285-8-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../scripts/python/call-graph-from-sql.py     | 26 +++++++++++++++----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/tools/perf/scripts/python/call-graph-from-sql.py b/tools/perf/scripts/python/call-graph-from-sql.py
index 9d056deab2b19..0a4dc13d4818a 100644
--- a/tools/perf/scripts/python/call-graph-from-sql.py
+++ b/tools/perf/scripts/python/call-graph-from-sql.py
@@ -264,17 +264,19 @@ class TreeModel(QAbstractItemModel):
 
 class MainWindow(QMainWindow):
 
-	def __init__(self, db, dbname, parent=None):
+	def __init__(self, glb, parent=None):
 		super(MainWindow, self).__init__(parent)
 
+		self.glb = glb
+
 		self.setObjectName("MainWindow")
-		self.setWindowTitle("Call Graph: " + dbname)
+		self.setWindowTitle("Call Graph: " + glb.dbname)
 		self.move(100, 100)
 		self.resize(800, 600)
 		self.setWindowIcon(self.style().standardIcon(QStyle.SP_ComputerIcon))
 		self.setMinimumSize(200, 100)
 
-		self.model = TreeModel(db)
+		self.model = TreeModel(glb.db)
 
 		self.view = QTreeView()
 		self.view.setModel(self.model)
@@ -284,6 +286,17 @@ class MainWindow(QMainWindow):
 
 		self.setCentralWidget(self.view)
 
+# Global data
+
+class Glb():
+
+	def __init__(self, dbref, db, dbname):
+		self.dbref = dbref
+		self.db = db
+		self.dbname = dbname
+		self.app = None
+		self.mainwindow = None
+
 # Database reference
 
 class DBRef():
@@ -340,9 +353,12 @@ def Main():
 
 	dbref = DBRef(is_sqlite3, dbname)
 	db, dbname = dbref.Open("main")
+	glb = Glb(dbref, db, dbname)
 	app = QApplication(sys.argv)
-	window = MainWindow(db, dbname)
-	window.show()
+	glb.app = app
+	mainwindow = MainWindow(glb)
+	glb.mainwindow = mainwindow
+	mainwindow.show()
 	err = app.exec_()
 	db.close()
 	sys.exit(err)
-- 
GitLab


From e99ef8141a6d97abaf47647cfd0034769144d080 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Mon, 1 Oct 2018 09:28:42 +0300
Subject: [PATCH 0133/1890] perf scripts python: call-graph-from-sql.py: Remove
 use of setObjectName()

The object name is never used, so don't bother setting it.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/20181001062853.28285-9-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/scripts/python/call-graph-from-sql.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tools/perf/scripts/python/call-graph-from-sql.py b/tools/perf/scripts/python/call-graph-from-sql.py
index 0a4dc13d4818a..65c18e351bc40 100644
--- a/tools/perf/scripts/python/call-graph-from-sql.py
+++ b/tools/perf/scripts/python/call-graph-from-sql.py
@@ -269,7 +269,6 @@ class MainWindow(QMainWindow):
 
 		self.glb = glb
 
-		self.setObjectName("MainWindow")
 		self.setWindowTitle("Call Graph: " + glb.dbname)
 		self.move(100, 100)
 		self.resize(800, 600)
-- 
GitLab


From 70d831e85c1bdd87d193e85666bf3aa39aab7f21 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Mon, 1 Oct 2018 09:28:43 +0300
Subject: [PATCH 0134/1890] perf scripts python: call-graph-from-sql.py: Factor
 out CallGraphModel from TreeModel

Factor out CallGraphModel from TreeModel, which paves the way to reuse
TreeModel in future reports.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/20181001062853.28285-10-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../scripts/python/call-graph-from-sql.py     | 90 +++++++++++++------
 1 file changed, 61 insertions(+), 29 deletions(-)

diff --git a/tools/perf/scripts/python/call-graph-from-sql.py b/tools/perf/scripts/python/call-graph-from-sql.py
index 65c18e351bc40..ada486048ad8d 100644
--- a/tools/perf/scripts/python/call-graph-from-sql.py
+++ b/tools/perf/scripts/python/call-graph-from-sql.py
@@ -201,42 +201,47 @@ class TreeItem():
 			self.selectCalls()
 		return self.child_count
 
-	def columnCount(self):
-		return 7
-
-	def columnHeader(self, column):
-		headers = ["Call Path", "Object", "Count ", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "]
-		return headers[column]
+	def hasChildren(self):
+		if not self.query_done:
+			return True
+		return self.child_count > 0
 
 	def getData(self, column):
 		return self.data[column]
 
+# Tree data model
+
 class TreeModel(QAbstractItemModel):
 
-	def __init__(self, db, parent=None):
+	def __init__(self, root, parent=None):
 		super(TreeModel, self).__init__(parent)
-		self.db = db
-		self.root = TreeItem(db, 0, None)
+		self.root = root
+		self.last_row_read = 0
 
-	def columnCount(self, parent):
-		return self.root.columnCount()
-
-	def rowCount(self, parent):
+	def Item(self, parent):
 		if parent.isValid():
-			parent_item = parent.internalPointer()
+			return parent.internalPointer()
 		else:
-			parent_item = self.root
-		return parent_item.childCount()
+			return self.root
+
+	def rowCount(self, parent):
+		result = self.Item(parent).childCount()
+		if result < 0:
+			result = 0
+			self.dataChanged.emit(parent, parent)
+		return result
+
+	def hasChildren(self, parent):
+		return self.Item(parent).hasChildren()
 
 	def headerData(self, section, orientation, role):
 		if role == Qt.TextAlignmentRole:
-			if section > 1:
-				return Qt.AlignRight
+			return self.columnAlignment(section)
 		if role != Qt.DisplayRole:
 			return None
 		if orientation != Qt.Horizontal:
 			return None
-		return self.root.columnHeader(section)
+		return self.columnHeader(section)
 
 	def parent(self, child):
 		child_item = child.internalPointer()
@@ -246,21 +251,48 @@ class TreeModel(QAbstractItemModel):
 		return self.createIndex(parent_item.getRow(), 0, parent_item)
 
 	def index(self, row, column, parent):
-		if parent.isValid():
-			parent_item = parent.internalPointer()
-		else:
-			parent_item = self.root
-		child_item = parent_item.getChildItem(row)
+		child_item = self.Item(parent).getChildItem(row)
 		return self.createIndex(row, column, child_item)
 
+	def DisplayData(self, item, index):
+		return item.getData(index.column())
+
+	def columnAlignment(self, column):
+		return Qt.AlignLeft
+
+	def columnFont(self, column):
+		return None
+
 	def data(self, index, role):
 		if role == Qt.TextAlignmentRole:
-			if index.column() > 1:
-				return Qt.AlignRight
+			return self.columnAlignment(index.column())
+		if role == Qt.FontRole:
+			return self.columnFont(index.column())
 		if role != Qt.DisplayRole:
 			return None
-		index_item = index.internalPointer()
-		return index_item.getData(index.column())
+		item = index.internalPointer()
+		return self.DisplayData(item, index)
+
+# Context-sensitive call graph data model
+
+class CallGraphModel(TreeModel):
+
+	def __init__(self, glb, parent=None):
+		super(CallGraphModel, self).__init__(TreeItem(glb.db, 0, None), parent)
+		self.glb = glb
+
+	def columnCount(self, parent=None):
+		return 7
+
+	def columnHeader(self, column):
+		headers = ["Call Path", "Object", "Count ", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "]
+		return headers[column]
+
+	def columnAlignment(self, column):
+		alignment = [ Qt.AlignLeft, Qt.AlignLeft, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight ]
+		return alignment[column]
+
+# Main window
 
 class MainWindow(QMainWindow):
 
@@ -275,7 +307,7 @@ class MainWindow(QMainWindow):
 		self.setWindowIcon(self.style().standardIcon(QStyle.SP_ComputerIcon))
 		self.setMinimumSize(200, 100)
 
-		self.model = TreeModel(glb.db)
+		self.model = CallGraphModel(glb)
 
 		self.view = QTreeView()
 		self.view.setModel(self.model)
-- 
GitLab


From 4be9ace7e1cdcb44c1fba1fb41ec2b92dda06732 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Mon, 1 Oct 2018 09:28:44 +0300
Subject: [PATCH 0135/1890] perf scripts python: call-graph-from-sql.py: Add
 data helper functions

Add helper functions for a few common cases.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/20181001062853.28285-11-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../scripts/python/call-graph-from-sql.py     | 54 ++++++++++---------
 1 file changed, 29 insertions(+), 25 deletions(-)

diff --git a/tools/perf/scripts/python/call-graph-from-sql.py b/tools/perf/scripts/python/call-graph-from-sql.py
index ada486048ad8d..7f2eabe7dacdc 100644
--- a/tools/perf/scripts/python/call-graph-from-sql.py
+++ b/tools/perf/scripts/python/call-graph-from-sql.py
@@ -52,6 +52,28 @@ from PySide.QtGui import *
 from PySide.QtSql import *
 from decimal import *
 
+# Data formatting helpers
+
+def dsoname(name):
+	if name == "[kernel.kallsyms]":
+		return "[kernel]"
+	return name
+
+# Percent to one decimal place
+
+def PercentToOneDP(n, d):
+	if not d:
+		return "0.0"
+	x = (n * Decimal(100)) / d
+	return str(x.quantize(Decimal(".1"), rounding=ROUND_HALF_UP))
+
+# Helper for queries that must not fail
+
+def QueryExec(query, stmt):
+	ret = query.exec_(stmt)
+	if not ret:
+		raise Exception("Query failed: " + query.lastError().text())
+
 class TreeItem():
 
 	def __init__(self, db, row, parent_item):
@@ -73,9 +95,7 @@ class TreeItem():
 	def setUpRoot(self):
 		self.query_done = True
 		query = QSqlQuery(self.db)
-		ret = query.exec_('SELECT id, comm FROM comms')
-		if not ret:
-			raise Exception("Query failed: " + query.lastError().text())
+		QueryExec(query, 'SELECT id, comm FROM comms')
 		while query.next():
 			if not query.value(0):
 				continue
@@ -91,9 +111,7 @@ class TreeItem():
 		self.child_items = []
 		self.child_count = 0
 		query = QSqlQuery(self.db)
-		ret = query.exec_('SELECT thread_id, ( SELECT pid FROM threads WHERE id = thread_id ), ( SELECT tid FROM threads WHERE id = thread_id ) FROM comm_threads WHERE comm_id = ' + str(comm_id))
-		if not ret:
-			raise Exception("Query failed: " + query.lastError().text())
+		QueryExec(query, 'SELECT thread_id, ( SELECT pid FROM threads WHERE id = thread_id ), ( SELECT tid FROM threads WHERE id = thread_id ) FROM comm_threads WHERE comm_id = ' + str(comm_id))
 		while query.next():
 			child_item = TreeItem(self.db, self.child_count, self)
 			self.child_items.append(child_item)
@@ -114,18 +132,6 @@ class TreeItem():
 	def getRow(self):
 		return self.row
 
-	def timePercent(self, b):
-		if not self.time:
-			return "0.0"
-		x = (b * Decimal(100)) / self.time
-		return str(x.quantize(Decimal('.1'), rounding=ROUND_HALF_UP))
-
-	def branchPercent(self, b):
-		if not self.branch_count:
-			return "0.0"
-		x = (b * Decimal(100)) / self.branch_count
-		return str(x.quantize(Decimal('.1'), rounding=ROUND_HALF_UP))
-
 	def addChild(self, call_path_id, name, dso, count, time, branch_count):
 		child_item = TreeItem(self.db, self.child_count, self)
 		child_item.comm_id = self.comm_id
@@ -134,14 +140,12 @@ class TreeItem():
 		child_item.branch_count = branch_count
 		child_item.time = time
 		child_item.data[0] = name
-		if dso == "[kernel.kallsyms]":
-			dso = "[kernel]"
-		child_item.data[1] = dso
+		child_item.data[1] = dsoname(dso)
 		child_item.data[2] = str(count)
 		child_item.data[3] = str(time)
-		child_item.data[4] = self.timePercent(time)
+		child_item.data[4] = PercentToOneDP(time, self.time)
 		child_item.data[5] = str(branch_count)
-		child_item.data[6] = self.branchPercent(branch_count)
+		child_item.data[6] = PercentToOneDP(branch_count, self.branch_count)
 		self.child_items.append(child_item)
 		self.child_count += 1
 
@@ -189,12 +193,12 @@ class TreeItem():
 			self.branch_count = total_branch_count
 			if self.branch_count:
 				for child_item in self.child_items:
-					child_item.data[6] = self.branchPercent(child_item.branch_count)
+					child_item.data[6] = PercentToOneDP(child_item.branch_count, self.branch_count)
 		if total_time > self.time:
 			self.time = total_time
 			if self.time:
 				for child_item in self.child_items:
-					child_item.data[4] = self.timePercent(child_item.time)
+					child_item.data[4] = PercentToOneDP(child_item.time, self.time)
 
 	def childCount(self):
 		if not self.query_done:
-- 
GitLab


From 341e73cbd3019d350d1271803b45d84af88f2408 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Mon, 1 Oct 2018 09:28:45 +0300
Subject: [PATCH 0136/1890] perf scripts python: call-graph-from-sql.py:
 Refactor TreeItem class

class TreeItem represents items at all levels of the call-graph tree.
However, not all the levels represent the same data i.e. the top-level is
comms, the next level is threads, and subsequent levels are functions.
Consequently it is simpler to have separate classes for different levels
with commonality in a base class. Refactor TreeItem class accordingly.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/20181001062853.28285-12-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../scripts/python/call-graph-from-sql.py     | 273 +++++++++---------
 1 file changed, 133 insertions(+), 140 deletions(-)

diff --git a/tools/perf/scripts/python/call-graph-from-sql.py b/tools/perf/scripts/python/call-graph-from-sql.py
index 7f2eabe7dacdc..ee1085169a3ed 100644
--- a/tools/perf/scripts/python/call-graph-from-sql.py
+++ b/tools/perf/scripts/python/call-graph-from-sql.py
@@ -74,145 +74,6 @@ def QueryExec(query, stmt):
 	if not ret:
 		raise Exception("Query failed: " + query.lastError().text())
 
-class TreeItem():
-
-	def __init__(self, db, row, parent_item):
-		self.db = db
-		self.row = row
-		self.parent_item = parent_item
-		self.query_done = False;
-		self.child_count = 0
-		self.child_items = []
-		self.data = ["", "", "", "", "", "", ""]
-		self.comm_id = 0
-		self.thread_id = 0
-		self.call_path_id = 1
-		self.branch_count = 0
-		self.time = 0
-		if not parent_item:
-			self.setUpRoot()
-
-	def setUpRoot(self):
-		self.query_done = True
-		query = QSqlQuery(self.db)
-		QueryExec(query, 'SELECT id, comm FROM comms')
-		while query.next():
-			if not query.value(0):
-				continue
-			child_item = TreeItem(self.db, self.child_count, self)
-			self.child_items.append(child_item)
-			self.child_count += 1
-			child_item.setUpLevel1(query.value(0), query.value(1))
-
-	def setUpLevel1(self, comm_id, comm):
-		self.query_done = True;
-		self.comm_id = comm_id
-		self.data[0] = comm
-		self.child_items = []
-		self.child_count = 0
-		query = QSqlQuery(self.db)
-		QueryExec(query, 'SELECT thread_id, ( SELECT pid FROM threads WHERE id = thread_id ), ( SELECT tid FROM threads WHERE id = thread_id ) FROM comm_threads WHERE comm_id = ' + str(comm_id))
-		while query.next():
-			child_item = TreeItem(self.db, self.child_count, self)
-			self.child_items.append(child_item)
-			self.child_count += 1
-			child_item.setUpLevel2(comm_id, query.value(0), query.value(1), query.value(2))
-
-	def setUpLevel2(self, comm_id, thread_id, pid, tid):
-		self.comm_id = comm_id
-		self.thread_id = thread_id
-		self.data[0] = str(pid) + ":" + str(tid)
-
-	def getChildItem(self, row):
-		return self.child_items[row]
-
-	def getParentItem(self):
-		return self.parent_item
-
-	def getRow(self):
-		return self.row
-
-	def addChild(self, call_path_id, name, dso, count, time, branch_count):
-		child_item = TreeItem(self.db, self.child_count, self)
-		child_item.comm_id = self.comm_id
-		child_item.thread_id = self.thread_id
-		child_item.call_path_id = call_path_id
-		child_item.branch_count = branch_count
-		child_item.time = time
-		child_item.data[0] = name
-		child_item.data[1] = dsoname(dso)
-		child_item.data[2] = str(count)
-		child_item.data[3] = str(time)
-		child_item.data[4] = PercentToOneDP(time, self.time)
-		child_item.data[5] = str(branch_count)
-		child_item.data[6] = PercentToOneDP(branch_count, self.branch_count)
-		self.child_items.append(child_item)
-		self.child_count += 1
-
-	def selectCalls(self):
-		self.query_done = True;
-		query = QSqlQuery(self.db)
-		ret = query.exec_('SELECT id, call_path_id, branch_count, call_time, return_time, '
-				  '( SELECT name FROM symbols WHERE id = ( SELECT symbol_id FROM call_paths WHERE id = call_path_id ) ), '
-				  '( SELECT short_name FROM dsos WHERE id = ( SELECT dso_id FROM symbols WHERE id = ( SELECT symbol_id FROM call_paths WHERE id = call_path_id ) ) ), '
-				  '( SELECT ip FROM call_paths where id = call_path_id ) '
-				  'FROM calls WHERE parent_call_path_id = ' + str(self.call_path_id) + ' AND comm_id = ' + str(self.comm_id) + ' AND thread_id = ' + str(self.thread_id) +
-				  ' ORDER BY call_path_id')
-		if not ret:
-			raise Exception("Query failed: " + query.lastError().text())
-		last_call_path_id = 0
-		name = ""
-		dso = ""
-		count = 0
-		branch_count = 0
-		total_branch_count = 0
-		time = 0
-		total_time = 0
-		while query.next():
-			if query.value(1) == last_call_path_id:
-				count += 1
-				branch_count += query.value(2)
-				time += query.value(4) - query.value(3)
-			else:
-				if count:
-					self.addChild(last_call_path_id, name, dso, count, time, branch_count)
-				last_call_path_id = query.value(1)
-				name = query.value(5)
-				dso = query.value(6)
-				count = 1
-				total_branch_count += branch_count
-				total_time += time
-				branch_count = query.value(2)
-				time = query.value(4) - query.value(3)
-		if count:
-			self.addChild(last_call_path_id, name, dso, count, time, branch_count)
-		total_branch_count += branch_count
-		total_time += time
-		# Top level does not have time or branch count, so fix that here
-		if total_branch_count > self.branch_count:
-			self.branch_count = total_branch_count
-			if self.branch_count:
-				for child_item in self.child_items:
-					child_item.data[6] = PercentToOneDP(child_item.branch_count, self.branch_count)
-		if total_time > self.time:
-			self.time = total_time
-			if self.time:
-				for child_item in self.child_items:
-					child_item.data[4] = PercentToOneDP(child_item.time, self.time)
-
-	def childCount(self):
-		if not self.query_done:
-			self.selectCalls()
-		return self.child_count
-
-	def hasChildren(self):
-		if not self.query_done:
-			return True
-		return self.child_count > 0
-
-	def getData(self, column):
-		return self.data[column]
-
 # Tree data model
 
 class TreeModel(QAbstractItemModel):
@@ -277,12 +138,144 @@ class TreeModel(QAbstractItemModel):
 		item = index.internalPointer()
 		return self.DisplayData(item, index)
 
+# Context-sensitive call graph data model item base
+
+class CallGraphLevelItemBase(object):
+
+	def __init__(self, glb, row, parent_item):
+		self.glb = glb
+		self.row = row
+		self.parent_item = parent_item
+		self.query_done = False;
+		self.child_count = 0
+		self.child_items = []
+
+	def getChildItem(self, row):
+		return self.child_items[row]
+
+	def getParentItem(self):
+		return self.parent_item
+
+	def getRow(self):
+		return self.row
+
+	def childCount(self):
+		if not self.query_done:
+			self.Select()
+			if not self.child_count:
+				return -1
+		return self.child_count
+
+	def hasChildren(self):
+		if not self.query_done:
+			return True
+		return self.child_count > 0
+
+	def getData(self, column):
+		return self.data[column]
+
+# Context-sensitive call graph data model level 2+ item base
+
+class CallGraphLevelTwoPlusItemBase(CallGraphLevelItemBase):
+
+	def __init__(self, glb, row, comm_id, thread_id, call_path_id, time, branch_count, parent_item):
+		super(CallGraphLevelTwoPlusItemBase, self).__init__(glb, row, parent_item)
+		self.comm_id = comm_id
+		self.thread_id = thread_id
+		self.call_path_id = call_path_id
+		self.branch_count = branch_count
+		self.time = time
+
+	def Select(self):
+		self.query_done = True;
+		query = QSqlQuery(self.glb.db)
+		QueryExec(query, "SELECT call_path_id, name, short_name, COUNT(calls.id), SUM(return_time - call_time), SUM(branch_count)"
+					" FROM calls"
+					" INNER JOIN call_paths ON calls.call_path_id = call_paths.id"
+					" INNER JOIN symbols ON call_paths.symbol_id = symbols.id"
+					" INNER JOIN dsos ON symbols.dso_id = dsos.id"
+					" WHERE parent_call_path_id = " + str(self.call_path_id) +
+					" AND comm_id = " + str(self.comm_id) +
+					" AND thread_id = " + str(self.thread_id) +
+					" GROUP BY call_path_id, name, short_name"
+					" ORDER BY call_path_id")
+		while query.next():
+			child_item = CallGraphLevelThreeItem(self.glb, self.child_count, self.comm_id, self.thread_id, query.value(0), query.value(1), query.value(2), query.value(3), int(query.value(4)), int(query.value(5)), self)
+			self.child_items.append(child_item)
+			self.child_count += 1
+
+# Context-sensitive call graph data model level three item
+
+class CallGraphLevelThreeItem(CallGraphLevelTwoPlusItemBase):
+
+	def __init__(self, glb, row, comm_id, thread_id, call_path_id, name, dso, count, time, branch_count, parent_item):
+		super(CallGraphLevelThreeItem, self).__init__(glb, row, comm_id, thread_id, call_path_id, time, branch_count, parent_item)
+		dso = dsoname(dso)
+		self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(branch_count), PercentToOneDP(branch_count, parent_item.branch_count) ]
+		self.dbid = call_path_id
+
+# Context-sensitive call graph data model level two item
+
+class CallGraphLevelTwoItem(CallGraphLevelTwoPlusItemBase):
+
+	def __init__(self, glb, row, comm_id, thread_id, pid, tid, parent_item):
+		super(CallGraphLevelTwoItem, self).__init__(glb, row, comm_id, thread_id, 1, 0, 0, parent_item)
+		self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", ""]
+		self.dbid = thread_id
+
+	def Select(self):
+		super(CallGraphLevelTwoItem, self).Select()
+		for child_item in self.child_items:
+			self.time += child_item.time
+			self.branch_count += child_item.branch_count
+		for child_item in self.child_items:
+			child_item.data[4] = PercentToOneDP(child_item.time, self.time)
+			child_item.data[6] = PercentToOneDP(child_item.branch_count, self.branch_count)
+
+# Context-sensitive call graph data model level one item
+
+class CallGraphLevelOneItem(CallGraphLevelItemBase):
+
+	def __init__(self, glb, row, comm_id, comm, parent_item):
+		super(CallGraphLevelOneItem, self).__init__(glb, row, parent_item)
+		self.data = [comm, "", "", "", "", "", ""]
+		self.dbid = comm_id
+
+	def Select(self):
+		self.query_done = True;
+		query = QSqlQuery(self.glb.db)
+		QueryExec(query, "SELECT thread_id, pid, tid"
+					" FROM comm_threads"
+					" INNER JOIN threads ON thread_id = threads.id"
+					" WHERE comm_id = " + str(self.dbid))
+		while query.next():
+			child_item = CallGraphLevelTwoItem(self.glb, self.child_count, self.dbid, query.value(0), query.value(1), query.value(2), self)
+			self.child_items.append(child_item)
+			self.child_count += 1
+
+# Context-sensitive call graph data model root item
+
+class CallGraphRootItem(CallGraphLevelItemBase):
+
+	def __init__(self, glb):
+		super(CallGraphRootItem, self).__init__(glb, 0, None)
+		self.dbid = 0
+		self.query_done = True;
+		query = QSqlQuery(glb.db)
+		QueryExec(query, "SELECT id, comm FROM comms")
+		while query.next():
+			if not query.value(0):
+				continue
+			child_item = CallGraphLevelOneItem(glb, self.child_count, query.value(0), query.value(1), self)
+			self.child_items.append(child_item)
+			self.child_count += 1
+
 # Context-sensitive call graph data model
 
 class CallGraphModel(TreeModel):
 
 	def __init__(self, glb, parent=None):
-		super(CallGraphModel, self).__init__(TreeItem(glb.db, 0, None), parent)
+		super(CallGraphModel, self).__init__(CallGraphRootItem(glb), parent)
 		self.glb = glb
 
 	def columnCount(self, parent=None):
-- 
GitLab


From 031c2a004ba75a4f8f2a6d0a7ca6f2fe5912de22 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Mon, 1 Oct 2018 09:28:46 +0300
Subject: [PATCH 0137/1890] perf scripts python: call-graph-from-sql.py: Rename
 to exported-sql-viewer.py

Additional reports will be added to the script so rename to reflect the
more general purpose.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/20181001062853.28285-13-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/intel-pt.txt                       | 2 +-
 tools/perf/scripts/python/export-to-postgresql.py           | 2 +-
 tools/perf/scripts/python/export-to-sqlite.py               | 2 +-
 .../{call-graph-from-sql.py => exported-sql-viewer.py}      | 6 +++---
 4 files changed, 6 insertions(+), 6 deletions(-)
 rename tools/perf/scripts/python/{call-graph-from-sql.py => exported-sql-viewer.py} (98%)
 mode change 100644 => 100755

diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt
index 76971d2e41645..115eaacc455fd 100644
--- a/tools/perf/Documentation/intel-pt.txt
+++ b/tools/perf/Documentation/intel-pt.txt
@@ -106,7 +106,7 @@ in transaction, respectively.
 While it is possible to create scripts to analyze the data, an alternative
 approach is available to export the data to a sqlite or postgresql database.
 Refer to script export-to-sqlite.py or export-to-postgresql.py for more details,
-and to script call-graph-from-sql.py for an example of using the database.
+and to script exported-sql-viewer.py for an example of using the database.
 
 There is also script intel-pt-events.py which provides an example of how to
 unpack the raw data for power events and PTWRITE.
diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py
index e46f51b175131..0564dd7377f22 100644
--- a/tools/perf/scripts/python/export-to-postgresql.py
+++ b/tools/perf/scripts/python/export-to-postgresql.py
@@ -59,7 +59,7 @@ import datetime
 #	pt_example=# \q
 #
 # An example of using the database is provided by the script
-# call-graph-from-sql.py.  Refer to that script for details.
+# exported-sql-viewer.py.  Refer to that script for details.
 #
 # Tables:
 #
diff --git a/tools/perf/scripts/python/export-to-sqlite.py b/tools/perf/scripts/python/export-to-sqlite.py
index e4bb82c8aba9e..245caf2643ed1 100644
--- a/tools/perf/scripts/python/export-to-sqlite.py
+++ b/tools/perf/scripts/python/export-to-sqlite.py
@@ -40,7 +40,7 @@ import datetime
 #	sqlite> .quit
 #
 # An example of using the database is provided by the script
-# call-graph-from-sql.py.  Refer to that script for details.
+# exported-sql-viewer.py.  Refer to that script for details.
 #
 # The database structure is practically the same as created by the script
 # export-to-postgresql.py. Refer to that script for details.  A notable
diff --git a/tools/perf/scripts/python/call-graph-from-sql.py b/tools/perf/scripts/python/exported-sql-viewer.py
old mode 100644
new mode 100755
similarity index 98%
rename from tools/perf/scripts/python/call-graph-from-sql.py
rename to tools/perf/scripts/python/exported-sql-viewer.py
index ee1085169a3ed..03e7a1de7f31d
--- a/tools/perf/scripts/python/call-graph-from-sql.py
+++ b/tools/perf/scripts/python/exported-sql-viewer.py
@@ -10,12 +10,12 @@
 # Following on from the example in the export scripts, a
 # call-graph can be displayed for the pt_example database like this:
 #
-#	python tools/perf/scripts/python/call-graph-from-sql.py pt_example
+#	python tools/perf/scripts/python/exported-sql-viewer.py pt_example
 #
 # Note that for PostgreSQL, this script supports connecting to remote databases
 # by setting hostname, port, username, password, and dbname e.g.
 #
-#	python tools/perf/scripts/python/call-graph-from-sql.py "hostname=myhost username=myuser password=mypassword dbname=pt_example"
+#	python tools/perf/scripts/python/exported-sql-viewer.py "hostname=myhost username=myuser password=mypassword dbname=pt_example"
 #
 # The result is a GUI window with a tree representing a context-sensitive
 # call-graph.  Expanding a couple of levels of the tree and adjusting column
@@ -365,7 +365,7 @@ class DBRef():
 
 def Main():
 	if (len(sys.argv) < 2):
-		print >> sys.stderr, "Usage is: call-graph-from-sql.py <database name>"
+		print >> sys.stderr, "Usage is: exported-sql-viewer.py <database name>"
 		raise Exception("Too few arguments")
 
 	dbname = sys.argv[1]
-- 
GitLab


From 1beb5c7b07040b70975a2ae0e90b87d412fabf06 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Mon, 1 Oct 2018 09:28:47 +0300
Subject: [PATCH 0138/1890] perf scripts python: exported-sql-viewer.py: Add
 support for multiple sub-windows

Use Qt MDI (multiple document interface) to support multiple sub-windows.
Put the data model in a cache so that each sub-window can share the same
data. This allows mutiple views of the call-graph at the same time and
paves the way to add more reports.

Committer testing:

Starts with a "File  Reports  Windows" main menu, from the "Reports" I
can get what was available up to now, the "Context-Sensitivi Call Graph"
option.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/20181001062853.28285-14-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../scripts/python/exported-sql-viewer.py     | 182 +++++++++++++++++-
 1 file changed, 173 insertions(+), 9 deletions(-)

diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py
index 03e7a1de7f31d..c2f44351821ea 100755
--- a/tools/perf/scripts/python/exported-sql-viewer.py
+++ b/tools/perf/scripts/python/exported-sql-viewer.py
@@ -47,6 +47,8 @@
 #       functions that it calls
 
 import sys
+import weakref
+import threading
 from PySide.QtCore import *
 from PySide.QtGui import *
 from PySide.QtSql import *
@@ -138,6 +140,23 @@ class TreeModel(QAbstractItemModel):
 		item = index.internalPointer()
 		return self.DisplayData(item, index)
 
+# Model cache
+
+model_cache = weakref.WeakValueDictionary()
+model_cache_lock = threading.Lock()
+
+def LookupCreateModel(model_name, create_fn):
+	model_cache_lock.acquire()
+	try:
+		model = model_cache[model_name]
+	except:
+		model = None
+	if model is None:
+		model = create_fn()
+		model_cache[model_name] = model
+	model_cache_lock.release()
+	return model
+
 # Context-sensitive call graph data model item base
 
 class CallGraphLevelItemBase(object):
@@ -289,6 +308,144 @@ class CallGraphModel(TreeModel):
 		alignment = [ Qt.AlignLeft, Qt.AlignLeft, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight ]
 		return alignment[column]
 
+# Context-sensitive call graph window
+
+class CallGraphWindow(QMdiSubWindow):
+
+	def __init__(self, glb, parent=None):
+		super(CallGraphWindow, self).__init__(parent)
+
+		self.model = LookupCreateModel("Context-Sensitive Call Graph", lambda x=glb: CallGraphModel(x))
+
+		self.view = QTreeView()
+		self.view.setModel(self.model)
+
+		for c, w in ((0, 250), (1, 100), (2, 60), (3, 70), (4, 70), (5, 100)):
+			self.view.setColumnWidth(c, w)
+
+		self.setWidget(self.view)
+
+		AddSubWindow(glb.mainwindow.mdi_area, self, "Context-Sensitive Call Graph")
+
+# Action Definition
+
+def CreateAction(label, tip, callback, parent=None, shortcut=None):
+	action = QAction(label, parent)
+	if shortcut != None:
+		action.setShortcuts(shortcut)
+	action.setStatusTip(tip)
+	action.triggered.connect(callback)
+	return action
+
+# Typical application actions
+
+def CreateExitAction(app, parent=None):
+	return CreateAction("&Quit", "Exit the application", app.closeAllWindows, parent, QKeySequence.Quit)
+
+# Typical MDI actions
+
+def CreateCloseActiveWindowAction(mdi_area):
+	return CreateAction("Cl&ose", "Close the active window", mdi_area.closeActiveSubWindow, mdi_area)
+
+def CreateCloseAllWindowsAction(mdi_area):
+	return CreateAction("Close &All", "Close all the windows", mdi_area.closeAllSubWindows, mdi_area)
+
+def CreateTileWindowsAction(mdi_area):
+	return CreateAction("&Tile", "Tile the windows", mdi_area.tileSubWindows, mdi_area)
+
+def CreateCascadeWindowsAction(mdi_area):
+	return CreateAction("&Cascade", "Cascade the windows", mdi_area.cascadeSubWindows, mdi_area)
+
+def CreateNextWindowAction(mdi_area):
+	return CreateAction("Ne&xt", "Move the focus to the next window", mdi_area.activateNextSubWindow, mdi_area, QKeySequence.NextChild)
+
+def CreatePreviousWindowAction(mdi_area):
+	return CreateAction("Pre&vious", "Move the focus to the previous window", mdi_area.activatePreviousSubWindow, mdi_area, QKeySequence.PreviousChild)
+
+# Typical MDI window menu
+
+class WindowMenu():
+
+	def __init__(self, mdi_area, menu):
+		self.mdi_area = mdi_area
+		self.window_menu = menu.addMenu("&Windows")
+		self.close_active_window = CreateCloseActiveWindowAction(mdi_area)
+		self.close_all_windows = CreateCloseAllWindowsAction(mdi_area)
+		self.tile_windows = CreateTileWindowsAction(mdi_area)
+		self.cascade_windows = CreateCascadeWindowsAction(mdi_area)
+		self.next_window = CreateNextWindowAction(mdi_area)
+		self.previous_window = CreatePreviousWindowAction(mdi_area)
+		self.window_menu.aboutToShow.connect(self.Update)
+
+	def Update(self):
+		self.window_menu.clear()
+		sub_window_count = len(self.mdi_area.subWindowList())
+		have_sub_windows = sub_window_count != 0
+		self.close_active_window.setEnabled(have_sub_windows)
+		self.close_all_windows.setEnabled(have_sub_windows)
+		self.tile_windows.setEnabled(have_sub_windows)
+		self.cascade_windows.setEnabled(have_sub_windows)
+		self.next_window.setEnabled(have_sub_windows)
+		self.previous_window.setEnabled(have_sub_windows)
+		self.window_menu.addAction(self.close_active_window)
+		self.window_menu.addAction(self.close_all_windows)
+		self.window_menu.addSeparator()
+		self.window_menu.addAction(self.tile_windows)
+		self.window_menu.addAction(self.cascade_windows)
+		self.window_menu.addSeparator()
+		self.window_menu.addAction(self.next_window)
+		self.window_menu.addAction(self.previous_window)
+		if sub_window_count == 0:
+			return
+		self.window_menu.addSeparator()
+		nr = 1
+		for sub_window in self.mdi_area.subWindowList():
+			label = str(nr) + " " + sub_window.name
+			if nr < 10:
+				label = "&" + label
+			action = self.window_menu.addAction(label)
+			action.setCheckable(True)
+			action.setChecked(sub_window == self.mdi_area.activeSubWindow())
+			action.triggered.connect(lambda x=nr: self.setActiveSubWindow(x))
+			self.window_menu.addAction(action)
+			nr += 1
+
+	def setActiveSubWindow(self, nr):
+		self.mdi_area.setActiveSubWindow(self.mdi_area.subWindowList()[nr - 1])
+
+# Unique name for sub-windows
+
+def NumberedWindowName(name, nr):
+	if nr > 1:
+		name += " <" + str(nr) + ">"
+	return name
+
+def UniqueSubWindowName(mdi_area, name):
+	nr = 1
+	while True:
+		unique_name = NumberedWindowName(name, nr)
+		ok = True
+		for sub_window in mdi_area.subWindowList():
+			if sub_window.name == unique_name:
+				ok = False
+				break
+		if ok:
+			return unique_name
+		nr += 1
+
+# Add a sub-window
+
+def AddSubWindow(mdi_area, sub_window, name):
+	unique_name = UniqueSubWindowName(mdi_area, name)
+	sub_window.setMinimumSize(200, 100)
+	sub_window.resize(800, 600)
+	sub_window.setWindowTitle(unique_name)
+	sub_window.setAttribute(Qt.WA_DeleteOnClose)
+	sub_window.setWindowIcon(sub_window.style().standardIcon(QStyle.SP_FileIcon))
+	sub_window.name = unique_name
+	mdi_area.addSubWindow(sub_window)
+	sub_window.show()
+
 # Main window
 
 class MainWindow(QMainWindow):
@@ -298,21 +455,28 @@ class MainWindow(QMainWindow):
 
 		self.glb = glb
 
-		self.setWindowTitle("Call Graph: " + glb.dbname)
-		self.move(100, 100)
-		self.resize(800, 600)
+		self.setWindowTitle("Exported SQL Viewer: " + glb.dbname)
 		self.setWindowIcon(self.style().standardIcon(QStyle.SP_ComputerIcon))
 		self.setMinimumSize(200, 100)
 
-		self.model = CallGraphModel(glb)
+		self.mdi_area = QMdiArea()
+		self.mdi_area.setHorizontalScrollBarPolicy(Qt.ScrollBarAsNeeded)
+		self.mdi_area.setVerticalScrollBarPolicy(Qt.ScrollBarAsNeeded)
 
-		self.view = QTreeView()
-		self.view.setModel(self.model)
+		self.setCentralWidget(self.mdi_area)
 
-		for c, w in ((0, 250), (1, 100), (2, 60), (3, 70), (4, 70), (5, 100)):
-			self.view.setColumnWidth(c, w)
+		menu = self.menuBar()
+
+		file_menu = menu.addMenu("&File")
+		file_menu.addAction(CreateExitAction(glb.app, self))
+
+		reports_menu = menu.addMenu("&Reports")
+		reports_menu.addAction(CreateAction("Context-Sensitive Call &Graph", "Create a new window containing a context-sensitive call graph", self.NewCallGraph, self))
+
+		self.window_menu = WindowMenu(self.mdi_area, menu)
 
-		self.setCentralWidget(self.view)
+	def NewCallGraph(self):
+		CallGraphWindow(self.glb, self)
 
 # Global data
 
-- 
GitLab


From ebd70c7dc2f5f57315e19d959ddc9cb05e9d48e1 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Mon, 1 Oct 2018 09:28:48 +0300
Subject: [PATCH 0139/1890] perf scripts python: exported-sql-viewer.py: Add
 ability to find symbols in the call-graph

Add a Find bar that appears at the bottom of the call-graph window.

Committer testing:

Using:

  python tools/perf/scripts/python/exported-sql-viewer.py pt_example branches calls

Using the database built in the first "Committer Testing" section in
this patch series I was able to:

  "Reports"
      "Context-Sensitive Call Graphs"
           Control+F or select "Edit" in the top menu then "Find"
                __poll<ENTER>

and find the first place where the "__poll" function appears, then
press the down arrow in the lower right corner and go to the next, etc.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/20181001062853.28285-15-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../scripts/python/exported-sql-viewer.py     | 306 +++++++++++++++++-
 1 file changed, 305 insertions(+), 1 deletion(-)

diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py
index c2f44351821ea..0386a600ffc73 100755
--- a/tools/perf/scripts/python/exported-sql-viewer.py
+++ b/tools/perf/scripts/python/exported-sql-viewer.py
@@ -49,6 +49,7 @@
 import sys
 import weakref
 import threading
+import string
 from PySide.QtCore import *
 from PySide.QtGui import *
 from PySide.QtSql import *
@@ -76,6 +77,27 @@ def QueryExec(query, stmt):
 	if not ret:
 		raise Exception("Query failed: " + query.lastError().text())
 
+# Background thread
+
+class Thread(QThread):
+
+	done = Signal(object)
+
+	def __init__(self, task, param=None, parent=None):
+		super(Thread, self).__init__(parent)
+		self.task = task
+		self.param = param
+
+	def run(self):
+		while True:
+			if self.param is None:
+				done, result = self.task()
+			else:
+				done, result = self.task(self.param)
+			self.done.emit(result)
+			if done:
+				break
+
 # Tree data model
 
 class TreeModel(QAbstractItemModel):
@@ -157,6 +179,125 @@ def LookupCreateModel(model_name, create_fn):
 	model_cache_lock.release()
 	return model
 
+# Find bar
+
+class FindBar():
+
+	def __init__(self, parent, finder, is_reg_expr=False):
+		self.finder = finder
+		self.context = []
+		self.last_value = None
+		self.last_pattern = None
+
+		label = QLabel("Find:")
+		label.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed)
+
+		self.textbox = QComboBox()
+		self.textbox.setEditable(True)
+		self.textbox.currentIndexChanged.connect(self.ValueChanged)
+
+		self.progress = QProgressBar()
+		self.progress.setRange(0, 0)
+		self.progress.hide()
+
+		if is_reg_expr:
+			self.pattern = QCheckBox("Regular Expression")
+		else:
+			self.pattern = QCheckBox("Pattern")
+		self.pattern.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed)
+
+		self.next_button = QToolButton()
+		self.next_button.setIcon(parent.style().standardIcon(QStyle.SP_ArrowDown))
+		self.next_button.released.connect(lambda: self.NextPrev(1))
+
+		self.prev_button = QToolButton()
+		self.prev_button.setIcon(parent.style().standardIcon(QStyle.SP_ArrowUp))
+		self.prev_button.released.connect(lambda: self.NextPrev(-1))
+
+		self.close_button = QToolButton()
+		self.close_button.setIcon(parent.style().standardIcon(QStyle.SP_DockWidgetCloseButton))
+		self.close_button.released.connect(self.Deactivate)
+
+		self.hbox = QHBoxLayout()
+		self.hbox.setContentsMargins(0, 0, 0, 0)
+
+		self.hbox.addWidget(label)
+		self.hbox.addWidget(self.textbox)
+		self.hbox.addWidget(self.progress)
+		self.hbox.addWidget(self.pattern)
+		self.hbox.addWidget(self.next_button)
+		self.hbox.addWidget(self.prev_button)
+		self.hbox.addWidget(self.close_button)
+
+		self.bar = QWidget()
+		self.bar.setLayout(self.hbox);
+		self.bar.hide()
+
+	def Widget(self):
+		return self.bar
+
+	def Activate(self):
+		self.bar.show()
+		self.textbox.setFocus()
+
+	def Deactivate(self):
+		self.bar.hide()
+
+	def Busy(self):
+		self.textbox.setEnabled(False)
+		self.pattern.hide()
+		self.next_button.hide()
+		self.prev_button.hide()
+		self.progress.show()
+
+	def Idle(self):
+		self.textbox.setEnabled(True)
+		self.progress.hide()
+		self.pattern.show()
+		self.next_button.show()
+		self.prev_button.show()
+
+	def Find(self, direction):
+		value = self.textbox.currentText()
+		pattern = self.pattern.isChecked()
+		self.last_value = value
+		self.last_pattern = pattern
+		self.finder.Find(value, direction, pattern, self.context)
+
+	def ValueChanged(self):
+		value = self.textbox.currentText()
+		pattern = self.pattern.isChecked()
+		index = self.textbox.currentIndex()
+		data = self.textbox.itemData(index)
+		# Store the pattern in the combo box to keep it with the text value
+		if data == None:
+			self.textbox.setItemData(index, pattern)
+		else:
+			self.pattern.setChecked(data)
+		self.Find(0)
+
+	def NextPrev(self, direction):
+		value = self.textbox.currentText()
+		pattern = self.pattern.isChecked()
+		if value != self.last_value:
+			index = self.textbox.findText(value)
+			# Allow for a button press before the value has been added to the combo box
+			if index < 0:
+				index = self.textbox.count()
+				self.textbox.addItem(value, pattern)
+				self.textbox.setCurrentIndex(index)
+				return
+			else:
+				self.textbox.setItemData(index, pattern)
+		elif pattern != self.last_pattern:
+			# Keep the pattern recorded in the combo box up to date
+			index = self.textbox.currentIndex()
+			self.textbox.setItemData(index, pattern)
+		self.Find(direction)
+
+	def NotFound(self):
+		QMessageBox.information(self.bar, "Find", "'" + self.textbox.currentText() + "' not found")
+
 # Context-sensitive call graph data model item base
 
 class CallGraphLevelItemBase(object):
@@ -308,6 +449,123 @@ class CallGraphModel(TreeModel):
 		alignment = [ Qt.AlignLeft, Qt.AlignLeft, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight ]
 		return alignment[column]
 
+	def FindSelect(self, value, pattern, query):
+		if pattern:
+			# postgresql and sqlite pattern patching differences:
+			#   postgresql LIKE is case sensitive but sqlite LIKE is not
+			#   postgresql LIKE allows % and _ to be escaped with \ but sqlite LIKE does not
+			#   postgresql supports ILIKE which is case insensitive
+			#   sqlite supports GLOB (text only) which uses * and ? and is case sensitive
+			if not self.glb.dbref.is_sqlite3:
+				# Escape % and _
+				s = value.replace("%", "\%")
+				s = s.replace("_", "\_")
+				# Translate * and ? into SQL LIKE pattern characters % and _
+				trans = string.maketrans("*?", "%_")
+				match = " LIKE '" + str(s).translate(trans) + "'"
+			else:
+				match = " GLOB '" + str(value) + "'"
+		else:
+			match = " = '" + str(value) + "'"
+		QueryExec(query, "SELECT call_path_id, comm_id, thread_id"
+						" FROM calls"
+						" INNER JOIN call_paths ON calls.call_path_id = call_paths.id"
+						" INNER JOIN symbols ON call_paths.symbol_id = symbols.id"
+						" WHERE symbols.name" + match +
+						" GROUP BY comm_id, thread_id, call_path_id"
+						" ORDER BY comm_id, thread_id, call_path_id")
+
+	def FindPath(self, query):
+		# Turn the query result into a list of ids that the tree view can walk
+		# to open the tree at the right place.
+		ids = []
+		parent_id = query.value(0)
+		while parent_id:
+			ids.insert(0, parent_id)
+			q2 = QSqlQuery(self.glb.db)
+			QueryExec(q2, "SELECT parent_id"
+					" FROM call_paths"
+					" WHERE id = " + str(parent_id))
+			if not q2.next():
+				break
+			parent_id = q2.value(0)
+		# The call path root is not used
+		if ids[0] == 1:
+			del ids[0]
+		ids.insert(0, query.value(2))
+		ids.insert(0, query.value(1))
+		return ids
+
+	def Found(self, query, found):
+		if found:
+			return self.FindPath(query)
+		return []
+
+	def FindValue(self, value, pattern, query, last_value, last_pattern):
+		if last_value == value and pattern == last_pattern:
+			found = query.first()
+		else:
+			self.FindSelect(value, pattern, query)
+			found = query.next()
+		return self.Found(query, found)
+
+	def FindNext(self, query):
+		found = query.next()
+		if not found:
+			found = query.first()
+		return self.Found(query, found)
+
+	def FindPrev(self, query):
+		found = query.previous()
+		if not found:
+			found = query.last()
+		return self.Found(query, found)
+
+	def FindThread(self, c):
+		if c.direction == 0 or c.value != c.last_value or c.pattern != c.last_pattern:
+			ids = self.FindValue(c.value, c.pattern, c.query, c.last_value, c.last_pattern)
+		elif c.direction > 0:
+			ids = self.FindNext(c.query)
+		else:
+			ids = self.FindPrev(c.query)
+		return (True, ids)
+
+	def Find(self, value, direction, pattern, context, callback):
+		class Context():
+			def __init__(self, *x):
+				self.value, self.direction, self.pattern, self.query, self.last_value, self.last_pattern = x
+			def Update(self, *x):
+				self.value, self.direction, self.pattern, self.last_value, self.last_pattern = x + (self.value, self.pattern)
+		if len(context):
+			context[0].Update(value, direction, pattern)
+		else:
+			context.append(Context(value, direction, pattern, QSqlQuery(self.glb.db), None, None))
+		# Use a thread so the UI is not blocked during the SELECT
+		thread = Thread(self.FindThread, context[0])
+		thread.done.connect(lambda ids, t=thread, c=callback: self.FindDone(t, c, ids), Qt.QueuedConnection)
+		thread.start()
+
+	def FindDone(self, thread, callback, ids):
+		callback(ids)
+
+# Vertical widget layout
+
+class VBox():
+
+	def __init__(self, w1, w2, w3=None):
+		self.vbox = QWidget()
+		self.vbox.setLayout(QVBoxLayout());
+
+		self.vbox.layout().setContentsMargins(0, 0, 0, 0)
+
+		self.vbox.layout().addWidget(w1)
+		self.vbox.layout().addWidget(w2)
+		if w3:
+			self.vbox.layout().addWidget(w3)
+
+	def Widget(self):
+		return self.vbox
+
 # Context-sensitive call graph window
 
 class CallGraphWindow(QMdiSubWindow):
@@ -323,10 +581,45 @@ class CallGraphWindow(QMdiSubWindow):
 		for c, w in ((0, 250), (1, 100), (2, 60), (3, 70), (4, 70), (5, 100)):
 			self.view.setColumnWidth(c, w)
 
-		self.setWidget(self.view)
+		self.find_bar = FindBar(self, self)
+
+		self.vbox = VBox(self.view, self.find_bar.Widget())
+
+		self.setWidget(self.vbox.Widget())
 
 		AddSubWindow(glb.mainwindow.mdi_area, self, "Context-Sensitive Call Graph")
 
+	def DisplayFound(self, ids):
+		if not len(ids):
+			return False
+		parent = QModelIndex()
+		for dbid in ids:
+			found = False
+			n = self.model.rowCount(parent)
+			for row in xrange(n):
+				child = self.model.index(row, 0, parent)
+				if child.internalPointer().dbid == dbid:
+					found = True
+					self.view.setCurrentIndex(child)
+					parent = child
+					break
+			if not found:
+				break
+		return found
+
+	def Find(self, value, direction, pattern, context):
+		self.view.setFocus()
+		self.find_bar.Busy()
+		self.model.Find(value, direction, pattern, context, self.FindDone)
+
+	def FindDone(self, ids):
+		found = True
+		if not self.DisplayFound(ids):
+			found = False
+		self.find_bar.Idle()
+		if not found:
+			self.find_bar.NotFound()
+
 # Action Definition
 
 def CreateAction(label, tip, callback, parent=None, shortcut=None):
@@ -470,11 +763,22 @@ class MainWindow(QMainWindow):
 		file_menu = menu.addMenu("&File")
 		file_menu.addAction(CreateExitAction(glb.app, self))
 
+		edit_menu = menu.addMenu("&Edit")
+		edit_menu.addAction(CreateAction("&Find...", "Find items", self.Find, self, QKeySequence.Find))
+
 		reports_menu = menu.addMenu("&Reports")
 		reports_menu.addAction(CreateAction("Context-Sensitive Call &Graph", "Create a new window containing a context-sensitive call graph", self.NewCallGraph, self))
 
 		self.window_menu = WindowMenu(self.mdi_area, menu)
 
+	def Find(self):
+		win = self.mdi_area.activeSubWindow()
+		if win:
+			try:
+				win.find_bar.Activate()
+			except:
+				pass
+
 	def NewCallGraph(self):
 		CallGraphWindow(self.glb, self)
 
-- 
GitLab


From 82f68e2898e634b8b0efc7ddd57e037ef75ea114 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Mon, 1 Oct 2018 09:28:49 +0300
Subject: [PATCH 0140/1890] perf scripts python: exported-sql-viewer.py: Add
 ability to shrink / enlarge font

Shrinking the font allows more information to display.

Committer testing:

Works, tested with the convenient Control+Shift+'+' and Control+'-' as
well with the more cumbersome top menu "Edit" + "Enlarge/Shrink font"
options.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/20181001062853.28285-16-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../scripts/python/exported-sql-viewer.py     | 24 +++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py
index 0386a600ffc73..310ba71475830 100755
--- a/tools/perf/scripts/python/exported-sql-viewer.py
+++ b/tools/perf/scripts/python/exported-sql-viewer.py
@@ -706,6 +706,20 @@ class WindowMenu():
 	def setActiveSubWindow(self, nr):
 		self.mdi_area.setActiveSubWindow(self.mdi_area.subWindowList()[nr - 1])
 
+# Font resize
+
+def ResizeFont(widget, diff):
+	font = widget.font()
+	sz = font.pointSize()
+	font.setPointSize(sz + diff)
+	widget.setFont(font)
+
+def ShrinkFont(widget):
+	ResizeFont(widget, -1)
+
+def EnlargeFont(widget):
+	ResizeFont(widget, 1)
+
 # Unique name for sub-windows
 
 def NumberedWindowName(name, nr):
@@ -765,6 +779,8 @@ class MainWindow(QMainWindow):
 
 		edit_menu = menu.addMenu("&Edit")
 		edit_menu.addAction(CreateAction("&Find...", "Find items", self.Find, self, QKeySequence.Find))
+		edit_menu.addAction(CreateAction("&Shrink Font", "Make text smaller", self.ShrinkFont, self, [QKeySequence("Ctrl+-")]))
+		edit_menu.addAction(CreateAction("&Enlarge Font", "Make text bigger", self.EnlargeFont, self, [QKeySequence("Ctrl++")]))
 
 		reports_menu = menu.addMenu("&Reports")
 		reports_menu.addAction(CreateAction("Context-Sensitive Call &Graph", "Create a new window containing a context-sensitive call graph", self.NewCallGraph, self))
@@ -779,6 +795,14 @@ class MainWindow(QMainWindow):
 			except:
 				pass
 
+	def ShrinkFont(self):
+		win = self.mdi_area.activeSubWindow()
+		ShrinkFont(win.view)
+
+	def EnlargeFont(self):
+		win = self.mdi_area.activeSubWindow()
+		EnlargeFont(win.view)
+
 	def NewCallGraph(self):
 		CallGraphWindow(self.glb, self)
 
-- 
GitLab


From 8392b74b575c38fa5d50d1fe07fa9a4bcea93862 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Mon, 1 Oct 2018 09:28:50 +0300
Subject: [PATCH 0141/1890] perf scripts python: exported-sql-viewer.py: Add
 ability to display all the database tables

Displaying all the database tables can help make the database easier to
understand.

Committer testing:

Opened all the tables, even the sqlite master table, which I selected
everything and used control+C, lets see if it works...

  CREATE VIEW threads_view AS SELECT id,machine_id,(SELECT host_or_guest FROM machines_view WHERE id = machine_id) AS host_or_guest,process_id,pid,tid FROM threads

Humm, nope, just one of the cells got copied, even with everything selected :-)

Anyway, works as advertised, useful for perusing the data.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/20181001062853.28285-17-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../scripts/python/exported-sql-viewer.py     | 694 ++++++++++++++++++
 1 file changed, 694 insertions(+)

diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py
index 310ba71475830..ef822d8501093 100755
--- a/tools/perf/scripts/python/exported-sql-viewer.py
+++ b/tools/perf/scripts/python/exported-sql-viewer.py
@@ -50,10 +50,15 @@ import sys
 import weakref
 import threading
 import string
+import cPickle
+import re
+import os
 from PySide.QtCore import *
 from PySide.QtGui import *
 from PySide.QtSql import *
 from decimal import *
+from ctypes import *
+from multiprocessing import Process, Array, Value, Event
 
 # Data formatting helpers
 
@@ -146,6 +151,68 @@ class TreeModel(QAbstractItemModel):
 	def DisplayData(self, item, index):
 		return item.getData(index.column())
 
+	def FetchIfNeeded(self, row):
+		if row > self.last_row_read:
+			self.last_row_read = row
+			if row + 10 >= self.root.child_count:
+				self.fetcher.Fetch(glb_chunk_sz)
+
+	def columnAlignment(self, column):
+		return Qt.AlignLeft
+
+	def columnFont(self, column):
+		return None
+
+	def data(self, index, role):
+		if role == Qt.TextAlignmentRole:
+			return self.columnAlignment(index.column())
+		if role == Qt.FontRole:
+			return self.columnFont(index.column())
+		if role != Qt.DisplayRole:
+			return None
+		item = index.internalPointer()
+		return self.DisplayData(item, index)
+
+# Table data model
+
+class TableModel(QAbstractTableModel):
+
+	def __init__(self, parent=None):
+		super(TableModel, self).__init__(parent)
+		self.child_count = 0
+		self.child_items = []
+		self.last_row_read = 0
+
+	def Item(self, parent):
+		if parent.isValid():
+			return parent.internalPointer()
+		else:
+			return self
+
+	def rowCount(self, parent):
+		return self.child_count
+
+	def headerData(self, section, orientation, role):
+		if role == Qt.TextAlignmentRole:
+			return self.columnAlignment(section)
+		if role != Qt.DisplayRole:
+			return None
+		if orientation != Qt.Horizontal:
+			return None
+		return self.columnHeader(section)
+
+	def index(self, row, column, parent):
+		return self.createIndex(row, column, self.child_items[row])
+
+	def DisplayData(self, item, index):
+		return item.getData(index.column())
+
+	def FetchIfNeeded(self, row):
+		if row > self.last_row_read:
+			self.last_row_read = row
+			if row + 10 >= self.child_count:
+				self.fetcher.Fetch(glb_chunk_sz)
+
 	def columnAlignment(self, column):
 		return Qt.AlignLeft
 
@@ -620,6 +687,601 @@ class CallGraphWindow(QMdiSubWindow):
 		if not found:
 			self.find_bar.NotFound()
 
+# Child data item  finder
+
+class ChildDataItemFinder():
+
+	def __init__(self, root):
+		self.root = root
+		self.value, self.direction, self.pattern, self.last_value, self.last_pattern = (None,) * 5
+		self.rows = []
+		self.pos = 0
+
+	def FindSelect(self):
+		self.rows = []
+		if self.pattern:
+			pattern = re.compile(self.value)
+			for child in self.root.child_items:
+				for column_data in child.data:
+					if re.search(pattern, str(column_data)) is not None:
+						self.rows.append(child.row)
+						break
+		else:
+			for child in self.root.child_items:
+				for column_data in child.data:
+					if self.value in str(column_data):
+						self.rows.append(child.row)
+						break
+
+	def FindValue(self):
+		self.pos = 0
+		if self.last_value != self.value or self.pattern != self.last_pattern:
+			self.FindSelect()
+		if not len(self.rows):
+			return -1
+		return self.rows[self.pos]
+
+	def FindThread(self):
+		if self.direction == 0 or self.value != self.last_value or self.pattern != self.last_pattern:
+			row = self.FindValue()
+		elif len(self.rows):
+			if self.direction > 0:
+				self.pos += 1
+				if self.pos >= len(self.rows):
+					self.pos = 0
+			else:
+				self.pos -= 1
+				if self.pos < 0:
+					self.pos = len(self.rows) - 1
+			row = self.rows[self.pos]
+		else:
+			row = -1
+		return (True, row)
+
+	def Find(self, value, direction, pattern, context, callback):
+		self.value, self.direction, self.pattern, self.last_value, self.last_pattern = (value, direction,pattern, self.value, self.pattern)
+		# Use a thread so the UI is not blocked
+		thread = Thread(self.FindThread)
+		thread.done.connect(lambda row, t=thread, c=callback: self.FindDone(t, c, row), Qt.QueuedConnection)
+		thread.start()
+
+	def FindDone(self, thread, callback, row):
+		callback(row)
+
+# Number of database records to fetch in one go
+
+glb_chunk_sz = 10000
+
+# size of pickled integer big enough for record size
+
+glb_nsz = 8
+
+# Background process for SQL data fetcher
+
+class SQLFetcherProcess():
+
+	def __init__(self, dbref, sql, buffer, head, tail, fetch_count, fetching_done, process_target, wait_event, fetched_event, prep):
+		# Need a unique connection name
+		conn_name = "SQLFetcher" + str(os.getpid())
+		self.db, dbname = dbref.Open(conn_name)
+		self.sql = sql
+		self.buffer = buffer
+		self.head = head
+		self.tail = tail
+		self.fetch_count = fetch_count
+		self.fetching_done = fetching_done
+		self.process_target = process_target
+		self.wait_event = wait_event
+		self.fetched_event = fetched_event
+		self.prep = prep
+		self.query = QSqlQuery(self.db)
+		self.query_limit = 0 if "$$last_id$$" in sql else 2
+		self.last_id = -1
+		self.fetched = 0
+		self.more = True
+		self.local_head = self.head.value
+		self.local_tail = self.tail.value
+
+	def Select(self):
+		if self.query_limit:
+			if self.query_limit == 1:
+				return
+			self.query_limit -= 1
+		stmt = self.sql.replace("$$last_id$$", str(self.last_id))
+		QueryExec(self.query, stmt)
+
+	def Next(self):
+		if not self.query.next():
+			self.Select()
+			if not self.query.next():
+				return None
+		self.last_id = self.query.value(0)
+		return self.prep(self.query)
+
+	def WaitForTarget(self):
+		while True:
+			self.wait_event.clear()
+			target = self.process_target.value
+			if target > self.fetched or target < 0:
+				break
+			self.wait_event.wait()
+		return target
+
+	def HasSpace(self, sz):
+		if self.local_tail <= self.local_head:
+			space = len(self.buffer) - self.local_head
+			if space > sz:
+				return True
+			if space >= glb_nsz:
+				# Use 0 (or space < glb_nsz) to mean there is no more at the top of the buffer
+				nd = cPickle.dumps(0, cPickle.HIGHEST_PROTOCOL)
+				self.buffer[self.local_head : self.local_head + len(nd)] = nd
+			self.local_head = 0
+		if self.local_tail - self.local_head > sz:
+			return True
+		return False
+
+	def WaitForSpace(self, sz):
+		if self.HasSpace(sz):
+			return
+		while True:
+			self.wait_event.clear()
+			self.local_tail = self.tail.value
+			if self.HasSpace(sz):
+				return
+			self.wait_event.wait()
+
+	def AddToBuffer(self, obj):
+		d = cPickle.dumps(obj, cPickle.HIGHEST_PROTOCOL)
+		n = len(d)
+		nd = cPickle.dumps(n, cPickle.HIGHEST_PROTOCOL)
+		sz = n + glb_nsz
+		self.WaitForSpace(sz)
+		pos = self.local_head
+		self.buffer[pos : pos + len(nd)] = nd
+		self.buffer[pos + glb_nsz : pos + sz] = d
+		self.local_head += sz
+
+	def FetchBatch(self, batch_size):
+		fetched = 0
+		while batch_size > fetched:
+			obj = self.Next()
+			if obj is None:
+				self.more = False
+				break
+			self.AddToBuffer(obj)
+			fetched += 1
+		if fetched:
+			self.fetched += fetched
+			with self.fetch_count.get_lock():
+				self.fetch_count.value += fetched
+			self.head.value = self.local_head
+			self.fetched_event.set()
+
+	def Run(self):
+		while self.more:
+			target = self.WaitForTarget()
+			if target < 0:
+				break
+			batch_size = min(glb_chunk_sz, target - self.fetched)
+			self.FetchBatch(batch_size)
+		self.fetching_done.value = True
+		self.fetched_event.set()
+
+def SQLFetcherFn(*x):
+	process = SQLFetcherProcess(*x)
+	process.Run()
+
+# SQL data fetcher
+
+class SQLFetcher(QObject):
+
+	done = Signal(object)
+
+	def __init__(self, glb, sql, prep, process_data, parent=None):
+		super(SQLFetcher, self).__init__(parent)
+		self.process_data = process_data
+		self.more = True
+		self.target = 0
+		self.last_target = 0
+		self.fetched = 0
+		self.buffer_size = 16 * 1024 * 1024
+		self.buffer = Array(c_char, self.buffer_size, lock=False)
+		self.head = Value(c_longlong)
+		self.tail = Value(c_longlong)
+		self.local_tail = 0
+		self.fetch_count = Value(c_longlong)
+		self.fetching_done = Value(c_bool)
+		self.last_count = 0
+		self.process_target = Value(c_longlong)
+		self.wait_event = Event()
+		self.fetched_event = Event()
+		glb.AddInstanceToShutdownOnExit(self)
+		self.process = Process(target=SQLFetcherFn, args=(glb.dbref, sql, self.buffer, self.head, self.tail, self.fetch_count, self.fetching_done, self.process_target, self.wait_event, self.fetched_event, prep))
+		self.process.start()
+		self.thread = Thread(self.Thread)
+		self.thread.done.connect(self.ProcessData, Qt.QueuedConnection)
+		self.thread.start()
+
+	def Shutdown(self):
+		# Tell the thread and process to exit
+		self.process_target.value = -1
+		self.wait_event.set()
+		self.more = False
+		self.fetching_done.value = True
+		self.fetched_event.set()
+
+	def Thread(self):
+		if not self.more:
+			return True, 0
+		while True:
+			self.fetched_event.clear()
+			fetch_count = self.fetch_count.value
+			if fetch_count != self.last_count:
+				break
+			if self.fetching_done.value:
+				self.more = False
+				return True, 0
+			self.fetched_event.wait()
+		count = fetch_count - self.last_count
+		self.last_count = fetch_count
+		self.fetched += count
+		return False, count
+
+	def Fetch(self, nr):
+		if not self.more:
+			# -1 inidcates there are no more
+			return -1
+		result = self.fetched
+		extra = result + nr - self.target
+		if extra > 0:
+			self.target += extra
+			# process_target < 0 indicates shutting down
+			if self.process_target.value >= 0:
+				self.process_target.value = self.target
+			self.wait_event.set()
+		return result
+
+	def RemoveFromBuffer(self):
+		pos = self.local_tail
+		if len(self.buffer) - pos < glb_nsz:
+			pos = 0
+		n = cPickle.loads(self.buffer[pos : pos + glb_nsz])
+		if n == 0:
+			pos = 0
+			n = cPickle.loads(self.buffer[0 : glb_nsz])
+		pos += glb_nsz
+		obj = cPickle.loads(self.buffer[pos : pos + n])
+		self.local_tail = pos + n
+		return obj
+
+	def ProcessData(self, count):
+		for i in xrange(count):
+			obj = self.RemoveFromBuffer()
+			self.process_data(obj)
+		self.tail.value = self.local_tail
+		self.wait_event.set()
+		self.done.emit(count)
+
+# Fetch more records bar
+
+class FetchMoreRecordsBar():
+
+	def __init__(self, model, parent):
+		self.model = model
+
+		self.label = QLabel("Number of records (x " + "{:,}".format(glb_chunk_sz) + ") to fetch:")
+		self.label.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed)
+
+		self.fetch_count = QSpinBox()
+		self.fetch_count.setRange(1, 1000000)
+		self.fetch_count.setValue(10)
+		self.fetch_count.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed)
+
+		self.fetch = QPushButton("Go!")
+		self.fetch.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed)
+		self.fetch.released.connect(self.FetchMoreRecords)
+
+		self.progress = QProgressBar()
+		self.progress.setRange(0, 100)
+		self.progress.hide()
+
+		self.done_label = QLabel("All records fetched")
+		self.done_label.hide()
+
+		self.spacer = QLabel("")
+
+		self.close_button = QToolButton()
+		self.close_button.setIcon(parent.style().standardIcon(QStyle.SP_DockWidgetCloseButton))
+		self.close_button.released.connect(self.Deactivate)
+
+		self.hbox = QHBoxLayout()
+		self.hbox.setContentsMargins(0, 0, 0, 0)
+
+		self.hbox.addWidget(self.label)
+		self.hbox.addWidget(self.fetch_count)
+		self.hbox.addWidget(self.fetch)
+		self.hbox.addWidget(self.spacer)
+		self.hbox.addWidget(self.progress)
+		self.hbox.addWidget(self.done_label)
+		self.hbox.addWidget(self.close_button)
+
+		self.bar = QWidget()
+		self.bar.setLayout(self.hbox);
+		self.bar.show()
+
+		self.in_progress = False
+		self.model.progress.connect(self.Progress)
+
+		self.done = False
+
+		if not model.HasMoreRecords():
+			self.Done()
+
+	def Widget(self):
+		return self.bar
+
+	def Activate(self):
+		self.bar.show()
+		self.fetch.setFocus()
+
+	def Deactivate(self):
+		self.bar.hide()
+
+	def Enable(self, enable):
+		self.fetch.setEnabled(enable)
+		self.fetch_count.setEnabled(enable)
+
+	def Busy(self):
+		self.Enable(False)
+		self.fetch.hide()
+		self.spacer.hide()
+		self.progress.show()
+
+	def Idle(self):
+		self.in_progress = False
+		self.Enable(True)
+		self.progress.hide()
+		self.fetch.show()
+		self.spacer.show()
+
+	def Target(self):
+		return self.fetch_count.value() * glb_chunk_sz
+
+	def Done(self):
+		self.done = True
+		self.Idle()
+		self.label.hide()
+		self.fetch_count.hide()
+		self.fetch.hide()
+		self.spacer.hide()
+		self.done_label.show()
+
+	def Progress(self, count):
+		if self.in_progress:
+			if count:
+				percent = ((count - self.start) * 100) / self.Target()
+				if percent >= 100:
+					self.Idle()
+				else:
+					self.progress.setValue(percent)
+		if not count:
+			# Count value of zero means no more records
+			self.Done()
+
+	def FetchMoreRecords(self):
+		if self.done:
+			return
+		self.progress.setValue(0)
+		self.Busy()
+		self.in_progress = True
+		self.start = self.model.FetchMoreRecords(self.Target())
+
+# SQL data preparation
+
+def SQLTableDataPrep(query, count):
+	data = []
+	for i in xrange(count):
+		data.append(query.value(i))
+	return data
+
+# SQL table data model item
+
+class SQLTableItem():
+
+	def __init__(self, row, data):
+		self.row = row
+		self.data = data
+
+	def getData(self, column):
+		return self.data[column]
+
+# SQL table data model
+
+class SQLTableModel(TableModel):
+
+	progress = Signal(object)
+
+	def __init__(self, glb, sql, column_count, parent=None):
+		super(SQLTableModel, self).__init__(parent)
+		self.glb = glb
+		self.more = True
+		self.populated = 0
+		self.fetcher = SQLFetcher(glb, sql, lambda x, y=column_count: SQLTableDataPrep(x, y), self.AddSample)
+		self.fetcher.done.connect(self.Update)
+		self.fetcher.Fetch(glb_chunk_sz)
+
+	def DisplayData(self, item, index):
+		self.FetchIfNeeded(item.row)
+		return item.getData(index.column())
+
+	def AddSample(self, data):
+		child = SQLTableItem(self.populated, data)
+		self.child_items.append(child)
+		self.populated += 1
+
+	def Update(self, fetched):
+		if not fetched:
+			self.more = False
+			self.progress.emit(0)
+		child_count = self.child_count
+		count = self.populated - child_count
+		if count > 0:
+			parent = QModelIndex()
+			self.beginInsertRows(parent, child_count, child_count + count - 1)
+			self.insertRows(child_count, count, parent)
+			self.child_count += count
+			self.endInsertRows()
+			self.progress.emit(self.child_count)
+
+	def FetchMoreRecords(self, count):
+		current = self.child_count
+		if self.more:
+			self.fetcher.Fetch(count)
+		else:
+			self.progress.emit(0)
+		return current
+
+	def HasMoreRecords(self):
+		return self.more
+
+# SQL automatic table data model
+
+class SQLAutoTableModel(SQLTableModel):
+
+	def __init__(self, glb, table_name, parent=None):
+		sql = "SELECT * FROM " + table_name + " WHERE id > $$last_id$$ ORDER BY id LIMIT " + str(glb_chunk_sz)
+		if table_name == "comm_threads_view":
+			# For now, comm_threads_view has no id column
+			sql = "SELECT * FROM " + table_name + " WHERE comm_id > $$last_id$$ ORDER BY comm_id LIMIT " + str(glb_chunk_sz)
+		self.column_headers = []
+		query = QSqlQuery(glb.db)
+		if glb.dbref.is_sqlite3:
+			QueryExec(query, "PRAGMA table_info(" + table_name + ")")
+			while query.next():
+				self.column_headers.append(query.value(1))
+			if table_name == "sqlite_master":
+				sql = "SELECT * FROM " + table_name
+		else:
+			if table_name[:19] == "information_schema.":
+				sql = "SELECT * FROM " + table_name
+				select_table_name = table_name[19:]
+				schema = "information_schema"
+			else:
+				select_table_name = table_name
+				schema = "public"
+			QueryExec(query, "SELECT column_name FROM information_schema.columns WHERE table_schema = '" + schema + "' and table_name = '" + select_table_name + "'")
+			while query.next():
+				self.column_headers.append(query.value(0))
+		super(SQLAutoTableModel, self).__init__(glb, sql, len(self.column_headers), parent)
+
+	def columnCount(self, parent=None):
+		return len(self.column_headers)
+
+	def columnHeader(self, column):
+		return self.column_headers[column]
+
+# Base class for custom ResizeColumnsToContents
+
+class ResizeColumnsToContentsBase(QObject):
+
+	def __init__(self, parent=None):
+		super(ResizeColumnsToContentsBase, self).__init__(parent)
+
+	def ResizeColumnToContents(self, column, n):
+		# Using the view's resizeColumnToContents() here is extrememly slow
+		# so implement a crude alternative
+		font = self.view.font()
+		metrics = QFontMetrics(font)
+		max = 0
+		for row in xrange(n):
+			val = self.data_model.child_items[row].data[column]
+			len = metrics.width(str(val) + "MM")
+			max = len if len > max else max
+		val = self.data_model.columnHeader(column)
+		len = metrics.width(str(val) + "MM")
+		max = len if len > max else max
+		self.view.setColumnWidth(column, max)
+
+	def ResizeColumnsToContents(self):
+		n = min(self.data_model.child_count, 100)
+		if n < 1:
+			# No data yet, so connect a signal to notify when there is
+			self.data_model.rowsInserted.connect(self.UpdateColumnWidths)
+			return
+		columns = self.data_model.columnCount()
+		for i in xrange(columns):
+			self.ResizeColumnToContents(i, n)
+
+	def UpdateColumnWidths(self, *x):
+		# This only needs to be done once, so disconnect the signal now
+		self.data_model.rowsInserted.disconnect(self.UpdateColumnWidths)
+		self.ResizeColumnsToContents()
+
+# Table window
+
+class TableWindow(QMdiSubWindow, ResizeColumnsToContentsBase):
+
+	def __init__(self, glb, table_name, parent=None):
+		super(TableWindow, self).__init__(parent)
+
+		self.data_model = LookupCreateModel(table_name + " Table", lambda: SQLAutoTableModel(glb, table_name))
+
+		self.model = QSortFilterProxyModel()
+		self.model.setSourceModel(self.data_model)
+
+		self.view = QTableView()
+		self.view.setModel(self.model)
+		self.view.setEditTriggers(QAbstractItemView.NoEditTriggers)
+		self.view.verticalHeader().setVisible(False)
+		self.view.sortByColumn(-1, Qt.AscendingOrder)
+		self.view.setSortingEnabled(True)
+
+		self.ResizeColumnsToContents()
+
+		self.find_bar = FindBar(self, self, True)
+
+		self.finder = ChildDataItemFinder(self.data_model)
+
+		self.fetch_bar = FetchMoreRecordsBar(self.data_model, self)
+
+		self.vbox = VBox(self.view, self.find_bar.Widget(), self.fetch_bar.Widget())
+
+		self.setWidget(self.vbox.Widget())
+
+		AddSubWindow(glb.mainwindow.mdi_area, self, table_name + " Table")
+
+	def Find(self, value, direction, pattern, context):
+		self.view.setFocus()
+		self.find_bar.Busy()
+		self.finder.Find(value, direction, pattern, context, self.FindDone)
+
+	def FindDone(self, row):
+		self.find_bar.Idle()
+		if row >= 0:
+			self.view.setCurrentIndex(self.model.index(row, 0, QModelIndex()))
+		else:
+			self.find_bar.NotFound()
+
+# Table list
+
+def GetTableList(glb):
+	tables = []
+	query = QSqlQuery(glb.db)
+	if glb.dbref.is_sqlite3:
+		QueryExec(query, "SELECT name FROM sqlite_master WHERE type IN ( 'table' , 'view' ) ORDER BY name")
+	else:
+		QueryExec(query, "SELECT table_name FROM information_schema.tables WHERE table_schema = 'public' AND table_type IN ( 'BASE TABLE' , 'VIEW' ) ORDER BY table_name")
+	while query.next():
+		tables.append(query.value(0))
+	if glb.dbref.is_sqlite3:
+		tables.append("sqlite_master")
+	else:
+		tables.append("information_schema.tables")
+		tables.append("information_schema.views")
+		tables.append("information_schema.columns")
+	return tables
+
 # Action Definition
 
 def CreateAction(label, tip, callback, parent=None, shortcut=None):
@@ -779,12 +1441,15 @@ class MainWindow(QMainWindow):
 
 		edit_menu = menu.addMenu("&Edit")
 		edit_menu.addAction(CreateAction("&Find...", "Find items", self.Find, self, QKeySequence.Find))
+		edit_menu.addAction(CreateAction("Fetch &more records...", "Fetch more records", self.FetchMoreRecords, self, [QKeySequence(Qt.Key_F8)]))
 		edit_menu.addAction(CreateAction("&Shrink Font", "Make text smaller", self.ShrinkFont, self, [QKeySequence("Ctrl+-")]))
 		edit_menu.addAction(CreateAction("&Enlarge Font", "Make text bigger", self.EnlargeFont, self, [QKeySequence("Ctrl++")]))
 
 		reports_menu = menu.addMenu("&Reports")
 		reports_menu.addAction(CreateAction("Context-Sensitive Call &Graph", "Create a new window containing a context-sensitive call graph", self.NewCallGraph, self))
 
+		self.TableMenu(GetTableList(glb), menu)
+
 		self.window_menu = WindowMenu(self.mdi_area, menu)
 
 	def Find(self):
@@ -795,6 +1460,14 @@ class MainWindow(QMainWindow):
 			except:
 				pass
 
+	def FetchMoreRecords(self):
+		win = self.mdi_area.activeSubWindow()
+		if win:
+			try:
+				win.fetch_bar.Activate()
+			except:
+				pass
+
 	def ShrinkFont(self):
 		win = self.mdi_area.activeSubWindow()
 		ShrinkFont(win.view)
@@ -803,9 +1476,17 @@ class MainWindow(QMainWindow):
 		win = self.mdi_area.activeSubWindow()
 		EnlargeFont(win.view)
 
+	def TableMenu(self, tables, menu):
+		table_menu = menu.addMenu("&Tables")
+		for table in tables:
+			table_menu.addAction(CreateAction(table, "Create a new window containing a table view", lambda t=table: self.NewTableView(t), self))
+
 	def NewCallGraph(self):
 		CallGraphWindow(self.glb, self)
 
+	def NewTableView(self, table_name):
+		TableWindow(self.glb, table_name, self)
+
 # Global data
 
 class Glb():
@@ -816,6 +1497,18 @@ class Glb():
 		self.dbname = dbname
 		self.app = None
 		self.mainwindow = None
+		self.instances_to_shutdown_on_exit = weakref.WeakSet()
+
+	def AddInstanceToShutdownOnExit(self, instance):
+		self.instances_to_shutdown_on_exit.add(instance)
+
+	# Shutdown any background processes or threads
+	def ShutdownInstances(self):
+		for x in self.instances_to_shutdown_on_exit:
+			try:
+				x.Shutdown()
+			except:
+				pass
 
 # Database reference
 
@@ -880,6 +1573,7 @@ def Main():
 	glb.mainwindow = mainwindow
 	mainwindow.show()
 	err = app.exec_()
+	glb.ShutdownInstances()
 	db.close()
 	sys.exit(err)
 
-- 
GitLab


From 76099f98aea4606f7c96b8d2366b46840529d08f Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Tue, 23 Oct 2018 10:59:49 +0300
Subject: [PATCH 0142/1890] perf scripts python: exported-sql-viewer.py: Add
 All branches report

Add a report to display branches in a similar fashion to perf script. The
main purpose of this report is to display disassembly, however, presently,
the only supported disassembler is Intel XED, and additionally the object
code must be present in perf build ID cache.

To use Intel XED, libxed.so must be present. To build and install
libxed.so:
	git clone https://github.com/intelxed/mbuild.git mbuild
	git clone https://github.com/intelxed/xed
	cd xed
	./mfile.py --share
	sudo ./mfile.py --prefix=/usr/local install
	sudo ldconfig

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/20181023075949.18920-1-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../scripts/python/exported-sql-viewer.py     | 547 ++++++++++++++++++
 1 file changed, 547 insertions(+)

diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py
index ef822d8501093..24cb0bd56afa5 100755
--- a/tools/perf/scripts/python/exported-sql-viewer.py
+++ b/tools/perf/scripts/python/exported-sql-viewer.py
@@ -46,6 +46,48 @@
 #	'Branch Count' is the total number of branches for that function and all
 #       functions that it calls
 
+# There is also a "All branches" report, which displays branches and
+# possibly disassembly.  However, presently, the only supported disassembler is
+# Intel XED, and additionally the object code must be present in perf build ID
+# cache. To use Intel XED, libxed.so must be present. To build and install
+# libxed.so:
+#            git clone https://github.com/intelxed/mbuild.git mbuild
+#            git clone https://github.com/intelxed/xed
+#            cd xed
+#            ./mfile.py --share
+#            sudo ./mfile.py --prefix=/usr/local install
+#            sudo ldconfig
+#
+# Example report:
+#
+# Time           CPU  Command  PID    TID    Branch Type            In Tx  Branch
+# 8107675239590  2    ls       22011  22011  return from interrupt  No     ffffffff86a00a67 native_irq_return_iret ([kernel]) -> 7fab593ea260 _start (ld-2.19.so)
+#                                                                              7fab593ea260 48 89 e7                                        mov %rsp, %rdi
+# 8107675239899  2    ls       22011  22011  hardware interrupt     No         7fab593ea260 _start (ld-2.19.so) -> ffffffff86a012e0 page_fault ([kernel])
+# 8107675241900  2    ls       22011  22011  return from interrupt  No     ffffffff86a00a67 native_irq_return_iret ([kernel]) -> 7fab593ea260 _start (ld-2.19.so)
+#                                                                              7fab593ea260 48 89 e7                                        mov %rsp, %rdi
+#                                                                              7fab593ea263 e8 c8 06 00 00                                  callq  0x7fab593ea930
+# 8107675241900  2    ls       22011  22011  call                   No         7fab593ea263 _start+0x3 (ld-2.19.so) -> 7fab593ea930 _dl_start (ld-2.19.so)
+#                                                                              7fab593ea930 55                                              pushq  %rbp
+#                                                                              7fab593ea931 48 89 e5                                        mov %rsp, %rbp
+#                                                                              7fab593ea934 41 57                                           pushq  %r15
+#                                                                              7fab593ea936 41 56                                           pushq  %r14
+#                                                                              7fab593ea938 41 55                                           pushq  %r13
+#                                                                              7fab593ea93a 41 54                                           pushq  %r12
+#                                                                              7fab593ea93c 53                                              pushq  %rbx
+#                                                                              7fab593ea93d 48 89 fb                                        mov %rdi, %rbx
+#                                                                              7fab593ea940 48 83 ec 68                                     sub $0x68, %rsp
+#                                                                              7fab593ea944 0f 31                                           rdtsc
+#                                                                              7fab593ea946 48 c1 e2 20                                     shl $0x20, %rdx
+#                                                                              7fab593ea94a 89 c0                                           mov %eax, %eax
+#                                                                              7fab593ea94c 48 09 c2                                        or %rax, %rdx
+#                                                                              7fab593ea94f 48 8b 05 1a 15 22 00                            movq  0x22151a(%rip), %rax
+# 8107675242232  2    ls       22011  22011  hardware interrupt     No         7fab593ea94f _dl_start+0x1f (ld-2.19.so) -> ffffffff86a012e0 page_fault ([kernel])
+# 8107675242900  2    ls       22011  22011  return from interrupt  No     ffffffff86a00a67 native_irq_return_iret ([kernel]) -> 7fab593ea94f _dl_start+0x1f (ld-2.19.so)
+#                                                                              7fab593ea94f 48 8b 05 1a 15 22 00                            movq  0x22151a(%rip), %rax
+#                                                                              7fab593ea956 48 89 15 3b 13 22 00                            movq  %rdx, 0x22133b(%rip)
+# 8107675243232  2    ls       22011  22011  hardware interrupt     No         7fab593ea956 _dl_start+0x26 (ld-2.19.so) -> ffffffff86a012e0 page_fault ([kernel])
+
 import sys
 import weakref
 import threading
@@ -62,6 +104,16 @@ from multiprocessing import Process, Array, Value, Event
 
 # Data formatting helpers
 
+def tohex(ip):
+	if ip < 0:
+		ip += 1 << 64
+	return "%x" % ip
+
+def offstr(offset):
+	if offset:
+		return "+0x%x" % offset
+	return ""
+
 def dsoname(name):
 	if name == "[kernel.kallsyms]":
 		return "[kernel]"
@@ -1077,6 +1129,351 @@ class FetchMoreRecordsBar():
 		self.in_progress = True
 		self.start = self.model.FetchMoreRecords(self.Target())
 
+# Brance data model level two item
+
+class BranchLevelTwoItem():
+
+	def __init__(self, row, text, parent_item):
+		self.row = row
+		self.parent_item = parent_item
+		self.data = [""] * 8
+		self.data[7] = text
+		self.level = 2
+
+	def getParentItem(self):
+		return self.parent_item
+
+	def getRow(self):
+		return self.row
+
+	def childCount(self):
+		return 0
+
+	def hasChildren(self):
+		return False
+
+	def getData(self, column):
+		return self.data[column]
+
+# Brance data model level one item
+
+class BranchLevelOneItem():
+
+	def __init__(self, glb, row, data, parent_item):
+		self.glb = glb
+		self.row = row
+		self.parent_item = parent_item
+		self.child_count = 0
+		self.child_items = []
+		self.data = data[1:]
+		self.dbid = data[0]
+		self.level = 1
+		self.query_done = False
+
+	def getChildItem(self, row):
+		return self.child_items[row]
+
+	def getParentItem(self):
+		return self.parent_item
+
+	def getRow(self):
+		return self.row
+
+	def Select(self):
+		self.query_done = True
+
+		if not self.glb.have_disassembler:
+			return
+
+		query = QSqlQuery(self.glb.db)
+
+		QueryExec(query, "SELECT cpu, to_dso_id, to_symbol_id, to_sym_offset, short_name, long_name, build_id, sym_start, to_ip"
+				  " FROM samples"
+				  " INNER JOIN dsos ON samples.to_dso_id = dsos.id"
+				  " INNER JOIN symbols ON samples.to_symbol_id = symbols.id"
+				  " WHERE samples.id = " + str(self.dbid))
+		if not query.next():
+			return
+		cpu = query.value(0)
+		dso = query.value(1)
+		sym = query.value(2)
+		if dso == 0 or sym == 0:
+			return
+		off = query.value(3)
+		short_name = query.value(4)
+		long_name = query.value(5)
+		build_id = query.value(6)
+		sym_start = query.value(7)
+		ip = query.value(8)
+
+		QueryExec(query, "SELECT samples.dso_id, symbol_id, sym_offset, sym_start"
+				  " FROM samples"
+				  " INNER JOIN symbols ON samples.symbol_id = symbols.id"
+				  " WHERE samples.id > " + str(self.dbid) + " AND cpu = " + str(cpu) +
+				  " ORDER BY samples.id"
+				  " LIMIT 1")
+		if not query.next():
+			return
+		if query.value(0) != dso:
+			# Cannot disassemble from one dso to another
+			return
+		bsym = query.value(1)
+		boff = query.value(2)
+		bsym_start = query.value(3)
+		if bsym == 0:
+			return
+		tot = bsym_start + boff + 1 - sym_start - off
+		if tot <= 0 or tot > 16384:
+			return
+
+		inst = self.glb.disassembler.Instruction()
+		f = self.glb.FileFromNamesAndBuildId(short_name, long_name, build_id)
+		if not f:
+			return
+		mode = 0 if Is64Bit(f) else 1
+		self.glb.disassembler.SetMode(inst, mode)
+
+		buf_sz = tot + 16
+		buf = create_string_buffer(tot + 16)
+		f.seek(sym_start + off)
+		buf.value = f.read(buf_sz)
+		buf_ptr = addressof(buf)
+		i = 0
+		while tot > 0:
+			cnt, text = self.glb.disassembler.DisassembleOne(inst, buf_ptr, buf_sz, ip)
+			if cnt:
+				byte_str = tohex(ip).rjust(16)
+				for k in xrange(cnt):
+					byte_str += " %02x" % ord(buf[i])
+					i += 1
+				while k < 15:
+					byte_str += "   "
+					k += 1
+				self.child_items.append(BranchLevelTwoItem(0, byte_str + " " + text, self))
+				self.child_count += 1
+			else:
+				return
+			buf_ptr += cnt
+			tot -= cnt
+			buf_sz -= cnt
+			ip += cnt
+
+	def childCount(self):
+		if not self.query_done:
+			self.Select()
+			if not self.child_count:
+				return -1
+		return self.child_count
+
+	def hasChildren(self):
+		if not self.query_done:
+			return True
+		return self.child_count > 0
+
+	def getData(self, column):
+		return self.data[column]
+
+# Brance data model root item
+
+class BranchRootItem():
+
+	def __init__(self):
+		self.child_count = 0
+		self.child_items = []
+		self.level = 0
+
+	def getChildItem(self, row):
+		return self.child_items[row]
+
+	def getParentItem(self):
+		return None
+
+	def getRow(self):
+		return 0
+
+	def childCount(self):
+		return self.child_count
+
+	def hasChildren(self):
+		return self.child_count > 0
+
+	def getData(self, column):
+		return ""
+
+# Branch data preparation
+
+def BranchDataPrep(query):
+	data = []
+	for i in xrange(0, 8):
+		data.append(query.value(i))
+	data.append(tohex(query.value(8)).rjust(16) + " " + query.value(9) + offstr(query.value(10)) +
+			" (" + dsoname(query.value(11)) + ")" + " -> " +
+			tohex(query.value(12)) + " " + query.value(13) + offstr(query.value(14)) +
+			" (" + dsoname(query.value(15)) + ")")
+	return data
+
+# Branch data model
+
+class BranchModel(TreeModel):
+
+	progress = Signal(object)
+
+	def __init__(self, glb, event_id, where_clause, parent=None):
+		super(BranchModel, self).__init__(BranchRootItem(), parent)
+		self.glb = glb
+		self.event_id = event_id
+		self.more = True
+		self.populated = 0
+		sql = ("SELECT samples.id, time, cpu, comm, pid, tid, branch_types.name,"
+			" CASE WHEN in_tx = '0' THEN 'No' ELSE 'Yes' END,"
+			" ip, symbols.name, sym_offset, dsos.short_name,"
+			" to_ip, to_symbols.name, to_sym_offset, to_dsos.short_name"
+			" FROM samples"
+			" INNER JOIN comms ON comm_id = comms.id"
+			" INNER JOIN threads ON thread_id = threads.id"
+			" INNER JOIN branch_types ON branch_type = branch_types.id"
+			" INNER JOIN symbols ON symbol_id = symbols.id"
+			" INNER JOIN symbols to_symbols ON to_symbol_id = to_symbols.id"
+			" INNER JOIN dsos ON samples.dso_id = dsos.id"
+			" INNER JOIN dsos AS to_dsos ON samples.to_dso_id = to_dsos.id"
+			" WHERE samples.id > $$last_id$$" + where_clause +
+			" AND evsel_id = " + str(self.event_id) +
+			" ORDER BY samples.id"
+			" LIMIT " + str(glb_chunk_sz))
+		self.fetcher = SQLFetcher(glb, sql, BranchDataPrep, self.AddSample)
+		self.fetcher.done.connect(self.Update)
+		self.fetcher.Fetch(glb_chunk_sz)
+
+	def columnCount(self, parent=None):
+		return 8
+
+	def columnHeader(self, column):
+		return ("Time", "CPU", "Command", "PID", "TID", "Branch Type", "In Tx", "Branch")[column]
+
+	def columnFont(self, column):
+		if column != 7:
+			return None
+		return QFont("Monospace")
+
+	def DisplayData(self, item, index):
+		if item.level == 1:
+			self.FetchIfNeeded(item.row)
+		return item.getData(index.column())
+
+	def AddSample(self, data):
+		child = BranchLevelOneItem(self.glb, self.populated, data, self.root)
+		self.root.child_items.append(child)
+		self.populated += 1
+
+	def Update(self, fetched):
+		if not fetched:
+			self.more = False
+			self.progress.emit(0)
+		child_count = self.root.child_count
+		count = self.populated - child_count
+		if count > 0:
+			parent = QModelIndex()
+			self.beginInsertRows(parent, child_count, child_count + count - 1)
+			self.insertRows(child_count, count, parent)
+			self.root.child_count += count
+			self.endInsertRows()
+			self.progress.emit(self.root.child_count)
+
+	def FetchMoreRecords(self, count):
+		current = self.root.child_count
+		if self.more:
+			self.fetcher.Fetch(count)
+		else:
+			self.progress.emit(0)
+		return current
+
+	def HasMoreRecords(self):
+		return self.more
+
+# Branch window
+
+class BranchWindow(QMdiSubWindow):
+
+	def __init__(self, glb, event_id, name, where_clause, parent=None):
+		super(BranchWindow, self).__init__(parent)
+
+		model_name = "Branch Events " + str(event_id)
+		if len(where_clause):
+			model_name = where_clause + " " + model_name
+
+		self.model = LookupCreateModel(model_name, lambda: BranchModel(glb, event_id, where_clause))
+
+		self.view = QTreeView()
+		self.view.setUniformRowHeights(True)
+		self.view.setModel(self.model)
+
+		self.ResizeColumnsToContents()
+
+		self.find_bar = FindBar(self, self, True)
+
+		self.finder = ChildDataItemFinder(self.model.root)
+
+		self.fetch_bar = FetchMoreRecordsBar(self.model, self)
+
+		self.vbox = VBox(self.view, self.find_bar.Widget(), self.fetch_bar.Widget())
+
+		self.setWidget(self.vbox.Widget())
+
+		AddSubWindow(glb.mainwindow.mdi_area, self, name + " Branch Events")
+
+	def ResizeColumnToContents(self, column, n):
+		# Using the view's resizeColumnToContents() here is extrememly slow
+		# so implement a crude alternative
+		mm = "MM" if column else "MMMM"
+		font = self.view.font()
+		metrics = QFontMetrics(font)
+		max = 0
+		for row in xrange(n):
+			val = self.model.root.child_items[row].data[column]
+			len = metrics.width(str(val) + mm)
+			max = len if len > max else max
+		val = self.model.columnHeader(column)
+		len = metrics.width(str(val) + mm)
+		max = len if len > max else max
+		self.view.setColumnWidth(column, max)
+
+	def ResizeColumnsToContents(self):
+		n = min(self.model.root.child_count, 100)
+		if n < 1:
+			# No data yet, so connect a signal to notify when there is
+			self.model.rowsInserted.connect(self.UpdateColumnWidths)
+			return
+		columns = self.model.columnCount()
+		for i in xrange(columns):
+			self.ResizeColumnToContents(i, n)
+
+	def UpdateColumnWidths(self, *x):
+		# This only needs to be done once, so disconnect the signal now
+		self.model.rowsInserted.disconnect(self.UpdateColumnWidths)
+		self.ResizeColumnsToContents()
+
+	def Find(self, value, direction, pattern, context):
+		self.view.setFocus()
+		self.find_bar.Busy()
+		self.finder.Find(value, direction, pattern, context, self.FindDone)
+
+	def FindDone(self, row):
+		self.find_bar.Idle()
+		if row >= 0:
+			self.view.setCurrentIndex(self.model.index(row, 0, QModelIndex()))
+		else:
+			self.find_bar.NotFound()
+
+# Event list
+
+def GetEventList(db):
+	events = []
+	query = QSqlQuery(db)
+	QueryExec(query, "SELECT name FROM selected_events WHERE id > 0 ORDER BY id")
+	while query.next():
+		events.append(query.value(0))
+	return events
+
 # SQL data preparation
 
 def SQLTableDataPrep(query, count):
@@ -1448,6 +1845,8 @@ class MainWindow(QMainWindow):
 		reports_menu = menu.addMenu("&Reports")
 		reports_menu.addAction(CreateAction("Context-Sensitive Call &Graph", "Create a new window containing a context-sensitive call graph", self.NewCallGraph, self))
 
+		self.EventMenu(GetEventList(glb.db), reports_menu)
+
 		self.TableMenu(GetTableList(glb), menu)
 
 		self.window_menu = WindowMenu(self.mdi_area, menu)
@@ -1476,6 +1875,20 @@ class MainWindow(QMainWindow):
 		win = self.mdi_area.activeSubWindow()
 		EnlargeFont(win.view)
 
+	def EventMenu(self, events, reports_menu):
+		branches_events = 0
+		for event in events:
+			event = event.split(":")[0]
+			if event == "branches":
+				branches_events += 1
+		dbid = 0
+		for event in events:
+			dbid += 1
+			event = event.split(":")[0]
+			if event == "branches":
+				label = "All branches" if branches_events == 1 else "All branches " + "(id=" + dbid + ")"
+				reports_menu.addAction(CreateAction(label, "Create a new window displaying branch events", lambda x=dbid: self.NewBranchView(x), self))
+
 	def TableMenu(self, tables, menu):
 		table_menu = menu.addMenu("&Tables")
 		for table in tables:
@@ -1484,9 +1897,112 @@ class MainWindow(QMainWindow):
 	def NewCallGraph(self):
 		CallGraphWindow(self.glb, self)
 
+	def NewBranchView(self, event_id):
+		BranchWindow(self.glb, event_id, "", "", self)
+
 	def NewTableView(self, table_name):
 		TableWindow(self.glb, table_name, self)
 
+# XED Disassembler
+
+class xed_state_t(Structure):
+
+	_fields_ = [
+		("mode", c_int),
+		("width", c_int)
+	]
+
+class XEDInstruction():
+
+	def __init__(self, libxed):
+		# Current xed_decoded_inst_t structure is 192 bytes. Use 512 to allow for future expansion
+		xedd_t = c_byte * 512
+		self.xedd = xedd_t()
+		self.xedp = addressof(self.xedd)
+		libxed.xed_decoded_inst_zero(self.xedp)
+		self.state = xed_state_t()
+		self.statep = addressof(self.state)
+		# Buffer for disassembled instruction text
+		self.buffer = create_string_buffer(256)
+		self.bufferp = addressof(self.buffer)
+
+class LibXED():
+
+	def __init__(self):
+		self.libxed = CDLL("libxed.so")
+
+		self.xed_tables_init = self.libxed.xed_tables_init
+		self.xed_tables_init.restype = None
+		self.xed_tables_init.argtypes = []
+
+		self.xed_decoded_inst_zero = self.libxed.xed_decoded_inst_zero
+		self.xed_decoded_inst_zero.restype = None
+		self.xed_decoded_inst_zero.argtypes = [ c_void_p ]
+
+		self.xed_operand_values_set_mode = self.libxed.xed_operand_values_set_mode
+		self.xed_operand_values_set_mode.restype = None
+		self.xed_operand_values_set_mode.argtypes = [ c_void_p, c_void_p ]
+
+		self.xed_decoded_inst_zero_keep_mode = self.libxed.xed_decoded_inst_zero_keep_mode
+		self.xed_decoded_inst_zero_keep_mode.restype = None
+		self.xed_decoded_inst_zero_keep_mode.argtypes = [ c_void_p ]
+
+		self.xed_decode = self.libxed.xed_decode
+		self.xed_decode.restype = c_int
+		self.xed_decode.argtypes = [ c_void_p, c_void_p, c_uint ]
+
+		self.xed_format_context = self.libxed.xed_format_context
+		self.xed_format_context.restype = c_uint
+		self.xed_format_context.argtypes = [ c_int, c_void_p, c_void_p, c_int, c_ulonglong, c_void_p, c_void_p ]
+
+		self.xed_tables_init()
+
+	def Instruction(self):
+		return XEDInstruction(self)
+
+	def SetMode(self, inst, mode):
+		if mode:
+			inst.state.mode = 4 # 32-bit
+			inst.state.width = 4 # 4 bytes
+		else:
+			inst.state.mode = 1 # 64-bit
+			inst.state.width = 8 # 8 bytes
+		self.xed_operand_values_set_mode(inst.xedp, inst.statep)
+
+	def DisassembleOne(self, inst, bytes_ptr, bytes_cnt, ip):
+		self.xed_decoded_inst_zero_keep_mode(inst.xedp)
+		err = self.xed_decode(inst.xedp, bytes_ptr, bytes_cnt)
+		if err:
+			return 0, ""
+		# Use AT&T mode (2), alternative is Intel (3)
+		ok = self.xed_format_context(2, inst.xedp, inst.bufferp, sizeof(inst.buffer), ip, 0, 0)
+		if not ok:
+			return 0, ""
+		# Return instruction length and the disassembled instruction text
+		# For now, assume the length is in byte 166
+		return inst.xedd[166], inst.buffer.value
+
+def TryOpen(file_name):
+	try:
+		return open(file_name, "rb")
+	except:
+		return None
+
+def Is64Bit(f):
+	result = sizeof(c_void_p)
+	# ELF support only
+	pos = f.tell()
+	f.seek(0)
+	header = f.read(7)
+	f.seek(pos)
+	magic = header[0:4]
+	eclass = ord(header[4])
+	encoding = ord(header[5])
+	version = ord(header[6])
+	if magic == chr(127) + "ELF" and eclass > 0 and eclass < 3 and encoding > 0 and encoding < 3 and version == 1:
+		result = True if eclass == 2 else False
+	return result
+
 # Global data
 
 class Glb():
@@ -1495,9 +2011,40 @@ class Glb():
 		self.dbref = dbref
 		self.db = db
 		self.dbname = dbname
+		self.home_dir = os.path.expanduser("~")
+		self.buildid_dir = os.getenv("PERF_BUILDID_DIR")
+		if self.buildid_dir:
+			self.buildid_dir += "/.build-id/"
+		else:
+			self.buildid_dir = self.home_dir + "/.debug/.build-id/"
 		self.app = None
 		self.mainwindow = None
 		self.instances_to_shutdown_on_exit = weakref.WeakSet()
+		try:
+			self.disassembler = LibXED()
+			self.have_disassembler = True
+		except:
+			self.have_disassembler = False
+
+	def FileFromBuildId(self, build_id):
+		file_name = self.buildid_dir + build_id[0:2] + "/" + build_id[2:] + "/elf"
+		return TryOpen(file_name)
+
+	def FileFromNamesAndBuildId(self, short_name, long_name, build_id):
+		# Assume current machine i.e. no support for virtualization
+		if short_name[0:7] == "[kernel" and os.path.basename(long_name) == "kcore":
+			file_name = os.getenv("PERF_KCORE")
+			f = TryOpen(file_name) if file_name else None
+			if f:
+				return f
+			# For now, no special handling if long_name is /proc/kcore
+			f = TryOpen(long_name)
+			if f:
+				return f
+		f = self.FileFromBuildId(build_id)
+		if f:
+			return f
+		return None
 
 	def AddInstanceToShutdownOnExit(self, instance):
 		self.instances_to_shutdown_on_exit.add(instance)
-- 
GitLab


From 1fcb748d187d0c7732a75a509e924ead6d070e04 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 24 Oct 2018 00:36:12 +0100
Subject: [PATCH 0143/1890] amd-gpu: Don't undefine READ and WRITE

Remove the undefinition of READ and WRITE because these constants may be
used elsewhere in subsequently included header files, thus breaking them.

These constants don't actually appear to be used in the driver, so the
undefinition seems pointless.

Fixes: 4562236b3bc0 ("drm/amd/dc: Add dc display driver (v2)")
Signed-off-by: David Howells <dhowells@redhat.com>
---
 drivers/gpu/drm/amd/display/dc/os_types.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/os_types.h b/drivers/gpu/drm/amd/display/dc/os_types.h
index a407892905af2..c0d9f332baedc 100644
--- a/drivers/gpu/drm/amd/display/dc/os_types.h
+++ b/drivers/gpu/drm/amd/display/dc/os_types.h
@@ -40,8 +40,6 @@
 #define LITTLEENDIAN_CPU
 #endif
 
-#undef READ
-#undef WRITE
 #undef FRAME_SIZE
 
 #define dm_output_to_console(fmt, ...) DRM_DEBUG_KMS(fmt, ##__VA_ARGS__)
-- 
GitLab


From 00e23707442a75b404392cef1405ab4fd498de6b Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 22 Oct 2018 13:07:28 +0100
Subject: [PATCH 0144/1890] iov_iter: Use accessor function

Use accessor functions to access an iterator's type and direction.  This
allows for the possibility of using some other method of determining the
type of iterator than if-chains with bitwise-AND conditions.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 block/bio.c           |  2 +-
 fs/block_dev.c        |  2 +-
 fs/ceph/file.c        |  2 +-
 fs/cifs/file.c        |  4 ++--
 fs/cifs/misc.c        |  2 +-
 fs/cifs/smbdirect.c   | 17 +++++++++----
 fs/direct-io.c        |  2 +-
 fs/fuse/file.c        |  2 +-
 fs/iomap.c            |  2 +-
 include/linux/uio.h   | 48 +++++++++++++++++++++++++------------
 lib/iov_iter.c        | 56 +++++++++++++++++++++----------------------
 mm/filemap.c          |  2 +-
 net/9p/trans_virtio.c |  2 +-
 net/tls/tls_sw.c      |  4 ++--
 14 files changed, 87 insertions(+), 60 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index 0093bed81c0e8..c55f36bbe12ac 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1255,7 +1255,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
 	/*
 	 * success
 	 */
-	if (((iter->type & WRITE) && (!map_data || !map_data->null_mapped)) ||
+	if ((iov_iter_rw(iter) == WRITE && (!map_data || !map_data->null_mapped)) ||
 	    (map_data && map_data->from_user)) {
 		ret = bio_copy_from_iter(bio, iter);
 		if (ret)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 38b8ce05cbc7e..a80b4f0ee7c4f 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -349,7 +349,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 
 	dio->size = 0;
 	dio->multi_bio = false;
-	dio->should_dirty = is_read && (iter->type == ITER_IOVEC);
+	dio->should_dirty = is_read && iter_is_iovec(iter);
 
 	blk_start_plug(&plug);
 	for (;;) {
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 92ab204336829..524ecc95b04de 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -658,7 +658,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
 	if (ret < 0)
 		return ret;
 
-	if (unlikely(to->type & ITER_PIPE)) {
+	if (unlikely(iov_iter_is_pipe(to))) {
 		size_t page_off;
 		ret = iov_iter_get_pages_alloc(to, &pages, len,
 					       &page_off);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 8d41ca7bfcf1f..dcdbcb6f09f82 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2990,7 +2990,7 @@ cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
 		size_t copy = min_t(size_t, remaining, PAGE_SIZE);
 		size_t written;
 
-		if (unlikely(iter->type & ITER_PIPE)) {
+		if (unlikely(iov_iter_is_pipe(iter))) {
 			void *addr = kmap_atomic(page);
 
 			written = copy_to_iter(addr, copy, iter);
@@ -3302,7 +3302,7 @@ ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
 	if (!is_sync_kiocb(iocb))
 		ctx->iocb = iocb;
 
-	if (to->type == ITER_IOVEC)
+	if (iter_is_iovec(to))
 		ctx->should_dirty = true;
 
 	rc = setup_aio_ctx_iter(ctx, to, READ);
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 6926685e513cf..7b5b960a04b8c 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -786,7 +786,7 @@ setup_aio_ctx_iter(struct cifs_aio_ctx *ctx, struct iov_iter *iter, int rw)
 	struct page **pages = NULL;
 	struct bio_vec *bv = NULL;
 
-	if (iter->type & ITER_KVEC) {
+	if (iov_iter_is_kvec(iter)) {
 		memcpy(&ctx->iter, iter, sizeof(struct iov_iter));
 		ctx->len = count;
 		iov_iter_advance(iter, count);
diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c
index 5fdb9a509a97f..5b05bf2552682 100644
--- a/fs/cifs/smbdirect.c
+++ b/fs/cifs/smbdirect.c
@@ -2054,14 +2054,22 @@ int smbd_recv(struct smbd_connection *info, struct msghdr *msg)
 
 	info->smbd_recv_pending++;
 
-	switch (msg->msg_iter.type) {
-	case READ | ITER_KVEC:
+	if (iov_iter_rw(&msg->msg_iter) == WRITE) {
+		/* It's a bug in upper layer to get there */
+		cifs_dbg(VFS, "CIFS: invalid msg iter dir %u\n",
+			 iov_iter_rw(&msg->msg_iter));
+		rc = -EINVAL;
+		goto out;
+	}
+
+	switch (iov_iter_type(&msg->msg_iter)) {
+	case ITER_KVEC:
 		buf = msg->msg_iter.kvec->iov_base;
 		to_read = msg->msg_iter.kvec->iov_len;
 		rc = smbd_recv_buf(info, buf, to_read);
 		break;
 
-	case READ | ITER_BVEC:
+	case ITER_BVEC:
 		page = msg->msg_iter.bvec->bv_page;
 		page_offset = msg->msg_iter.bvec->bv_offset;
 		to_read = msg->msg_iter.bvec->bv_len;
@@ -2071,10 +2079,11 @@ int smbd_recv(struct smbd_connection *info, struct msghdr *msg)
 	default:
 		/* It's a bug in upper layer to get there */
 		cifs_dbg(VFS, "CIFS: invalid msg type %d\n",
-			msg->msg_iter.type);
+			 iov_iter_type(&msg->msg_iter));
 		rc = -EINVAL;
 	}
 
+out:
 	info->smbd_recv_pending--;
 	wake_up(&info->wait_smbd_recv_pending);
 
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 093fb54cd3163..722d17c88edb9 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1313,7 +1313,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
 	spin_lock_init(&dio->bio_lock);
 	dio->refcount = 1;
 
-	dio->should_dirty = (iter->type == ITER_IOVEC);
+	dio->should_dirty = iter_is_iovec(iter) && iov_iter_rw(iter) == READ;
 	sdio.iter = iter;
 	sdio.final_block_in_request = end >> blkbits;
 
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 32d0b883e74f3..c9ccd45156dc9 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1271,7 +1271,7 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
 	ssize_t ret = 0;
 
 	/* Special case for kernel I/O: can copy directly into the buffer */
-	if (ii->type & ITER_KVEC) {
+	if (iov_iter_is_kvec(ii)) {
 		unsigned long user_addr = fuse_get_user_addr(ii);
 		size_t frag_size = fuse_get_frag_size(ii, *nbytesp);
 
diff --git a/fs/iomap.c b/fs/iomap.c
index ec15cf2ec696d..2c53400aa802e 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -1795,7 +1795,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		if (pos >= dio->i_size)
 			goto out_free_dio;
 
-		if (iter->type == ITER_IOVEC)
+		if (iter_is_iovec(iter) && iov_iter_rw(iter) == READ)
 			dio->flags |= IOMAP_DIO_DIRTY;
 	} else {
 		flags |= IOMAP_WRITE;
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 422b1c01ee0de..fcabc959c794d 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -21,7 +21,7 @@ struct kvec {
 	size_t iov_len;
 };
 
-enum {
+enum iter_type {
 	ITER_IOVEC = 0,
 	ITER_KVEC = 2,
 	ITER_BVEC = 4,
@@ -47,6 +47,36 @@ struct iov_iter {
 	};
 };
 
+static inline enum iter_type iov_iter_type(const struct iov_iter *i)
+{
+	return i->type & ~(READ | WRITE);
+}
+
+static inline bool iter_is_iovec(const struct iov_iter *i)
+{
+	return iov_iter_type(i) == ITER_IOVEC;
+}
+
+static inline bool iov_iter_is_kvec(const struct iov_iter *i)
+{
+	return iov_iter_type(i) == ITER_KVEC;
+}
+
+static inline bool iov_iter_is_bvec(const struct iov_iter *i)
+{
+	return iov_iter_type(i) == ITER_BVEC;
+}
+
+static inline bool iov_iter_is_pipe(const struct iov_iter *i)
+{
+	return iov_iter_type(i) == ITER_PIPE;
+}
+
+static inline unsigned char iov_iter_rw(const struct iov_iter *i)
+{
+	return i->type & (READ | WRITE);
+}
+
 /*
  * Total number of bytes covered by an iovec.
  *
@@ -74,7 +104,8 @@ static inline struct iovec iov_iter_iovec(const struct iov_iter *iter)
 }
 
 #define iov_for_each(iov, iter, start)				\
-	if (!((start).type & (ITER_BVEC | ITER_PIPE)))		\
+	if (iov_iter_type(start) == ITER_IOVEC ||		\
+	    iov_iter_type(start) == ITER_KVEC)			\
 	for (iter = (start);					\
 	     (iter).count &&					\
 	     ((iov = iov_iter_iovec(&(iter))), 1);		\
@@ -202,19 +233,6 @@ static inline size_t iov_iter_count(const struct iov_iter *i)
 	return i->count;
 }
 
-static inline bool iter_is_iovec(const struct iov_iter *i)
-{
-	return !(i->type & (ITER_BVEC | ITER_KVEC | ITER_PIPE));
-}
-
-/*
- * Get one of READ or WRITE out of iter->type without any other flags OR'd in
- * with it.
- *
- * The ?: is just for type safety.
- */
-#define iov_iter_rw(i) ((0 ? (struct iov_iter *)0 : (i))->type & (READ | WRITE))
-
 /*
  * Cap the iov_iter by given limit; note that the second argument is
  * *not* the new size - it's upper limit for such.  Passing it a value
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 8be175df30753..42d39116a5564 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -558,7 +558,7 @@ static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
 size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
 {
 	const char *from = addr;
-	if (unlikely(i->type & ITER_PIPE))
+	if (unlikely(iov_iter_is_pipe(i)))
 		return copy_pipe_to_iter(addr, bytes, i);
 	if (iter_is_iovec(i))
 		might_fault();
@@ -658,7 +658,7 @@ size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
 	const char *from = addr;
 	unsigned long rem, curr_addr, s_addr = (unsigned long) addr;
 
-	if (unlikely(i->type & ITER_PIPE))
+	if (unlikely(iov_iter_is_pipe(i)))
 		return copy_pipe_to_iter_mcsafe(addr, bytes, i);
 	if (iter_is_iovec(i))
 		might_fault();
@@ -692,7 +692,7 @@ EXPORT_SYMBOL_GPL(_copy_to_iter_mcsafe);
 size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
 {
 	char *to = addr;
-	if (unlikely(i->type & ITER_PIPE)) {
+	if (unlikely(iov_iter_is_pipe(i))) {
 		WARN_ON(1);
 		return 0;
 	}
@@ -712,7 +712,7 @@ EXPORT_SYMBOL(_copy_from_iter);
 bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
 {
 	char *to = addr;
-	if (unlikely(i->type & ITER_PIPE)) {
+	if (unlikely(iov_iter_is_pipe(i))) {
 		WARN_ON(1);
 		return false;
 	}
@@ -739,7 +739,7 @@ EXPORT_SYMBOL(_copy_from_iter_full);
 size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
 {
 	char *to = addr;
-	if (unlikely(i->type & ITER_PIPE)) {
+	if (unlikely(iov_iter_is_pipe(i))) {
 		WARN_ON(1);
 		return 0;
 	}
@@ -773,7 +773,7 @@ EXPORT_SYMBOL(_copy_from_iter_nocache);
 size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
 {
 	char *to = addr;
-	if (unlikely(i->type & ITER_PIPE)) {
+	if (unlikely(iov_iter_is_pipe(i))) {
 		WARN_ON(1);
 		return 0;
 	}
@@ -794,7 +794,7 @@ EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache);
 bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
 {
 	char *to = addr;
-	if (unlikely(i->type & ITER_PIPE)) {
+	if (unlikely(iov_iter_is_pipe(i))) {
 		WARN_ON(1);
 		return false;
 	}
@@ -836,7 +836,7 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
 		size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
 		kunmap_atomic(kaddr);
 		return wanted;
-	} else if (likely(!(i->type & ITER_PIPE)))
+	} else if (likely(!iov_iter_is_pipe(i)))
 		return copy_page_to_iter_iovec(page, offset, bytes, i);
 	else
 		return copy_page_to_iter_pipe(page, offset, bytes, i);
@@ -848,7 +848,7 @@ size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
 {
 	if (unlikely(!page_copy_sane(page, offset, bytes)))
 		return 0;
-	if (unlikely(i->type & ITER_PIPE)) {
+	if (unlikely(iov_iter_is_pipe(i))) {
 		WARN_ON(1);
 		return 0;
 	}
@@ -888,7 +888,7 @@ static size_t pipe_zero(size_t bytes, struct iov_iter *i)
 
 size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
 {
-	if (unlikely(i->type & ITER_PIPE))
+	if (unlikely(iov_iter_is_pipe(i)))
 		return pipe_zero(bytes, i);
 	iterate_and_advance(i, bytes, v,
 		clear_user(v.iov_base, v.iov_len),
@@ -908,7 +908,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
 		kunmap_atomic(kaddr);
 		return 0;
 	}
-	if (unlikely(i->type & ITER_PIPE)) {
+	if (unlikely(iov_iter_is_pipe(i))) {
 		kunmap_atomic(kaddr);
 		WARN_ON(1);
 		return 0;
@@ -972,7 +972,7 @@ static void pipe_advance(struct iov_iter *i, size_t size)
 
 void iov_iter_advance(struct iov_iter *i, size_t size)
 {
-	if (unlikely(i->type & ITER_PIPE)) {
+	if (unlikely(iov_iter_is_pipe(i))) {
 		pipe_advance(i, size);
 		return;
 	}
@@ -987,7 +987,7 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll)
 	if (WARN_ON(unroll > MAX_RW_COUNT))
 		return;
 	i->count += unroll;
-	if (unlikely(i->type & ITER_PIPE)) {
+	if (unlikely(iov_iter_is_pipe(i))) {
 		struct pipe_inode_info *pipe = i->pipe;
 		int idx = i->idx;
 		size_t off = i->iov_offset;
@@ -1016,7 +1016,7 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll)
 		return;
 	}
 	unroll -= i->iov_offset;
-	if (i->type & ITER_BVEC) {
+	if (iov_iter_is_bvec(i)) {
 		const struct bio_vec *bvec = i->bvec;
 		while (1) {
 			size_t n = (--bvec)->bv_len;
@@ -1049,11 +1049,11 @@ EXPORT_SYMBOL(iov_iter_revert);
  */
 size_t iov_iter_single_seg_count(const struct iov_iter *i)
 {
-	if (unlikely(i->type & ITER_PIPE))
+	if (unlikely(iov_iter_is_pipe(i)))
 		return i->count;	// it is a silly place, anyway
 	if (i->nr_segs == 1)
 		return i->count;
-	else if (i->type & ITER_BVEC)
+	else if (iov_iter_is_bvec(i))
 		return min(i->count, i->bvec->bv_len - i->iov_offset);
 	else
 		return min(i->count, i->iov->iov_len - i->iov_offset);
@@ -1106,7 +1106,7 @@ unsigned long iov_iter_alignment(const struct iov_iter *i)
 	unsigned long res = 0;
 	size_t size = i->count;
 
-	if (unlikely(i->type & ITER_PIPE)) {
+	if (unlikely(iov_iter_is_pipe(i))) {
 		if (size && i->iov_offset && allocated(&i->pipe->bufs[i->idx]))
 			return size | i->iov_offset;
 		return size;
@@ -1125,7 +1125,7 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
 	unsigned long res = 0;
 	size_t size = i->count;
 
-	if (unlikely(i->type & ITER_PIPE)) {
+	if (unlikely(iov_iter_is_pipe(i))) {
 		WARN_ON(1);
 		return ~0U;
 	}
@@ -1193,7 +1193,7 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
 	if (maxsize > i->count)
 		maxsize = i->count;
 
-	if (unlikely(i->type & ITER_PIPE))
+	if (unlikely(iov_iter_is_pipe(i)))
 		return pipe_get_pages(i, pages, maxsize, maxpages, start);
 	iterate_all_kinds(i, maxsize, v, ({
 		unsigned long addr = (unsigned long)v.iov_base;
@@ -1205,7 +1205,7 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
 			len = maxpages * PAGE_SIZE;
 		addr &= ~(PAGE_SIZE - 1);
 		n = DIV_ROUND_UP(len, PAGE_SIZE);
-		res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pages);
+		res = get_user_pages_fast(addr, n, iov_iter_rw(i) != WRITE, pages);
 		if (unlikely(res < 0))
 			return res;
 		return (res == n ? len : res * PAGE_SIZE) - *start;
@@ -1270,7 +1270,7 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
 	if (maxsize > i->count)
 		maxsize = i->count;
 
-	if (unlikely(i->type & ITER_PIPE))
+	if (unlikely(iov_iter_is_pipe(i)))
 		return pipe_get_pages_alloc(i, pages, maxsize, start);
 	iterate_all_kinds(i, maxsize, v, ({
 		unsigned long addr = (unsigned long)v.iov_base;
@@ -1283,7 +1283,7 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
 		p = get_pages_array(n);
 		if (!p)
 			return -ENOMEM;
-		res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, p);
+		res = get_user_pages_fast(addr, n, iov_iter_rw(i) != WRITE, p);
 		if (unlikely(res < 0)) {
 			kvfree(p);
 			return res;
@@ -1313,7 +1313,7 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
 	__wsum sum, next;
 	size_t off = 0;
 	sum = *csum;
-	if (unlikely(i->type & ITER_PIPE)) {
+	if (unlikely(iov_iter_is_pipe(i))) {
 		WARN_ON(1);
 		return 0;
 	}
@@ -1355,7 +1355,7 @@ bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
 	__wsum sum, next;
 	size_t off = 0;
 	sum = *csum;
-	if (unlikely(i->type & ITER_PIPE)) {
+	if (unlikely(iov_iter_is_pipe(i))) {
 		WARN_ON(1);
 		return false;
 	}
@@ -1400,7 +1400,7 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum,
 	__wsum sum, next;
 	size_t off = 0;
 	sum = *csum;
-	if (unlikely(i->type & ITER_PIPE)) {
+	if (unlikely(iov_iter_is_pipe(i))) {
 		WARN_ON(1);	/* for now */
 		return 0;
 	}
@@ -1443,7 +1443,7 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages)
 	if (!size)
 		return 0;
 
-	if (unlikely(i->type & ITER_PIPE)) {
+	if (unlikely(iov_iter_is_pipe(i))) {
 		struct pipe_inode_info *pipe = i->pipe;
 		size_t off;
 		int idx;
@@ -1481,11 +1481,11 @@ EXPORT_SYMBOL(iov_iter_npages);
 const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
 {
 	*new = *old;
-	if (unlikely(new->type & ITER_PIPE)) {
+	if (unlikely(iov_iter_is_pipe(new))) {
 		WARN_ON(1);
 		return NULL;
 	}
-	if (new->type & ITER_BVEC)
+	if (iov_iter_is_bvec(new))
 		return new->bvec = kmemdup(new->bvec,
 				    new->nr_segs * sizeof(struct bio_vec),
 				    flags);
diff --git a/mm/filemap.c b/mm/filemap.c
index 52517f28e6f4a..bdeee0168ea75 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2122,7 +2122,7 @@ static ssize_t generic_file_buffered_read(struct kiocb *iocb,
 					!mapping->a_ops->is_partially_uptodate)
 				goto page_not_up_to_date;
 			/* pipes can't handle partially uptodate pages */
-			if (unlikely(iter->type & ITER_PIPE))
+			if (unlikely(iov_iter_is_pipe(iter)))
 				goto page_not_up_to_date;
 			if (!trylock_page(page))
 				goto page_not_up_to_date;
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 7728b0acde09a..4d7d2070e9c85 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -322,7 +322,7 @@ static int p9_get_mapped_pages(struct virtio_chan *chan,
 	if (!iov_iter_count(data))
 		return 0;
 
-	if (!(data->type & ITER_KVEC)) {
+	if (iov_iter_is_kvec(data)) {
 		int n;
 		/*
 		 * We allow only p9_max_pages pinned. We wait for the
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index a525fc4c2a4b9..ad64b9c8b6006 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -799,7 +799,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 	struct crypto_tfm *tfm = crypto_aead_tfm(ctx->aead_send);
 	bool async_capable = tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC;
 	unsigned char record_type = TLS_RECORD_TYPE_DATA;
-	bool is_kvec = msg->msg_iter.type & ITER_KVEC;
+	bool is_kvec = iov_iter_is_kvec(&msg->msg_iter);
 	bool eor = !(msg->msg_flags & MSG_MORE);
 	size_t try_to_copy, copied = 0;
 	struct sk_msg *msg_pl, *msg_en;
@@ -1457,7 +1457,7 @@ int tls_sw_recvmsg(struct sock *sk,
 	bool cmsg = false;
 	int target, err = 0;
 	long timeo;
-	bool is_kvec = msg->msg_iter.type & ITER_KVEC;
+	bool is_kvec = iov_iter_is_kvec(&msg->msg_iter);
 	int num_async = 0;
 
 	flags |= nonblock;
-- 
GitLab


From aa563d7bca6e882ec2bdae24603c8f016401a144 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sat, 20 Oct 2018 00:57:56 +0100
Subject: [PATCH 0145/1890] iov_iter: Separate type from direction and use
 accessor functions

In the iov_iter struct, separate the iterator type from the iterator
direction and use accessor functions to access them in most places.

Convert a bunch of places to use switch-statements to access them rather
then chains of bitwise-AND statements.  This makes it easier to add further
iterator types.  Also, this can be more efficient as to implement a switch
of small contiguous integers, the compiler can use ~50% fewer compare
instructions than it has to use bitwise-and instructions.

Further, cease passing the iterator type into the iterator setup function.
The iterator function can set that itself.  Only the direction is required.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 drivers/block/drbd/drbd_main.c           |  2 +-
 drivers/block/drbd/drbd_receiver.c       |  2 +-
 drivers/block/loop.c                     |  9 ++++----
 drivers/block/nbd.c                      | 12 +++++-----
 drivers/fsi/fsi-sbefifo.c                |  4 ++--
 drivers/isdn/mISDN/l1oip_core.c          |  3 +--
 drivers/misc/vmw_vmci/vmci_queue_pair.c  |  6 ++---
 drivers/nvme/target/io-cmd-file.c        |  2 +-
 drivers/target/iscsi/iscsi_target_util.c |  6 ++---
 drivers/target/target_core_file.c        |  6 ++---
 drivers/usb/usbip/usbip_common.c         |  2 +-
 drivers/xen/pvcalls-back.c               |  8 +++----
 fs/9p/vfs_addr.c                         |  4 ++--
 fs/9p/vfs_dir.c                          |  2 +-
 fs/9p/xattr.c                            |  4 ++--
 fs/afs/rxrpc.c                           | 15 ++++++-------
 fs/ceph/file.c                           |  5 ++---
 fs/cifs/connect.c                        |  4 ++--
 fs/cifs/misc.c                           |  2 +-
 fs/cifs/smb2ops.c                        |  4 ++--
 fs/cifs/transport.c                      |  8 +++----
 fs/dlm/lowcomms.c                        |  2 +-
 fs/nfsd/vfs.c                            |  4 ++--
 fs/ocfs2/cluster/tcp.c                   |  2 +-
 fs/orangefs/inode.c                      |  2 +-
 fs/splice.c                              |  7 +++---
 include/linux/uio.h                      | 10 ++++-----
 lib/iov_iter.c                           | 28 +++++++++++++-----------
 mm/page_io.c                             |  2 +-
 net/9p/client.c                          |  2 +-
 net/bluetooth/6lowpan.c                  |  2 +-
 net/bluetooth/a2mp.c                     |  2 +-
 net/bluetooth/smp.c                      |  2 +-
 net/ceph/messenger.c                     |  6 ++---
 net/netfilter/ipvs/ip_vs_sync.c          |  2 +-
 net/smc/smc_clc.c                        |  4 ++--
 net/socket.c                             |  6 ++---
 net/sunrpc/svcsock.c                     |  2 +-
 net/tipc/topsrv.c                        |  2 +-
 net/tls/tls_device.c                     |  4 ++--
 40 files changed, 96 insertions(+), 105 deletions(-)

diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index ef8212a4b73ef..ded9735d44af2 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1856,7 +1856,7 @@ int drbd_send(struct drbd_connection *connection, struct socket *sock,
 
 	/* THINK  if (signal_pending) return ... ? */
 
-	iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iov, 1, size);
+	iov_iter_kvec(&msg.msg_iter, WRITE, &iov, 1, size);
 
 	if (sock == connection->data.socket) {
 		rcu_read_lock();
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 75f6b47169e65..fcc70642b0049 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -516,7 +516,7 @@ static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flag
 	struct msghdr msg = {
 		.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
 	};
-	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, size);
+	iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, size);
 	return sock_recvmsg(sock, &msg, msg.msg_flags);
 }
 
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index ea9debf59b225..cb0cc86850762 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -268,7 +268,7 @@ static int lo_write_bvec(struct file *file, struct bio_vec *bvec, loff_t *ppos)
 	struct iov_iter i;
 	ssize_t bw;
 
-	iov_iter_bvec(&i, ITER_BVEC | WRITE, bvec, 1, bvec->bv_len);
+	iov_iter_bvec(&i, WRITE, bvec, 1, bvec->bv_len);
 
 	file_start_write(file);
 	bw = vfs_iter_write(file, &i, ppos, 0);
@@ -346,7 +346,7 @@ static int lo_read_simple(struct loop_device *lo, struct request *rq,
 	ssize_t len;
 
 	rq_for_each_segment(bvec, rq, iter) {
-		iov_iter_bvec(&i, ITER_BVEC, &bvec, 1, bvec.bv_len);
+		iov_iter_bvec(&i, READ, &bvec, 1, bvec.bv_len);
 		len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0);
 		if (len < 0)
 			return len;
@@ -387,7 +387,7 @@ static int lo_read_transfer(struct loop_device *lo, struct request *rq,
 		b.bv_offset = 0;
 		b.bv_len = bvec.bv_len;
 
-		iov_iter_bvec(&i, ITER_BVEC, &b, 1, b.bv_len);
+		iov_iter_bvec(&i, READ, &b, 1, b.bv_len);
 		len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0);
 		if (len < 0) {
 			ret = len;
@@ -554,8 +554,7 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
 	}
 	atomic_set(&cmd->ref, 2);
 
-	iov_iter_bvec(&iter, ITER_BVEC | rw, bvec,
-		      segments, blk_rq_bytes(rq));
+	iov_iter_bvec(&iter, rw, bvec, segments, blk_rq_bytes(rq));
 	iter.iov_offset = offset;
 
 	cmd->iocb.ki_pos = pos;
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 14a51254c3db7..4d4d6129ff662 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -473,7 +473,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
 	u32 nbd_cmd_flags = 0;
 	int sent = nsock->sent, skip = 0;
 
-	iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request));
+	iov_iter_kvec(&from, WRITE, &iov, 1, sizeof(request));
 
 	switch (req_op(req)) {
 	case REQ_OP_DISCARD:
@@ -564,8 +564,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
 
 			dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n",
 				req, bvec.bv_len);
-			iov_iter_bvec(&from, ITER_BVEC | WRITE,
-				      &bvec, 1, bvec.bv_len);
+			iov_iter_bvec(&from, WRITE, &bvec, 1, bvec.bv_len);
 			if (skip) {
 				if (skip >= iov_iter_count(&from)) {
 					skip -= iov_iter_count(&from);
@@ -624,7 +623,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
 	int ret = 0;
 
 	reply.magic = 0;
-	iov_iter_kvec(&to, READ | ITER_KVEC, &iov, 1, sizeof(reply));
+	iov_iter_kvec(&to, READ, &iov, 1, sizeof(reply));
 	result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
 	if (result <= 0) {
 		if (!nbd_disconnected(config))
@@ -678,8 +677,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
 		struct bio_vec bvec;
 
 		rq_for_each_segment(bvec, req, iter) {
-			iov_iter_bvec(&to, ITER_BVEC | READ,
-				      &bvec, 1, bvec.bv_len);
+			iov_iter_bvec(&to, READ, &bvec, 1, bvec.bv_len);
 			result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
 			if (result <= 0) {
 				dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
@@ -1073,7 +1071,7 @@ static void send_disconnects(struct nbd_device *nbd)
 	for (i = 0; i < config->num_connections; i++) {
 		struct nbd_sock *nsock = config->socks[i];
 
-		iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request));
+		iov_iter_kvec(&from, WRITE, &iov, 1, sizeof(request));
 		mutex_lock(&nsock->tx_lock);
 		ret = sock_xmit(nbd, i, 1, &from, 0, NULL);
 		if (ret <= 0)
diff --git a/drivers/fsi/fsi-sbefifo.c b/drivers/fsi/fsi-sbefifo.c
index ae861342626e3..d92f5b87c251e 100644
--- a/drivers/fsi/fsi-sbefifo.c
+++ b/drivers/fsi/fsi-sbefifo.c
@@ -638,7 +638,7 @@ static void sbefifo_collect_async_ffdc(struct sbefifo *sbefifo)
 	}
         ffdc_iov.iov_base = ffdc;
 	ffdc_iov.iov_len = SBEFIFO_MAX_FFDC_SIZE;
-        iov_iter_kvec(&ffdc_iter, WRITE | ITER_KVEC, &ffdc_iov, 1, SBEFIFO_MAX_FFDC_SIZE);
+        iov_iter_kvec(&ffdc_iter, WRITE, &ffdc_iov, 1, SBEFIFO_MAX_FFDC_SIZE);
 	cmd[0] = cpu_to_be32(2);
 	cmd[1] = cpu_to_be32(SBEFIFO_CMD_GET_SBE_FFDC);
 	rc = sbefifo_do_command(sbefifo, cmd, 2, &ffdc_iter);
@@ -735,7 +735,7 @@ int sbefifo_submit(struct device *dev, const __be32 *command, size_t cmd_len,
 	rbytes = (*resp_len) * sizeof(__be32);
 	resp_iov.iov_base = response;
 	resp_iov.iov_len = rbytes;
-        iov_iter_kvec(&resp_iter, WRITE | ITER_KVEC, &resp_iov, 1, rbytes);
+        iov_iter_kvec(&resp_iter, WRITE, &resp_iov, 1, rbytes);
 
 	/* Perform the command */
 	mutex_lock(&sbefifo->lock);
diff --git a/drivers/isdn/mISDN/l1oip_core.c b/drivers/isdn/mISDN/l1oip_core.c
index b05022f94f18c..072bb5e36c184 100644
--- a/drivers/isdn/mISDN/l1oip_core.c
+++ b/drivers/isdn/mISDN/l1oip_core.c
@@ -718,8 +718,7 @@ l1oip_socket_thread(void *data)
 		printk(KERN_DEBUG "%s: socket created and open\n",
 		       __func__);
 	while (!signal_pending(current)) {
-		iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1,
-				recvbuf_size);
+		iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, recvbuf_size);
 		recvlen = sock_recvmsg(socket, &msg, 0);
 		if (recvlen > 0) {
 			l1oip_socket_parse(hc, &sin_rx, recvbuf, recvlen);
diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c b/drivers/misc/vmw_vmci/vmci_queue_pair.c
index bd52f29b4a4e2..264f4ed8eef26 100644
--- a/drivers/misc/vmw_vmci/vmci_queue_pair.c
+++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c
@@ -3030,7 +3030,7 @@ ssize_t vmci_qpair_enqueue(struct vmci_qp *qpair,
 	if (!qpair || !buf)
 		return VMCI_ERROR_INVALID_ARGS;
 
-	iov_iter_kvec(&from, WRITE | ITER_KVEC, &v, 1, buf_size);
+	iov_iter_kvec(&from, WRITE, &v, 1, buf_size);
 
 	qp_lock(qpair);
 
@@ -3074,7 +3074,7 @@ ssize_t vmci_qpair_dequeue(struct vmci_qp *qpair,
 	if (!qpair || !buf)
 		return VMCI_ERROR_INVALID_ARGS;
 
-	iov_iter_kvec(&to, READ | ITER_KVEC, &v, 1, buf_size);
+	iov_iter_kvec(&to, READ, &v, 1, buf_size);
 
 	qp_lock(qpair);
 
@@ -3119,7 +3119,7 @@ ssize_t vmci_qpair_peek(struct vmci_qp *qpair,
 	if (!qpair || !buf)
 		return VMCI_ERROR_INVALID_ARGS;
 
-	iov_iter_kvec(&to, READ | ITER_KVEC, &v, 1, buf_size);
+	iov_iter_kvec(&to, READ, &v, 1, buf_size);
 
 	qp_lock(qpair);
 
diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c
index 81a9dc5290a87..97f282eca3c28 100644
--- a/drivers/nvme/target/io-cmd-file.c
+++ b/drivers/nvme/target/io-cmd-file.c
@@ -101,7 +101,7 @@ static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos,
 		rw = READ;
 	}
 
-	iov_iter_bvec(&iter, ITER_BVEC | rw, req->f.bvec, nr_segs, count);
+	iov_iter_bvec(&iter, rw, req->f.bvec, nr_segs, count);
 
 	iocb->ki_pos = pos;
 	iocb->ki_filp = req->ns->file;
diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c
index 49be1e41290cd..991482576417b 100644
--- a/drivers/target/iscsi/iscsi_target_util.c
+++ b/drivers/target/iscsi/iscsi_target_util.c
@@ -1258,8 +1258,7 @@ static int iscsit_do_rx_data(
 		return -1;
 
 	memset(&msg, 0, sizeof(struct msghdr));
-	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC,
-		      count->iov, count->iov_count, data);
+	iov_iter_kvec(&msg.msg_iter, READ, count->iov, count->iov_count, data);
 
 	while (msg_data_left(&msg)) {
 		rx_loop = sock_recvmsg(conn->sock, &msg, MSG_WAITALL);
@@ -1315,8 +1314,7 @@ int tx_data(
 
 	memset(&msg, 0, sizeof(struct msghdr));
 
-	iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC,
-		      iov, iov_count, data);
+	iov_iter_kvec(&msg.msg_iter, WRITE, iov, iov_count, data);
 
 	while (msg_data_left(&msg)) {
 		int tx_loop = sock_sendmsg(conn->sock, &msg);
diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c
index 16751ae55d7b6..49b110d1b972b 100644
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c
@@ -303,7 +303,7 @@ fd_execute_rw_aio(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
 		len += sg->length;
 	}
 
-	iov_iter_bvec(&iter, ITER_BVEC | is_write, bvec, sgl_nents, len);
+	iov_iter_bvec(&iter, is_write, bvec, sgl_nents, len);
 
 	aio_cmd->cmd = cmd;
 	aio_cmd->len = len;
@@ -353,7 +353,7 @@ static int fd_do_rw(struct se_cmd *cmd, struct file *fd,
 		len += sg->length;
 	}
 
-	iov_iter_bvec(&iter, ITER_BVEC, bvec, sgl_nents, len);
+	iov_iter_bvec(&iter, READ, bvec, sgl_nents, len);
 	if (is_write)
 		ret = vfs_iter_write(fd, &iter, &pos, 0);
 	else
@@ -490,7 +490,7 @@ fd_execute_write_same(struct se_cmd *cmd)
 		len += se_dev->dev_attrib.block_size;
 	}
 
-	iov_iter_bvec(&iter, ITER_BVEC, bvec, nolb, len);
+	iov_iter_bvec(&iter, READ, bvec, nolb, len);
 	ret = vfs_iter_write(fd_dev->fd_file, &iter, &pos, 0);
 
 	kfree(bvec);
diff --git a/drivers/usb/usbip/usbip_common.c b/drivers/usb/usbip/usbip_common.c
index 9756752c0681f..45da3e01c7b03 100644
--- a/drivers/usb/usbip/usbip_common.c
+++ b/drivers/usb/usbip/usbip_common.c
@@ -309,7 +309,7 @@ int usbip_recv(struct socket *sock, void *buf, int size)
 	if (!sock || !buf || !size)
 		return -EINVAL;
 
-	iov_iter_kvec(&msg.msg_iter, READ|ITER_KVEC, &iov, 1, size);
+	iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, size);
 
 	usbip_dbg_xmit("enter\n");
 
diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c
index b1092fbefa630..2e5d845b50914 100644
--- a/drivers/xen/pvcalls-back.c
+++ b/drivers/xen/pvcalls-back.c
@@ -137,13 +137,13 @@ static void pvcalls_conn_back_read(void *opaque)
 	if (masked_prod < masked_cons) {
 		vec[0].iov_base = data->in + masked_prod;
 		vec[0].iov_len = wanted;
-		iov_iter_kvec(&msg.msg_iter, ITER_KVEC|WRITE, vec, 1, wanted);
+		iov_iter_kvec(&msg.msg_iter, WRITE, vec, 1, wanted);
 	} else {
 		vec[0].iov_base = data->in + masked_prod;
 		vec[0].iov_len = array_size - masked_prod;
 		vec[1].iov_base = data->in;
 		vec[1].iov_len = wanted - vec[0].iov_len;
-		iov_iter_kvec(&msg.msg_iter, ITER_KVEC|WRITE, vec, 2, wanted);
+		iov_iter_kvec(&msg.msg_iter, WRITE, vec, 2, wanted);
 	}
 
 	atomic_set(&map->read, 0);
@@ -195,13 +195,13 @@ static void pvcalls_conn_back_write(struct sock_mapping *map)
 	if (pvcalls_mask(prod, array_size) > pvcalls_mask(cons, array_size)) {
 		vec[0].iov_base = data->out + pvcalls_mask(cons, array_size);
 		vec[0].iov_len = size;
-		iov_iter_kvec(&msg.msg_iter, ITER_KVEC|READ, vec, 1, size);
+		iov_iter_kvec(&msg.msg_iter, READ, vec, 1, size);
 	} else {
 		vec[0].iov_base = data->out + pvcalls_mask(cons, array_size);
 		vec[0].iov_len = array_size - pvcalls_mask(cons, array_size);
 		vec[1].iov_base = data->out;
 		vec[1].iov_len = size - vec[0].iov_len;
-		iov_iter_kvec(&msg.msg_iter, ITER_KVEC|READ, vec, 2, size);
+		iov_iter_kvec(&msg.msg_iter, READ, vec, 2, size);
 	}
 
 	atomic_set(&map->write, 0);
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index e1cbdfdb7c684..0bcbcc20f7695 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -65,7 +65,7 @@ static int v9fs_fid_readpage(struct p9_fid *fid, struct page *page)
 	if (retval == 0)
 		return retval;
 
-	iov_iter_bvec(&to, ITER_BVEC | READ, &bvec, 1, PAGE_SIZE);
+	iov_iter_bvec(&to, READ, &bvec, 1, PAGE_SIZE);
 
 	retval = p9_client_read(fid, page_offset(page), &to, &err);
 	if (err) {
@@ -175,7 +175,7 @@ static int v9fs_vfs_writepage_locked(struct page *page)
 	bvec.bv_page = page;
 	bvec.bv_offset = 0;
 	bvec.bv_len = len;
-	iov_iter_bvec(&from, ITER_BVEC | WRITE, &bvec, 1, len);
+	iov_iter_bvec(&from, WRITE, &bvec, 1, len);
 
 	/* We should have writeback_fid always set */
 	BUG_ON(!v9inode->writeback_fid);
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index b0405d6aac854..d5db3c968a035 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -133,7 +133,7 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx)
 		if (rdir->tail == rdir->head) {
 			struct iov_iter to;
 			int n;
-			iov_iter_kvec(&to, READ | ITER_KVEC, &kvec, 1, buflen);
+			iov_iter_kvec(&to, READ, &kvec, 1, buflen);
 			n = p9_client_read(file->private_data, ctx->pos, &to,
 					   &err);
 			if (err)
diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c
index 352abc39e891a..ac8ff8ca4c115 100644
--- a/fs/9p/xattr.c
+++ b/fs/9p/xattr.c
@@ -32,7 +32,7 @@ ssize_t v9fs_fid_xattr_get(struct p9_fid *fid, const char *name,
 	struct iov_iter to;
 	int err;
 
-	iov_iter_kvec(&to, READ | ITER_KVEC, &kvec, 1, buffer_size);
+	iov_iter_kvec(&to, READ, &kvec, 1, buffer_size);
 
 	attr_fid = p9_client_xattrwalk(fid, name, &attr_size);
 	if (IS_ERR(attr_fid)) {
@@ -107,7 +107,7 @@ int v9fs_fid_xattr_set(struct p9_fid *fid, const char *name,
 	struct iov_iter from;
 	int retval, err;
 
-	iov_iter_kvec(&from, WRITE | ITER_KVEC, &kvec, 1, value_len);
+	iov_iter_kvec(&from, WRITE, &kvec, 1, value_len);
 
 	p9_debug(P9_DEBUG_VFS, "name = %s value_len = %zu flags = %d\n",
 		 name, value_len, flags);
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 77a83790a31f3..639c16882e932 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -286,7 +286,7 @@ static void afs_load_bvec(struct afs_call *call, struct msghdr *msg,
 		offset = 0;
 	}
 
-	iov_iter_bvec(&msg->msg_iter, WRITE | ITER_BVEC, bv, nr, bytes);
+	iov_iter_bvec(&msg->msg_iter, WRITE, bv, nr, bytes);
 }
 
 /*
@@ -401,8 +401,7 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
 
 	msg.msg_name		= NULL;
 	msg.msg_namelen		= 0;
-	iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, iov, 1,
-		      call->request_size);
+	iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, call->request_size);
 	msg.msg_control		= NULL;
 	msg.msg_controllen	= 0;
 	msg.msg_flags		= MSG_WAITALL | (call->send_pages ? MSG_MORE : 0);
@@ -432,7 +431,7 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
 		rxrpc_kernel_abort_call(call->net->socket, rxcall,
 					RX_USER_ABORT, ret, "KSD");
 	} else {
-		iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, NULL, 0, 0);
+		iov_iter_kvec(&msg.msg_iter, READ, NULL, 0, 0);
 		rxrpc_kernel_recv_data(call->net->socket, rxcall,
 				       &msg.msg_iter, false,
 				       &call->abort_code, &call->service_id);
@@ -468,7 +467,7 @@ static void afs_deliver_to_call(struct afs_call *call)
 		if (state == AFS_CALL_SV_AWAIT_ACK) {
 			struct iov_iter iter;
 
-			iov_iter_kvec(&iter, READ | ITER_KVEC, NULL, 0, 0);
+			iov_iter_kvec(&iter, READ, NULL, 0, 0);
 			ret = rxrpc_kernel_recv_data(call->net->socket,
 						     call->rxcall, &iter, false,
 						     &remote_abort,
@@ -825,7 +824,7 @@ void afs_send_empty_reply(struct afs_call *call)
 
 	msg.msg_name		= NULL;
 	msg.msg_namelen		= 0;
-	iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, NULL, 0, 0);
+	iov_iter_kvec(&msg.msg_iter, WRITE, NULL, 0, 0);
 	msg.msg_control		= NULL;
 	msg.msg_controllen	= 0;
 	msg.msg_flags		= 0;
@@ -864,7 +863,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
 	iov[0].iov_len		= len;
 	msg.msg_name		= NULL;
 	msg.msg_namelen		= 0;
-	iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, iov, 1, len);
+	iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, len);
 	msg.msg_control		= NULL;
 	msg.msg_controllen	= 0;
 	msg.msg_flags		= 0;
@@ -905,7 +904,7 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count,
 
 	iov.iov_base = buf + call->offset;
 	iov.iov_len = count - call->offset;
-	iov_iter_kvec(&iter, ITER_KVEC | READ, &iov, 1, count - call->offset);
+	iov_iter_kvec(&iter, READ, &iov, 1, count - call->offset);
 
 	ret = rxrpc_kernel_recv_data(net->socket, call->rxcall, &iter,
 				     want_more, &remote_abort,
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 524ecc95b04de..5dd433aa9b234 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -821,7 +821,7 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req)
 				aio_req->total_len = rc + zlen;
 			}
 
-			iov_iter_bvec(&i, ITER_BVEC, osd_data->bvec_pos.bvecs,
+			iov_iter_bvec(&i, READ, osd_data->bvec_pos.bvecs,
 				      osd_data->num_bvecs,
 				      osd_data->bvec_pos.iter.bi_size);
 			iov_iter_advance(&i, rc);
@@ -1044,8 +1044,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
 				int zlen = min_t(size_t, len - ret,
 						 size - pos - ret);
 
-				iov_iter_bvec(&i, ITER_BVEC, bvecs, num_pages,
-					      len);
+				iov_iter_bvec(&i, READ, bvecs, num_pages, len);
 				iov_iter_advance(&i, ret);
 				iov_iter_zero(zlen, &i);
 				ret += zlen;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 52d71b64c0c6e..11bcd2fb90b13 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -588,7 +588,7 @@ cifs_read_from_socket(struct TCP_Server_Info *server, char *buf,
 {
 	struct msghdr smb_msg;
 	struct kvec iov = {.iov_base = buf, .iov_len = to_read};
-	iov_iter_kvec(&smb_msg.msg_iter, READ | ITER_KVEC, &iov, 1, to_read);
+	iov_iter_kvec(&smb_msg.msg_iter, READ, &iov, 1, to_read);
 
 	return cifs_readv_from_socket(server, &smb_msg);
 }
@@ -600,7 +600,7 @@ cifs_read_page_from_socket(struct TCP_Server_Info *server, struct page *page,
 	struct msghdr smb_msg;
 	struct bio_vec bv = {
 		.bv_page = page, .bv_len = to_read, .bv_offset = page_offset};
-	iov_iter_bvec(&smb_msg.msg_iter, READ | ITER_BVEC, &bv, 1, to_read);
+	iov_iter_bvec(&smb_msg.msg_iter, READ, &bv, 1, to_read);
 	return cifs_readv_from_socket(server, &smb_msg);
 }
 
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 7b5b960a04b8c..10cff44832d8b 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -857,7 +857,7 @@ setup_aio_ctx_iter(struct cifs_aio_ctx *ctx, struct iov_iter *iter, int rw)
 	ctx->bv = bv;
 	ctx->len = saved_len - count;
 	ctx->npages = npages;
-	iov_iter_bvec(&ctx->iter, ITER_BVEC | rw, ctx->bv, npages, ctx->len);
+	iov_iter_bvec(&ctx->iter, rw, ctx->bv, npages, ctx->len);
 	return 0;
 }
 
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 89985a0a6819e..26f8a65b8722f 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -2962,13 +2962,13 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
 			return 0;
 		}
 
-		iov_iter_bvec(&iter, WRITE | ITER_BVEC, bvec, npages, data_len);
+		iov_iter_bvec(&iter, WRITE, bvec, npages, data_len);
 	} else if (buf_len >= data_offset + data_len) {
 		/* read response payload is in buf */
 		WARN_ONCE(npages > 0, "read data can be either in buf or in pages");
 		iov.iov_base = buf + data_offset;
 		iov.iov_len = data_len;
-		iov_iter_kvec(&iter, WRITE | ITER_KVEC, &iov, 1, data_len);
+		iov_iter_kvec(&iter, WRITE, &iov, 1, data_len);
 	} else {
 		/* read response payload cannot be in both buf and pages */
 		WARN_ONCE(1, "buf can not contain only a part of read data");
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index b48f43963da6a..146b618802a5c 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -307,8 +307,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
 			.iov_base = &rfc1002_marker,
 			.iov_len  = 4
 		};
-		iov_iter_kvec(&smb_msg.msg_iter, WRITE | ITER_KVEC, &hiov,
-			      1, 4);
+		iov_iter_kvec(&smb_msg.msg_iter, WRITE, &hiov, 1, 4);
 		rc = smb_send_kvec(server, &smb_msg, &sent);
 		if (rc < 0)
 			goto uncork;
@@ -329,8 +328,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
 			size += iov[i].iov_len;
 		}
 
-		iov_iter_kvec(&smb_msg.msg_iter, WRITE | ITER_KVEC,
-			      iov, n_vec, size);
+		iov_iter_kvec(&smb_msg.msg_iter, WRITE, iov, n_vec, size);
 
 		rc = smb_send_kvec(server, &smb_msg, &sent);
 		if (rc < 0)
@@ -346,7 +344,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
 			rqst_page_get_length(&rqst[j], i, &bvec.bv_len,
 					     &bvec.bv_offset);
 
-			iov_iter_bvec(&smb_msg.msg_iter, WRITE | ITER_BVEC,
+			iov_iter_bvec(&smb_msg.msg_iter, WRITE,
 				      &bvec, 1, bvec.bv_len);
 			rc = smb_send_kvec(server, &smb_msg, &sent);
 			if (rc < 0)
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index a5e4a221435c0..76976d6e50f93 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -674,7 +674,7 @@ static int receive_from_sock(struct connection *con)
 		nvec = 2;
 	}
 	len = iov[0].iov_len + iov[1].iov_len;
-	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, iov, nvec, len);
+	iov_iter_kvec(&msg.msg_iter, READ, iov, nvec, len);
 
 	r = ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT | MSG_NOSIGNAL);
 	if (ret <= 0)
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index b53e76391e525..8b90f54809047 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -923,7 +923,7 @@ __be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	int host_err;
 
 	trace_nfsd_read_vector(rqstp, fhp, offset, *count);
-	iov_iter_kvec(&iter, READ | ITER_KVEC, vec, vlen, *count);
+	iov_iter_kvec(&iter, READ, vec, vlen, *count);
 	host_err = vfs_iter_read(file, &iter, &offset, 0);
 	return nfsd_finish_read(rqstp, fhp, file, offset, count, host_err);
 }
@@ -999,7 +999,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 	if (stable && !use_wgather)
 		flags |= RWF_SYNC;
 
-	iov_iter_kvec(&iter, WRITE | ITER_KVEC, vec, vlen, *cnt);
+	iov_iter_kvec(&iter, WRITE, vec, vlen, *cnt);
 	host_err = vfs_iter_write(file, &iter, &pos, flags);
 	if (host_err < 0)
 		goto out_nfserr;
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 7d9eea7d4a873..e9f236af1927d 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -916,7 +916,7 @@ static int o2net_recv_tcp_msg(struct socket *sock, void *data, size_t len)
 {
 	struct kvec vec = { .iov_len = len, .iov_base = data, };
 	struct msghdr msg = { .msg_flags = MSG_DONTWAIT, };
-	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1, len);
+	iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, len);
 	return sock_recvmsg(sock, &msg, MSG_DONTWAIT);
 }
 
diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
index 31932879b7162..136a8bdc1d910 100644
--- a/fs/orangefs/inode.c
+++ b/fs/orangefs/inode.c
@@ -25,7 +25,7 @@ static int read_one_page(struct page *page)
 	struct iov_iter to;
 	struct bio_vec bv = {.bv_page = page, .bv_len = PAGE_SIZE};
 
-	iov_iter_bvec(&to, ITER_BVEC | READ, &bv, 1, PAGE_SIZE);
+	iov_iter_bvec(&to, READ, &bv, 1, PAGE_SIZE);
 
 	gossip_debug(GOSSIP_INODE_DEBUG,
 		    "orangefs_readpage called with page %p\n",
diff --git a/fs/splice.c b/fs/splice.c
index b3daa971f5977..3553f1956508d 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -301,7 +301,7 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
 	struct kiocb kiocb;
 	int idx, ret;
 
-	iov_iter_pipe(&to, ITER_PIPE | READ, pipe, len);
+	iov_iter_pipe(&to, READ, pipe, len);
 	idx = to.idx;
 	init_sync_kiocb(&kiocb, in);
 	kiocb.ki_pos = *ppos;
@@ -386,7 +386,7 @@ static ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
 	 */
 	offset = *ppos & ~PAGE_MASK;
 
-	iov_iter_pipe(&to, ITER_PIPE | READ, pipe, len + offset);
+	iov_iter_pipe(&to, READ, pipe, len + offset);
 
 	res = iov_iter_get_pages_alloc(&to, &pages, len + offset, &base);
 	if (res <= 0)
@@ -745,8 +745,7 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
 			left -= this_len;
 		}
 
-		iov_iter_bvec(&from, ITER_BVEC | WRITE, array, n,
-			      sd.total_len - left);
+		iov_iter_bvec(&from, WRITE, array, n, sd.total_len - left);
 		ret = vfs_iter_write(out, &from, &sd.pos, 0);
 		if (ret <= 0)
 			break;
diff --git a/include/linux/uio.h b/include/linux/uio.h
index fcabc959c794d..3d8f1acc142c2 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -29,7 +29,7 @@ enum iter_type {
 };
 
 struct iov_iter {
-	int type;
+	unsigned int type;
 	size_t iov_offset;
 	size_t count;
 	union {
@@ -212,13 +212,13 @@ size_t copy_to_iter_mcsafe(void *addr, size_t bytes, struct iov_iter *i)
 size_t iov_iter_zero(size_t bytes, struct iov_iter *);
 unsigned long iov_iter_alignment(const struct iov_iter *i);
 unsigned long iov_iter_gap_alignment(const struct iov_iter *i);
-void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov,
+void iov_iter_init(struct iov_iter *i, unsigned int direction, const struct iovec *iov,
 			unsigned long nr_segs, size_t count);
-void iov_iter_kvec(struct iov_iter *i, int direction, const struct kvec *kvec,
+void iov_iter_kvec(struct iov_iter *i, unsigned int direction, const struct kvec *kvec,
 			unsigned long nr_segs, size_t count);
-void iov_iter_bvec(struct iov_iter *i, int direction, const struct bio_vec *bvec,
+void iov_iter_bvec(struct iov_iter *i, unsigned int direction, const struct bio_vec *bvec,
 			unsigned long nr_segs, size_t count);
-void iov_iter_pipe(struct iov_iter *i, int direction, struct pipe_inode_info *pipe,
+void iov_iter_pipe(struct iov_iter *i, unsigned int direction, struct pipe_inode_info *pipe,
 			size_t count);
 ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
 			size_t maxsize, unsigned maxpages, size_t *start);
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 42d39116a5564..c42b928b15efa 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -428,17 +428,19 @@ int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
 }
 EXPORT_SYMBOL(iov_iter_fault_in_readable);
 
-void iov_iter_init(struct iov_iter *i, int direction,
+void iov_iter_init(struct iov_iter *i, unsigned int direction,
 			const struct iovec *iov, unsigned long nr_segs,
 			size_t count)
 {
+	WARN_ON(direction & ~(READ | WRITE));
+	direction &= READ | WRITE;
+
 	/* It will get better.  Eventually... */
 	if (uaccess_kernel()) {
-		direction |= ITER_KVEC;
-		i->type = direction;
+		i->type = ITER_KVEC | direction;
 		i->kvec = (struct kvec *)iov;
 	} else {
-		i->type = direction;
+		i->type = ITER_IOVEC | direction;
 		i->iov = iov;
 	}
 	i->nr_segs = nr_segs;
@@ -1060,12 +1062,12 @@ size_t iov_iter_single_seg_count(const struct iov_iter *i)
 }
 EXPORT_SYMBOL(iov_iter_single_seg_count);
 
-void iov_iter_kvec(struct iov_iter *i, int direction,
+void iov_iter_kvec(struct iov_iter *i, unsigned int direction,
 			const struct kvec *kvec, unsigned long nr_segs,
 			size_t count)
 {
-	BUG_ON(!(direction & ITER_KVEC));
-	i->type = direction;
+	WARN_ON(direction & ~(READ | WRITE));
+	i->type = ITER_KVEC | (direction & (READ | WRITE));
 	i->kvec = kvec;
 	i->nr_segs = nr_segs;
 	i->iov_offset = 0;
@@ -1073,12 +1075,12 @@ void iov_iter_kvec(struct iov_iter *i, int direction,
 }
 EXPORT_SYMBOL(iov_iter_kvec);
 
-void iov_iter_bvec(struct iov_iter *i, int direction,
+void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
 			const struct bio_vec *bvec, unsigned long nr_segs,
 			size_t count)
 {
-	BUG_ON(!(direction & ITER_BVEC));
-	i->type = direction;
+	WARN_ON(direction & ~(READ | WRITE));
+	i->type = ITER_BVEC | (direction & (READ | WRITE));
 	i->bvec = bvec;
 	i->nr_segs = nr_segs;
 	i->iov_offset = 0;
@@ -1086,13 +1088,13 @@ void iov_iter_bvec(struct iov_iter *i, int direction,
 }
 EXPORT_SYMBOL(iov_iter_bvec);
 
-void iov_iter_pipe(struct iov_iter *i, int direction,
+void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
 			struct pipe_inode_info *pipe,
 			size_t count)
 {
-	BUG_ON(direction != ITER_PIPE);
+	BUG_ON(direction != READ);
 	WARN_ON(pipe->nrbufs == pipe->buffers);
-	i->type = direction;
+	i->type = ITER_PIPE | READ;
 	i->pipe = pipe;
 	i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
 	i->iov_offset = 0;
diff --git a/mm/page_io.c b/mm/page_io.c
index aafd19ec1db46..86de453a60cf8 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -294,7 +294,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
 		};
 		struct iov_iter from;
 
-		iov_iter_bvec(&from, ITER_BVEC | WRITE, &bv, 1, PAGE_SIZE);
+		iov_iter_bvec(&from, WRITE, &bv, 1, PAGE_SIZE);
 		init_sync_kiocb(&kiocb, swap_file);
 		kiocb.ki_pos = page_file_offset(page);
 
diff --git a/net/9p/client.c b/net/9p/client.c
index deae53a7dffc8..a9cd1401bd098 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -2070,7 +2070,7 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
 	struct kvec kv = {.iov_base = data, .iov_len = count};
 	struct iov_iter to;
 
-	iov_iter_kvec(&to, READ | ITER_KVEC, &kv, 1, count);
+	iov_iter_kvec(&to, READ, &kv, 1, count);
 
 	p9_debug(P9_DEBUG_9P, ">>> TREADDIR fid %d offset %llu count %d\n",
 				fid->fid, (unsigned long long) offset, count);
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 4e2576fc0c599..828e87fe80278 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -467,7 +467,7 @@ static int send_pkt(struct l2cap_chan *chan, struct sk_buff *skb,
 	iv.iov_len = skb->len;
 
 	memset(&msg, 0, sizeof(msg));
-	iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iv, 1, skb->len);
+	iov_iter_kvec(&msg.msg_iter, WRITE, &iv, 1, skb->len);
 
 	err = l2cap_chan_send(chan, &msg, skb->len);
 	if (err > 0) {
diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c
index 51c2cf2d8923a..58fc6333d4122 100644
--- a/net/bluetooth/a2mp.c
+++ b/net/bluetooth/a2mp.c
@@ -63,7 +63,7 @@ static void a2mp_send(struct amp_mgr *mgr, u8 code, u8 ident, u16 len, void *dat
 
 	memset(&msg, 0, sizeof(msg));
 
-	iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iv, 1, total_len);
+	iov_iter_kvec(&msg.msg_iter, WRITE, &iv, 1, total_len);
 
 	l2cap_chan_send(chan, &msg, total_len);
 
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index a1c1b7e8a45ca..c822e626761bd 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -622,7 +622,7 @@ static void smp_send_cmd(struct l2cap_conn *conn, u8 code, u16 len, void *data)
 
 	memset(&msg, 0, sizeof(msg));
 
-	iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, iv, 2, 1 + len);
+	iov_iter_kvec(&msg.msg_iter, WRITE, iv, 2, 1 + len);
 
 	l2cap_chan_send(chan, &msg, 1 + len);
 
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 0a187196aeede..e493ff77b378c 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -526,7 +526,7 @@ static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len)
 	if (!buf)
 		msg.msg_flags |= MSG_TRUNC;
 
-	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, len);
+	iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, len);
 	r = sock_recvmsg(sock, &msg, msg.msg_flags);
 	if (r == -EAGAIN)
 		r = 0;
@@ -545,7 +545,7 @@ static int ceph_tcp_recvpage(struct socket *sock, struct page *page,
 	int r;
 
 	BUG_ON(page_offset + length > PAGE_SIZE);
-	iov_iter_bvec(&msg.msg_iter, READ | ITER_BVEC, &bvec, 1, length);
+	iov_iter_bvec(&msg.msg_iter, READ, &bvec, 1, length);
 	r = sock_recvmsg(sock, &msg, msg.msg_flags);
 	if (r == -EAGAIN)
 		r = 0;
@@ -607,7 +607,7 @@ static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
 	else
 		msg.msg_flags |= MSG_EOR;  /* superfluous, but what the hell */
 
-	iov_iter_bvec(&msg.msg_iter, WRITE | ITER_BVEC, &bvec, 1, size);
+	iov_iter_bvec(&msg.msg_iter, WRITE, &bvec, 1, size);
 	ret = sock_sendmsg(sock, &msg);
 	if (ret == -EAGAIN)
 		ret = 0;
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index d4020c5e831d3..2526be6b3d909 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1616,7 +1616,7 @@ ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
 	EnterFunction(7);
 
 	/* Receive a packet */
-	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, buflen);
+	iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, buflen);
 	len = sock_recvmsg(sock, &msg, MSG_DONTWAIT);
 	if (len < 0)
 		return len;
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 52241d679cc91..89c3a8c7859a3 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -286,7 +286,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
 	 */
 	krflags = MSG_PEEK | MSG_WAITALL;
 	smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
-	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1,
+	iov_iter_kvec(&msg.msg_iter, READ, &vec, 1,
 			sizeof(struct smc_clc_msg_hdr));
 	len = sock_recvmsg(smc->clcsock, &msg, krflags);
 	if (signal_pending(current)) {
@@ -325,7 +325,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
 
 	/* receive the complete CLC message */
 	memset(&msg, 0, sizeof(struct msghdr));
-	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1, datlen);
+	iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, datlen);
 	krflags = MSG_WAITALL;
 	len = sock_recvmsg(smc->clcsock, &msg, krflags);
 	if (len < datlen || !smc_clc_msg_hdr_valid(clcm)) {
diff --git a/net/socket.c b/net/socket.c
index b68801c7d0ab0..fae408abea547 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -635,7 +635,7 @@ EXPORT_SYMBOL(sock_sendmsg);
 int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
 		   struct kvec *vec, size_t num, size_t size)
 {
-	iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
+	iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
 	return sock_sendmsg(sock, msg);
 }
 EXPORT_SYMBOL(kernel_sendmsg);
@@ -648,7 +648,7 @@ int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
 	if (!sock->ops->sendmsg_locked)
 		return sock_no_sendmsg_locked(sk, msg, size);
 
-	iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
+	iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
 
 	return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
 }
@@ -823,7 +823,7 @@ int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
 	mm_segment_t oldfs = get_fs();
 	int result;
 
-	iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, vec, num, size);
+	iov_iter_kvec(&msg->msg_iter, READ, vec, num, size);
 	set_fs(KERNEL_DS);
 	result = sock_recvmsg(sock, msg, flags);
 	set_fs(oldfs);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 5445145e639c9..0b46ec0bf74e8 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -338,7 +338,7 @@ static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr,
 	rqstp->rq_xprt_hlen = 0;
 
 	clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
-	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, iov, nr, buflen);
+	iov_iter_kvec(&msg.msg_iter, READ, iov, nr, buflen);
 	len = sock_recvmsg(svsk->sk_sock, &msg, msg.msg_flags);
 	/* If we read a full record, then assume there may be more
 	 * data to read (stream based sockets only!)
diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c
index d8956f7daac41..afa02eeec4034 100644
--- a/net/tipc/topsrv.c
+++ b/net/tipc/topsrv.c
@@ -394,7 +394,7 @@ static int tipc_conn_rcv_from_sock(struct tipc_conn *con)
 	iov.iov_base = &s;
 	iov.iov_len = sizeof(s);
 	msg.msg_name = NULL;
-	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, iov.iov_len);
+	iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, iov.iov_len);
 	ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT);
 	if (ret == -EWOULDBLOCK)
 		return -EWOULDBLOCK;
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 276edbc04f385..d753e362d2d9e 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -489,7 +489,7 @@ int tls_device_sendpage(struct sock *sk, struct page *page,
 
 	iov.iov_base = kaddr + offset;
 	iov.iov_len = size;
-	iov_iter_kvec(&msg_iter, WRITE | ITER_KVEC, &iov, 1, size);
+	iov_iter_kvec(&msg_iter, WRITE, &iov, 1, size);
 	rc = tls_push_data(sk, &msg_iter, size,
 			   flags, TLS_RECORD_TYPE_DATA);
 	kunmap(page);
@@ -538,7 +538,7 @@ static int tls_device_push_pending_record(struct sock *sk, int flags)
 {
 	struct iov_iter	msg_iter;
 
-	iov_iter_kvec(&msg_iter, WRITE | ITER_KVEC, NULL, 0, 0);
+	iov_iter_kvec(&msg_iter, WRITE, NULL, 0, 0);
 	return tls_push_data(sk, &msg_iter, 0, flags, TLS_RECORD_TYPE_DATA);
 }
 
-- 
GitLab


From 9ea9ce0427aab02a2fd88fc608267cf6952119f1 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sat, 20 Oct 2018 00:57:56 +0100
Subject: [PATCH 0146/1890] iov_iter: Add I/O discard iterator

Add a new iterator, ITER_DISCARD, that can only be used in READ mode and
just discards any data copied to it.

This is useful in a network filesystem for discarding any unwanted data
sent by a server.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/linux/uio.h |  7 ++++++
 lib/iov_iter.c      | 55 +++++++++++++++++++++++++++++++++++++++------
 2 files changed, 55 insertions(+), 7 deletions(-)

diff --git a/include/linux/uio.h b/include/linux/uio.h
index 3d8f1acc142c2..55ce99ddb912f 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -26,6 +26,7 @@ enum iter_type {
 	ITER_KVEC = 2,
 	ITER_BVEC = 4,
 	ITER_PIPE = 8,
+	ITER_DISCARD = 16,
 };
 
 struct iov_iter {
@@ -72,6 +73,11 @@ static inline bool iov_iter_is_pipe(const struct iov_iter *i)
 	return iov_iter_type(i) == ITER_PIPE;
 }
 
+static inline bool iov_iter_is_discard(const struct iov_iter *i)
+{
+	return iov_iter_type(i) == ITER_DISCARD;
+}
+
 static inline unsigned char iov_iter_rw(const struct iov_iter *i)
 {
 	return i->type & (READ | WRITE);
@@ -220,6 +226,7 @@ void iov_iter_bvec(struct iov_iter *i, unsigned int direction, const struct bio_
 			unsigned long nr_segs, size_t count);
 void iov_iter_pipe(struct iov_iter *i, unsigned int direction, struct pipe_inode_info *pipe,
 			size_t count);
+void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count);
 ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
 			size_t maxsize, unsigned maxpages, size_t *start);
 ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages,
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index c42b928b15efa..7ebccb5c16377 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -83,6 +83,7 @@
 			const struct kvec *kvec;		\
 			struct kvec v;				\
 			iterate_kvec(i, n, v, kvec, skip, (K))	\
+		} else if (unlikely(i->type & ITER_DISCARD)) {	\
 		} else {					\
 			const struct iovec *iov;		\
 			struct iovec v;				\
@@ -114,6 +115,8 @@
 			}					\
 			i->nr_segs -= kvec - i->kvec;		\
 			i->kvec = kvec;				\
+		} else if (unlikely(i->type & ITER_DISCARD)) {	\
+			skip += n;				\
 		} else {					\
 			const struct iovec *iov;		\
 			struct iovec v;				\
@@ -838,7 +841,9 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
 		size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
 		kunmap_atomic(kaddr);
 		return wanted;
-	} else if (likely(!iov_iter_is_pipe(i)))
+	} else if (unlikely(iov_iter_is_discard(i)))
+		return bytes;
+	else if (likely(!iov_iter_is_pipe(i)))
 		return copy_page_to_iter_iovec(page, offset, bytes, i);
 	else
 		return copy_page_to_iter_pipe(page, offset, bytes, i);
@@ -850,7 +855,7 @@ size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
 {
 	if (unlikely(!page_copy_sane(page, offset, bytes)))
 		return 0;
-	if (unlikely(iov_iter_is_pipe(i))) {
+	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
 		WARN_ON(1);
 		return 0;
 	}
@@ -910,7 +915,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
 		kunmap_atomic(kaddr);
 		return 0;
 	}
-	if (unlikely(iov_iter_is_pipe(i))) {
+	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
 		kunmap_atomic(kaddr);
 		WARN_ON(1);
 		return 0;
@@ -978,6 +983,10 @@ void iov_iter_advance(struct iov_iter *i, size_t size)
 		pipe_advance(i, size);
 		return;
 	}
+	if (unlikely(iov_iter_is_discard(i))) {
+		i->count -= size;
+		return;
+	}
 	iterate_and_advance(i, size, v, 0, 0, 0)
 }
 EXPORT_SYMBOL(iov_iter_advance);
@@ -1013,6 +1022,8 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll)
 		pipe_truncate(i);
 		return;
 	}
+	if (unlikely(iov_iter_is_discard(i)))
+		return;
 	if (unroll <= i->iov_offset) {
 		i->iov_offset -= unroll;
 		return;
@@ -1055,6 +1066,8 @@ size_t iov_iter_single_seg_count(const struct iov_iter *i)
 		return i->count;	// it is a silly place, anyway
 	if (i->nr_segs == 1)
 		return i->count;
+	if (unlikely(iov_iter_is_discard(i)))
+		return i->count;
 	else if (iov_iter_is_bvec(i))
 		return min(i->count, i->bvec->bv_len - i->iov_offset);
 	else
@@ -1103,6 +1116,24 @@ void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
 }
 EXPORT_SYMBOL(iov_iter_pipe);
 
+/**
+ * iov_iter_discard - Initialise an I/O iterator that discards data
+ * @i: The iterator to initialise.
+ * @direction: The direction of the transfer.
+ * @count: The size of the I/O buffer in bytes.
+ *
+ * Set up an I/O iterator that just discards everything that's written to it.
+ * It's only available as a READ iterator.
+ */
+void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
+{
+	BUG_ON(direction != READ);
+	i->type = ITER_DISCARD | READ;
+	i->count = count;
+	i->iov_offset = 0;
+}
+EXPORT_SYMBOL(iov_iter_discard);
+
 unsigned long iov_iter_alignment(const struct iov_iter *i)
 {
 	unsigned long res = 0;
@@ -1127,7 +1158,7 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
 	unsigned long res = 0;
 	size_t size = i->count;
 
-	if (unlikely(iov_iter_is_pipe(i))) {
+	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
 		WARN_ON(1);
 		return ~0U;
 	}
@@ -1197,6 +1228,9 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
 
 	if (unlikely(iov_iter_is_pipe(i)))
 		return pipe_get_pages(i, pages, maxsize, maxpages, start);
+	if (unlikely(iov_iter_is_discard(i)))
+		return -EFAULT;
+
 	iterate_all_kinds(i, maxsize, v, ({
 		unsigned long addr = (unsigned long)v.iov_base;
 		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
@@ -1274,6 +1308,9 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
 
 	if (unlikely(iov_iter_is_pipe(i)))
 		return pipe_get_pages_alloc(i, pages, maxsize, start);
+	if (unlikely(iov_iter_is_discard(i)))
+		return -EFAULT;
+
 	iterate_all_kinds(i, maxsize, v, ({
 		unsigned long addr = (unsigned long)v.iov_base;
 		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
@@ -1315,7 +1352,7 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
 	__wsum sum, next;
 	size_t off = 0;
 	sum = *csum;
-	if (unlikely(iov_iter_is_pipe(i))) {
+	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
 		WARN_ON(1);
 		return 0;
 	}
@@ -1357,7 +1394,7 @@ bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
 	__wsum sum, next;
 	size_t off = 0;
 	sum = *csum;
-	if (unlikely(iov_iter_is_pipe(i))) {
+	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
 		WARN_ON(1);
 		return false;
 	}
@@ -1402,7 +1439,7 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum,
 	__wsum sum, next;
 	size_t off = 0;
 	sum = *csum;
-	if (unlikely(iov_iter_is_pipe(i))) {
+	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
 		WARN_ON(1);	/* for now */
 		return 0;
 	}
@@ -1444,6 +1481,8 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages)
 
 	if (!size)
 		return 0;
+	if (unlikely(iov_iter_is_discard(i)))
+		return 0;
 
 	if (unlikely(iov_iter_is_pipe(i))) {
 		struct pipe_inode_info *pipe = i->pipe;
@@ -1487,6 +1526,8 @@ const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
 		WARN_ON(1);
 		return NULL;
 	}
+	if (unlikely(iov_iter_is_discard(new)))
+		return NULL;
 	if (iov_iter_is_bvec(new))
 		return new->bvec = kmemdup(new->bvec,
 				    new->nr_segs * sizeof(struct bio_vec),
-- 
GitLab


From 160cb9574b550426122422444b8f19d614505f81 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sat, 20 Oct 2018 00:57:56 +0100
Subject: [PATCH 0147/1890] afs: Better tracing of protocol errors

Include the site of detection of AFS protocol errors in trace lines to
better be able to determine what went wrong.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/cmservice.c         |   6 +-
 fs/afs/fsclient.c          | 117 ++++++++++++++++++++++---------------
 fs/afs/inode.c             |   2 +-
 fs/afs/internal.h          |   2 +-
 fs/afs/rxrpc.c             |   5 +-
 fs/afs/vlclient.c          |  30 ++++++----
 include/trace/events/afs.h |  54 ++++++++++++++---
 7 files changed, 146 insertions(+), 70 deletions(-)

diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 9e51d6fe7e8f9..58f79301a7164 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -189,7 +189,8 @@ static int afs_deliver_cb_callback(struct afs_call *call)
 		call->count = ntohl(call->tmp);
 		_debug("FID count: %u", call->count);
 		if (call->count > AFSCBMAX)
-			return afs_protocol_error(call, -EBADMSG);
+			return afs_protocol_error(call, -EBADMSG,
+						  afs_eproto_cb_fid_count);
 
 		call->buffer = kmalloc(array3_size(call->count, 3, 4),
 				       GFP_KERNEL);
@@ -234,7 +235,8 @@ static int afs_deliver_cb_callback(struct afs_call *call)
 		call->count2 = ntohl(call->tmp);
 		_debug("CB count: %u", call->count2);
 		if (call->count2 != call->count && call->count2 != 0)
-			return afs_protocol_error(call, -EBADMSG);
+			return afs_protocol_error(call, -EBADMSG,
+						  afs_eproto_cb_count);
 		call->offset = 0;
 		call->unmarshall++;
 
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 50929cb91732f..d9a5815945dcd 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -233,7 +233,7 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call,
 
 bad:
 	xdr_dump_bad(*_bp);
-	return afs_protocol_error(call, -EBADMSG);
+	return afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status);
 }
 
 /*
@@ -399,9 +399,10 @@ static int afs_deliver_fs_fetch_status_vnode(struct afs_call *call)
 
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
-	if (afs_decode_status(call, &bp, &vnode->status, vnode,
-			      &call->expected_version, NULL) < 0)
-		return afs_protocol_error(call, -EBADMSG);
+	ret = afs_decode_status(call, &bp, &vnode->status, vnode,
+				&call->expected_version, NULL);
+	if (ret < 0)
+		return ret;
 	xdr_decode_AFSCallBack(call, vnode, &bp);
 	if (call->reply[1])
 		xdr_decode_AFSVolSync(&bp, call->reply[1]);
@@ -580,9 +581,10 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
 			return ret;
 
 		bp = call->buffer;
-		if (afs_decode_status(call, &bp, &vnode->status, vnode,
-				      &vnode->status.data_version, req) < 0)
-			return afs_protocol_error(call, -EBADMSG);
+		ret = afs_decode_status(call, &bp, &vnode->status, vnode,
+					&vnode->status.data_version, req);
+		if (ret < 0)
+			return ret;
 		xdr_decode_AFSCallBack(call, vnode, &bp);
 		if (call->reply[1])
 			xdr_decode_AFSVolSync(&bp, call->reply[1]);
@@ -733,10 +735,13 @@ static int afs_deliver_fs_create_vnode(struct afs_call *call)
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
 	xdr_decode_AFSFid(&bp, call->reply[1]);
-	if (afs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL) < 0 ||
-	    afs_decode_status(call, &bp, &vnode->status, vnode,
-			      &call->expected_version, NULL) < 0)
-		return afs_protocol_error(call, -EBADMSG);
+	ret = afs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL);
+	if (ret < 0)
+		return ret;
+	ret = afs_decode_status(call, &bp, &vnode->status, vnode,
+				&call->expected_version, NULL);
+	if (ret < 0)
+		return ret;
 	xdr_decode_AFSCallBack_raw(&bp, call->reply[3]);
 	/* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
 
@@ -839,9 +844,10 @@ static int afs_deliver_fs_remove(struct afs_call *call)
 
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
-	if (afs_decode_status(call, &bp, &vnode->status, vnode,
-			      &call->expected_version, NULL) < 0)
-		return afs_protocol_error(call, -EBADMSG);
+	ret = afs_decode_status(call, &bp, &vnode->status, vnode,
+				&call->expected_version, NULL);
+	if (ret < 0)
+		return ret;
 	/* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
 
 	_leave(" = 0 [done]");
@@ -929,10 +935,13 @@ static int afs_deliver_fs_link(struct afs_call *call)
 
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
-	if (afs_decode_status(call, &bp, &vnode->status, vnode, NULL, NULL) < 0 ||
-	    afs_decode_status(call, &bp, &dvnode->status, dvnode,
-			      &call->expected_version, NULL) < 0)
-		return afs_protocol_error(call, -EBADMSG);
+	ret = afs_decode_status(call, &bp, &vnode->status, vnode, NULL, NULL);
+	if (ret < 0)
+		return ret;
+	ret = afs_decode_status(call, &bp, &dvnode->status, dvnode,
+				&call->expected_version, NULL);
+	if (ret < 0)
+		return ret;
 	/* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
 
 	_leave(" = 0 [done]");
@@ -1016,10 +1025,13 @@ static int afs_deliver_fs_symlink(struct afs_call *call)
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
 	xdr_decode_AFSFid(&bp, call->reply[1]);
-	if (afs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL) ||
-	    afs_decode_status(call, &bp, &vnode->status, vnode,
-			      &call->expected_version, NULL) < 0)
-		return afs_protocol_error(call, -EBADMSG);
+	ret = afs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL);
+	if (ret < 0)
+		return ret;
+	ret = afs_decode_status(call, &bp, &vnode->status, vnode,
+				&call->expected_version, NULL);
+	if (ret < 0)
+		return ret;
 	/* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
 
 	_leave(" = 0 [done]");
@@ -1122,13 +1134,16 @@ static int afs_deliver_fs_rename(struct afs_call *call)
 
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
-	if (afs_decode_status(call, &bp, &orig_dvnode->status, orig_dvnode,
-			      &call->expected_version, NULL) < 0)
-		return afs_protocol_error(call, -EBADMSG);
-	if (new_dvnode != orig_dvnode &&
-	    afs_decode_status(call, &bp, &new_dvnode->status, new_dvnode,
-			      &call->expected_version_2, NULL) < 0)
-		return afs_protocol_error(call, -EBADMSG);
+	ret = afs_decode_status(call, &bp, &orig_dvnode->status, orig_dvnode,
+				&call->expected_version, NULL);
+	if (ret < 0)
+		return ret;
+	if (new_dvnode != orig_dvnode) {
+		ret = afs_decode_status(call, &bp, &new_dvnode->status, new_dvnode,
+					&call->expected_version_2, NULL);
+		if (ret < 0)
+			return ret;
+	}
 	/* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
 
 	_leave(" = 0 [done]");
@@ -1231,9 +1246,10 @@ static int afs_deliver_fs_store_data(struct afs_call *call)
 
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
-	if (afs_decode_status(call, &bp, &vnode->status, vnode,
-			      &call->expected_version, NULL) < 0)
-		return afs_protocol_error(call, -EBADMSG);
+	ret = afs_decode_status(call, &bp, &vnode->status, vnode,
+				&call->expected_version, NULL);
+	if (ret < 0)
+		return ret;
 	/* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
 
 	afs_pages_written_back(vnode, call);
@@ -1407,9 +1423,10 @@ static int afs_deliver_fs_store_status(struct afs_call *call)
 
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
-	if (afs_decode_status(call, &bp, &vnode->status, vnode,
-			      &call->expected_version, NULL) < 0)
-		return afs_protocol_error(call, -EBADMSG);
+	ret = afs_decode_status(call, &bp, &vnode->status, vnode,
+				&call->expected_version, NULL);
+	if (ret < 0)
+		return ret;
 	/* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
 
 	_leave(" = 0 [done]");
@@ -1612,7 +1629,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
 		call->count = ntohl(call->tmp);
 		_debug("volname length: %u", call->count);
 		if (call->count >= AFSNAMEMAX)
-			return afs_protocol_error(call, -EBADMSG);
+			return afs_protocol_error(call, -EBADMSG,
+						  afs_eproto_volname_len);
 		call->offset = 0;
 		call->unmarshall++;
 
@@ -1659,7 +1677,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
 		call->count = ntohl(call->tmp);
 		_debug("offline msg length: %u", call->count);
 		if (call->count >= AFSNAMEMAX)
-			return afs_protocol_error(call, -EBADMSG);
+			return afs_protocol_error(call, -EBADMSG,
+						  afs_eproto_offline_msg_len);
 		call->offset = 0;
 		call->unmarshall++;
 
@@ -1706,7 +1725,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
 		call->count = ntohl(call->tmp);
 		_debug("motd length: %u", call->count);
 		if (call->count >= AFSNAMEMAX)
-			return afs_protocol_error(call, -EBADMSG);
+			return afs_protocol_error(call, -EBADMSG,
+						  afs_eproto_motd_len);
 		call->offset = 0;
 		call->unmarshall++;
 
@@ -2109,8 +2129,10 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call)
 
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
-	afs_decode_status(call, &bp, status, vnode,
-			  &call->expected_version, NULL);
+	ret = afs_decode_status(call, &bp, status, vnode,
+				&call->expected_version, NULL);
+	if (ret < 0)
+		return ret;
 	callback[call->count].version	= ntohl(bp[0]);
 	callback[call->count].expiry	= ntohl(bp[1]);
 	callback[call->count].type	= ntohl(bp[2]);
@@ -2206,7 +2228,8 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
 		tmp = ntohl(call->tmp);
 		_debug("status count: %u/%u", tmp, call->count2);
 		if (tmp != call->count2)
-			return afs_protocol_error(call, -EBADMSG);
+			return afs_protocol_error(call, -EBADMSG,
+						  afs_eproto_ibulkst_count);
 
 		call->count = 0;
 		call->unmarshall++;
@@ -2221,10 +2244,11 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
 
 		bp = call->buffer;
 		statuses = call->reply[1];
-		if (afs_decode_status(call, &bp, &statuses[call->count],
-				      call->count == 0 ? vnode : NULL,
-				      NULL, NULL) < 0)
-			return afs_protocol_error(call, -EBADMSG);
+		ret = afs_decode_status(call, &bp, &statuses[call->count],
+					call->count == 0 ? vnode : NULL,
+					NULL, NULL);
+		if (ret < 0)
+			return ret;
 
 		call->count++;
 		if (call->count < call->count2)
@@ -2244,7 +2268,8 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
 		tmp = ntohl(call->tmp);
 		_debug("CB count: %u", tmp);
 		if (tmp != call->count2)
-			return afs_protocol_error(call, -EBADMSG);
+			return afs_protocol_error(call, -EBADMSG,
+						  afs_eproto_ibulkst_cb_count);
 		call->count = 0;
 		call->unmarshall++;
 	more_cbs:
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 479b7fdda1244..ab4e7a15c205a 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -82,7 +82,7 @@ static int afs_inode_init_from_status(struct afs_vnode *vnode, struct key *key)
 	default:
 		printk("kAFS: AFS vnode with undefined type\n");
 		read_sequnlock_excl(&vnode->cb_lock);
-		return afs_protocol_error(NULL, -EBADMSG);
+		return afs_protocol_error(NULL, -EBADMSG, afs_eproto_file_type);
 	}
 
 	inode->i_blocks		= 0;
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 72de1f157d202..73770a2a23112 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -931,7 +931,7 @@ extern void afs_flat_call_destructor(struct afs_call *);
 extern void afs_send_empty_reply(struct afs_call *);
 extern void afs_send_simple_reply(struct afs_call *, const void *, size_t);
 extern int afs_extract_data(struct afs_call *, void *, size_t, bool);
-extern int afs_protocol_error(struct afs_call *, int);
+extern int afs_protocol_error(struct afs_call *, int, enum afs_eproto_cause);
 
 static inline int afs_transfer_reply(struct afs_call *call)
 {
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 639c16882e932..9757c2d953f13 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -939,8 +939,9 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count,
 /*
  * Log protocol error production.
  */
-noinline int afs_protocol_error(struct afs_call *call, int error)
+noinline int afs_protocol_error(struct afs_call *call, int error,
+				enum afs_eproto_cause cause)
 {
-	trace_afs_protocol_error(call, error, __builtin_return_address(0));
+	trace_afs_protocol_error(call, error, cause);
 	return error;
 }
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index c3b740813fc71..d0f95c4ab05ed 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -451,7 +451,8 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
 		call->count2	= ntohl(*bp); /* Type or next count */
 
 		if (call->count > YFS_MAXENDPOINTS)
-			return afs_protocol_error(call, -EBADMSG);
+			return afs_protocol_error(call, -EBADMSG,
+						  afs_eproto_yvl_fsendpt_num);
 
 		alist = afs_alloc_addrlist(call->count, FS_SERVICE, AFS_FS_PORT);
 		if (!alist)
@@ -475,7 +476,8 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
 			size = sizeof(__be32) * (1 + 4 + 1);
 			break;
 		default:
-			return afs_protocol_error(call, -EBADMSG);
+			return afs_protocol_error(call, -EBADMSG,
+						  afs_eproto_yvl_fsendpt_type);
 		}
 
 		size += sizeof(__be32);
@@ -488,18 +490,21 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
 		switch (call->count2) {
 		case YFS_ENDPOINT_IPV4:
 			if (ntohl(bp[0]) != sizeof(__be32) * 2)
-				return afs_protocol_error(call, -EBADMSG);
+				return afs_protocol_error(call, -EBADMSG,
+							  afs_eproto_yvl_fsendpt4_len);
 			afs_merge_fs_addr4(alist, bp[1], ntohl(bp[2]));
 			bp += 3;
 			break;
 		case YFS_ENDPOINT_IPV6:
 			if (ntohl(bp[0]) != sizeof(__be32) * 5)
-				return afs_protocol_error(call, -EBADMSG);
+				return afs_protocol_error(call, -EBADMSG,
+							  afs_eproto_yvl_fsendpt6_len);
 			afs_merge_fs_addr6(alist, bp + 1, ntohl(bp[5]));
 			bp += 6;
 			break;
 		default:
-			return afs_protocol_error(call, -EBADMSG);
+			return afs_protocol_error(call, -EBADMSG,
+						  afs_eproto_yvl_fsendpt_type);
 		}
 
 		/* Got either the type of the next entry or the count of
@@ -518,7 +523,8 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
 		if (!call->count)
 			goto end;
 		if (call->count > YFS_MAXENDPOINTS)
-			return afs_protocol_error(call, -EBADMSG);
+			return afs_protocol_error(call, -EBADMSG,
+						  afs_eproto_yvl_vlendpt_type);
 
 		call->unmarshall = 3;
 
@@ -546,7 +552,8 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
 			size = sizeof(__be32) * (1 + 4 + 1);
 			break;
 		default:
-			return afs_protocol_error(call, -EBADMSG);
+			return afs_protocol_error(call, -EBADMSG,
+						  afs_eproto_yvl_vlendpt_type);
 		}
 
 		if (call->count > 1)
@@ -559,16 +566,19 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
 		switch (call->count2) {
 		case YFS_ENDPOINT_IPV4:
 			if (ntohl(bp[0]) != sizeof(__be32) * 2)
-				return afs_protocol_error(call, -EBADMSG);
+				return afs_protocol_error(call, -EBADMSG,
+							  afs_eproto_yvl_vlendpt4_len);
 			bp += 3;
 			break;
 		case YFS_ENDPOINT_IPV6:
 			if (ntohl(bp[0]) != sizeof(__be32) * 5)
-				return afs_protocol_error(call, -EBADMSG);
+				return afs_protocol_error(call, -EBADMSG,
+							  afs_eproto_yvl_vlendpt6_len);
 			bp += 6;
 			break;
 		default:
-			return afs_protocol_error(call, -EBADMSG);
+			return afs_protocol_error(call, -EBADMSG,
+						  afs_eproto_yvl_vlendpt_type);
 		}
 
 		/* Got either the type of the next entry or the count of
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index d0a341bc45404..5c60ade2c7d88 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -84,6 +84,25 @@ enum afs_edit_dir_reason {
 	afs_edit_dir_for_unlink,
 };
 
+enum afs_eproto_cause {
+	afs_eproto_bad_status,
+	afs_eproto_cb_count,
+	afs_eproto_cb_fid_count,
+	afs_eproto_file_type,
+	afs_eproto_ibulkst_cb_count,
+	afs_eproto_ibulkst_count,
+	afs_eproto_motd_len,
+	afs_eproto_offline_msg_len,
+	afs_eproto_volname_len,
+	afs_eproto_yvl_fsendpt4_len,
+	afs_eproto_yvl_fsendpt6_len,
+	afs_eproto_yvl_fsendpt_num,
+	afs_eproto_yvl_fsendpt_type,
+	afs_eproto_yvl_vlendpt4_len,
+	afs_eproto_yvl_vlendpt6_len,
+	afs_eproto_yvl_vlendpt_type,
+};
+
 #endif /* end __AFS_DECLARE_TRACE_ENUMS_ONCE_ONLY */
 
 /*
@@ -146,6 +165,24 @@ enum afs_edit_dir_reason {
 	EM(afs_edit_dir_for_symlink,		"Symlnk") \
 	E_(afs_edit_dir_for_unlink,		"Unlink")
 
+#define afs_eproto_causes			\
+	EM(afs_eproto_bad_status,	"BadStatus") \
+	EM(afs_eproto_cb_count,		"CbCount") \
+	EM(afs_eproto_cb_fid_count,	"CbFidCount") \
+	EM(afs_eproto_file_type,	"FileTYpe") \
+	EM(afs_eproto_ibulkst_cb_count,	"IBS.CbCount") \
+	EM(afs_eproto_ibulkst_count,	"IBS.FidCount") \
+	EM(afs_eproto_motd_len,		"MotdLen") \
+	EM(afs_eproto_offline_msg_len,	"OfflineMsgLen") \
+	EM(afs_eproto_volname_len,	"VolNameLen") \
+	EM(afs_eproto_yvl_fsendpt4_len,	"YVL.FsEnd4Len") \
+	EM(afs_eproto_yvl_fsendpt6_len,	"YVL.FsEnd6Len") \
+	EM(afs_eproto_yvl_fsendpt_num,	"YVL.FsEndCount") \
+	EM(afs_eproto_yvl_fsendpt_type,	"YVL.FsEndType") \
+	EM(afs_eproto_yvl_vlendpt4_len,	"YVL.VlEnd4Len") \
+	EM(afs_eproto_yvl_vlendpt6_len,	"YVL.VlEnd6Len") \
+	E_(afs_eproto_yvl_vlendpt_type,	"YVL.VlEndType")
+
 
 /*
  * Export enum symbols via userspace.
@@ -555,24 +592,25 @@ TRACE_EVENT(afs_edit_dir,
 	    );
 
 TRACE_EVENT(afs_protocol_error,
-	    TP_PROTO(struct afs_call *call, int error, const void *where),
+	    TP_PROTO(struct afs_call *call, int error, enum afs_eproto_cause cause),
 
-	    TP_ARGS(call, error, where),
+	    TP_ARGS(call, error, cause),
 
 	    TP_STRUCT__entry(
-		    __field(unsigned int,	call		)
-		    __field(int,		error		)
-		    __field(const void *,	where		)
+		    __field(unsigned int,		call		)
+		    __field(int,			error		)
+		    __field(enum afs_eproto_cause,	cause		)
 			     ),
 
 	    TP_fast_assign(
 		    __entry->call = call ? call->debug_id : 0;
 		    __entry->error = error;
-		    __entry->where = where;
+		    __entry->cause = cause;
 			   ),
 
-	    TP_printk("c=%08x r=%d sp=%pSR",
-		      __entry->call, __entry->error, __entry->where)
+	    TP_printk("c=%08x r=%d %s",
+		      __entry->call, __entry->error,
+		      __print_symbolic(__entry->cause, afs_eproto_causes))
 	    );
 
 TRACE_EVENT(afs_cm_no_server,
-- 
GitLab


From 12bdcf333fe13ece2a487a699b1a0f4c5dbb594b Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sat, 20 Oct 2018 00:57:56 +0100
Subject: [PATCH 0148/1890] afs: Set up the iov_iter before calling
 afs_extract_data()

afs_extract_data sets up a temporary iov_iter and passes it to AF_RXRPC
each time it is called to describe the remaining buffer to be filled.

Instead:

 (1) Put an iterator in the afs_call struct.

 (2) Set the iterator for each marshalling stage to load data into the
     appropriate places.  A number of convenience functions are provided to
     this end (eg. afs_extract_to_buf()).

     This iterator is then passed to afs_extract_data().

 (3) Use the new ITER_DISCARD iterator to discard any excess data provided
     by FetchData.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/cmservice.c         |  40 +++---
 fs/afs/fsclient.c          | 282 +++++++++++++------------------------
 fs/afs/internal.h          |  56 +++++++-
 fs/afs/rxrpc.c             |  41 ++----
 fs/afs/vlclient.c          | 104 ++++++--------
 include/trace/events/afs.h |  22 +--
 6 files changed, 236 insertions(+), 309 deletions(-)

diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 58f79301a7164..4db62ae8dc1a6 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -176,13 +176,13 @@ static int afs_deliver_cb_callback(struct afs_call *call)
 
 	switch (call->unmarshall) {
 	case 0:
-		call->offset = 0;
+		afs_extract_to_tmp(call);
 		call->unmarshall++;
 
 		/* extract the FID array and its count in two steps */
 	case 1:
 		_debug("extract FID count");
-		ret = afs_extract_data(call, &call->tmp, 4, true);
+		ret = afs_extract_data(call, true);
 		if (ret < 0)
 			return ret;
 
@@ -196,13 +196,12 @@ static int afs_deliver_cb_callback(struct afs_call *call)
 				       GFP_KERNEL);
 		if (!call->buffer)
 			return -ENOMEM;
-		call->offset = 0;
+		afs_extract_to_buf(call, call->count * 3 * 4);
 		call->unmarshall++;
 
 	case 2:
 		_debug("extract FID array");
-		ret = afs_extract_data(call, call->buffer,
-				       call->count * 3 * 4, true);
+		ret = afs_extract_data(call, true);
 		if (ret < 0)
 			return ret;
 
@@ -222,13 +221,13 @@ static int afs_deliver_cb_callback(struct afs_call *call)
 			cb->cb.type	= AFSCM_CB_UNTYPED;
 		}
 
-		call->offset = 0;
+		afs_extract_to_tmp(call);
 		call->unmarshall++;
 
 		/* extract the callback array and its count in two steps */
 	case 3:
 		_debug("extract CB count");
-		ret = afs_extract_data(call, &call->tmp, 4, true);
+		ret = afs_extract_data(call, true);
 		if (ret < 0)
 			return ret;
 
@@ -237,13 +236,12 @@ static int afs_deliver_cb_callback(struct afs_call *call)
 		if (call->count2 != call->count && call->count2 != 0)
 			return afs_protocol_error(call, -EBADMSG,
 						  afs_eproto_cb_count);
-		call->offset = 0;
+		afs_extract_to_buf(call, call->count2 * 3 * 4);
 		call->unmarshall++;
 
 	case 4:
 		_debug("extract CB array");
-		ret = afs_extract_data(call, call->buffer,
-				       call->count2 * 3 * 4, false);
+		ret = afs_extract_data(call, false);
 		if (ret < 0)
 			return ret;
 
@@ -256,7 +254,6 @@ static int afs_deliver_cb_callback(struct afs_call *call)
 			cb->cb.type	= ntohl(*bp++);
 		}
 
-		call->offset = 0;
 		call->unmarshall++;
 	case 5:
 		break;
@@ -303,7 +300,8 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call)
 
 	rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
 
-	ret = afs_extract_data(call, NULL, 0, false);
+	afs_extract_discard(call, 0);
+	ret = afs_extract_data(call, false);
 	if (ret < 0)
 		return ret;
 
@@ -332,16 +330,15 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
 
 	switch (call->unmarshall) {
 	case 0:
-		call->offset = 0;
 		call->buffer = kmalloc_array(11, sizeof(__be32), GFP_KERNEL);
 		if (!call->buffer)
 			return -ENOMEM;
+		afs_extract_to_buf(call, 11 * sizeof(__be32));
 		call->unmarshall++;
 
 	case 1:
 		_debug("extract UUID");
-		ret = afs_extract_data(call, call->buffer,
-				       11 * sizeof(__be32), false);
+		ret = afs_extract_data(call, false);
 		switch (ret) {
 		case 0:		break;
 		case -EAGAIN:	return 0;
@@ -364,7 +361,6 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
 		for (loop = 0; loop < 6; loop++)
 			r->node[loop] = ntohl(b[loop + 5]);
 
-		call->offset = 0;
 		call->unmarshall++;
 
 	case 2:
@@ -407,7 +403,8 @@ static int afs_deliver_cb_probe(struct afs_call *call)
 
 	_enter("");
 
-	ret = afs_extract_data(call, NULL, 0, false);
+	afs_extract_discard(call, 0);
+	ret = afs_extract_data(call, false);
 	if (ret < 0)
 		return ret;
 
@@ -455,16 +452,15 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call)
 
 	switch (call->unmarshall) {
 	case 0:
-		call->offset = 0;
 		call->buffer = kmalloc_array(11, sizeof(__be32), GFP_KERNEL);
 		if (!call->buffer)
 			return -ENOMEM;
+		afs_extract_to_buf(call, 11 * sizeof(__be32));
 		call->unmarshall++;
 
 	case 1:
 		_debug("extract UUID");
-		ret = afs_extract_data(call, call->buffer,
-				       11 * sizeof(__be32), false);
+		ret = afs_extract_data(call, false);
 		switch (ret) {
 		case 0:		break;
 		case -EAGAIN:	return 0;
@@ -487,7 +483,6 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call)
 		for (loop = 0; loop < 6; loop++)
 			r->node[loop] = ntohl(b[loop + 5]);
 
-		call->offset = 0;
 		call->unmarshall++;
 
 	case 2:
@@ -572,7 +567,8 @@ static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call)
 
 	_enter("");
 
-	ret = afs_extract_data(call, NULL, 0, false);
+	afs_extract_discard(call, 0);
+	ret = afs_extract_data(call, false);
 	if (ret < 0)
 		return ret;
 
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index d9a5815945dcd..fe2e9e39b388d 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -20,12 +20,6 @@
 
 static const struct afs_fid afs_zero_fid;
 
-/*
- * We need somewhere to discard into in case the server helpfully returns more
- * than we asked for in FS.FetchData{,64}.
- */
-static u8 afs_discard_buffer[64];
-
 static inline void afs_use_fs_server(struct afs_call *call, struct afs_cb_interest *cbi)
 {
 	call->cbi = afs_get_cb_interest(cbi);
@@ -469,114 +463,93 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
 	struct afs_read *req = call->reply[2];
 	const __be32 *bp;
 	unsigned int size;
-	void *buffer;
 	int ret;
 
-	_enter("{%u,%zu/%u;%llu/%llu}",
-	       call->unmarshall, call->offset, call->count,
-	       req->remain, req->actual_len);
+	_enter("{%u,%zu/%llu}",
+	       call->unmarshall, iov_iter_count(&call->iter), req->actual_len);
 
 	switch (call->unmarshall) {
 	case 0:
 		req->actual_len = 0;
-		call->offset = 0;
+		req->index = 0;
+		req->offset = req->pos & (PAGE_SIZE - 1);
 		call->unmarshall++;
-		if (call->operation_ID != FSFETCHDATA64) {
-			call->unmarshall++;
-			goto no_msw;
+		if (call->operation_ID == FSFETCHDATA64) {
+			afs_extract_to_tmp64(call);
+		} else {
+			call->tmp_u = htonl(0);
+			afs_extract_to_tmp(call);
 		}
 
-		/* extract the upper part of the returned data length of an
-		 * FSFETCHDATA64 op (which should always be 0 using this
-		 * client) */
-	case 1:
-		_debug("extract data length (MSW)");
-		ret = afs_extract_data(call, &call->tmp, 4, true);
-		if (ret < 0)
-			return ret;
-
-		req->actual_len = ntohl(call->tmp);
-		req->actual_len <<= 32;
-		call->offset = 0;
-		call->unmarshall++;
-
-	no_msw:
 		/* extract the returned data length */
-	case 2:
+	case 1:
 		_debug("extract data length");
-		ret = afs_extract_data(call, &call->tmp, 4, true);
+		ret = afs_extract_data(call, true);
 		if (ret < 0)
 			return ret;
 
-		req->actual_len |= ntohl(call->tmp);
+		req->actual_len = be64_to_cpu(call->tmp64);
 		_debug("DATA length: %llu", req->actual_len);
-
-		req->remain = req->actual_len;
-		call->offset = req->pos & (PAGE_SIZE - 1);
-		req->index = 0;
-		if (req->actual_len == 0)
+		req->remain = min(req->len, req->actual_len);
+		if (req->remain == 0)
 			goto no_more_data;
+
 		call->unmarshall++;
 
 	begin_page:
 		ASSERTCMP(req->index, <, req->nr_pages);
-		if (req->remain > PAGE_SIZE - call->offset)
-			size = PAGE_SIZE - call->offset;
+		if (req->remain > PAGE_SIZE - req->offset)
+			size = PAGE_SIZE - req->offset;
 		else
 			size = req->remain;
-		call->count = call->offset + size;
-		ASSERTCMP(call->count, <=, PAGE_SIZE);
-		req->remain -= size;
+		call->bvec[0].bv_len = size;
+		call->bvec[0].bv_offset = req->offset;
+		call->bvec[0].bv_page = req->pages[req->index];
+		iov_iter_bvec(&call->iter, READ, call->bvec, 1, size);
+		ASSERTCMP(size, <=, PAGE_SIZE);
 
 		/* extract the returned data */
-	case 3:
-		_debug("extract data %llu/%llu %zu/%u",
-		       req->remain, req->actual_len, call->offset, call->count);
+	case 2:
+		_debug("extract data %zu/%llu",
+		       iov_iter_count(&call->iter), req->remain);
 
-		buffer = kmap(req->pages[req->index]);
-		ret = afs_extract_data(call, buffer, call->count, true);
-		kunmap(req->pages[req->index]);
+		ret = afs_extract_data(call, true);
 		if (ret < 0)
 			return ret;
-		if (call->offset == PAGE_SIZE) {
+		req->remain -= call->bvec[0].bv_len;
+		req->offset += call->bvec[0].bv_len;
+		ASSERTCMP(req->offset, <=, PAGE_SIZE);
+		if (req->offset == PAGE_SIZE) {
+			req->offset = 0;
 			if (req->page_done)
 				req->page_done(call, req);
 			req->index++;
-			if (req->remain > 0) {
-				call->offset = 0;
-				if (req->index >= req->nr_pages) {
-					call->unmarshall = 4;
-					goto begin_discard;
-				}
+			if (req->remain > 0)
 				goto begin_page;
-			}
 		}
-		goto no_more_data;
+
+		ASSERTCMP(req->remain, ==, 0);
+		if (req->actual_len <= req->len)
+			goto no_more_data;
 
 		/* Discard any excess data the server gave us */
-	begin_discard:
-	case 4:
-		size = min_t(loff_t, sizeof(afs_discard_buffer), req->remain);
-		call->count = size;
-		_debug("extract discard %llu/%llu %zu/%u",
-		       req->remain, req->actual_len, call->offset, call->count);
-
-		call->offset = 0;
-		ret = afs_extract_data(call, afs_discard_buffer, call->count, true);
-		req->remain -= call->offset;
+		iov_iter_discard(&call->iter, READ, req->actual_len - req->len);
+		call->unmarshall = 3;
+	case 3:
+		_debug("extract discard %zu/%llu",
+		       iov_iter_count(&call->iter), req->actual_len - req->len);
+
+		ret = afs_extract_data(call, true);
 		if (ret < 0)
 			return ret;
-		if (req->remain > 0)
-			goto begin_discard;
 
 	no_more_data:
-		call->offset = 0;
-		call->unmarshall = 5;
+		call->unmarshall = 4;
+		afs_extract_to_buf(call, (21 + 3 + 6) * 4);
 
 		/* extract the metadata */
-	case 5:
-		ret = afs_extract_data(call, call->buffer,
-				       (21 + 3 + 6) * 4, false);
+	case 4:
+		ret = afs_extract_data(call, false);
 		if (ret < 0)
 			return ret;
 
@@ -589,20 +562,19 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
 		if (call->reply[1])
 			xdr_decode_AFSVolSync(&bp, call->reply[1]);
 
-		call->offset = 0;
 		call->unmarshall++;
 
-	case 6:
+	case 5:
 		break;
 	}
 
 	for (; req->index < req->nr_pages; req->index++) {
-		if (call->count < PAGE_SIZE)
+		if (req->offset < PAGE_SIZE)
 			zero_user_segment(req->pages[req->index],
-					  call->count, PAGE_SIZE);
+					  req->offset, PAGE_SIZE);
 		if (req->page_done)
 			req->page_done(call, req);
-		call->count = 0;
+		req->offset = 0;
 	}
 
 	_leave(" = 0 [done]");
@@ -1598,31 +1570,31 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
 {
 	const __be32 *bp;
 	char *p;
+	u32 size;
 	int ret;
 
 	_enter("{%u}", call->unmarshall);
 
 	switch (call->unmarshall) {
 	case 0:
-		call->offset = 0;
 		call->unmarshall++;
+		afs_extract_to_buf(call, 12 * 4);
 
 		/* extract the returned status record */
 	case 1:
 		_debug("extract status");
-		ret = afs_extract_data(call, call->buffer,
-				       12 * 4, true);
+		ret = afs_extract_data(call, true);
 		if (ret < 0)
 			return ret;
 
 		bp = call->buffer;
 		xdr_decode_AFSFetchVolumeStatus(&bp, call->reply[1]);
-		call->offset = 0;
 		call->unmarshall++;
+		afs_extract_to_tmp(call);
 
 		/* extract the volume name length */
 	case 2:
-		ret = afs_extract_data(call, &call->tmp, 4, true);
+		ret = afs_extract_data(call, true);
 		if (ret < 0)
 			return ret;
 
@@ -1631,46 +1603,26 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
 		if (call->count >= AFSNAMEMAX)
 			return afs_protocol_error(call, -EBADMSG,
 						  afs_eproto_volname_len);
-		call->offset = 0;
+		size = (call->count + 3) & ~3; /* It's padded */
+		afs_extract_begin(call, call->reply[2], size);
 		call->unmarshall++;
 
 		/* extract the volume name */
 	case 3:
 		_debug("extract volname");
-		if (call->count > 0) {
-			ret = afs_extract_data(call, call->reply[2],
-					       call->count, true);
-			if (ret < 0)
-				return ret;
-		}
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
 
 		p = call->reply[2];
 		p[call->count] = 0;
 		_debug("volname '%s'", p);
-
-		call->offset = 0;
+		afs_extract_to_tmp(call);
 		call->unmarshall++;
 
-		/* extract the volume name padding */
-		if ((call->count & 3) == 0) {
-			call->unmarshall++;
-			goto no_volname_padding;
-		}
-		call->count = 4 - (call->count & 3);
-
-	case 4:
-		ret = afs_extract_data(call, call->buffer,
-				       call->count, true);
-		if (ret < 0)
-			return ret;
-
-		call->offset = 0;
-		call->unmarshall++;
-	no_volname_padding:
-
 		/* extract the offline message length */
-	case 5:
-		ret = afs_extract_data(call, &call->tmp, 4, true);
+	case 4:
+		ret = afs_extract_data(call, true);
 		if (ret < 0)
 			return ret;
 
@@ -1679,46 +1631,27 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
 		if (call->count >= AFSNAMEMAX)
 			return afs_protocol_error(call, -EBADMSG,
 						  afs_eproto_offline_msg_len);
-		call->offset = 0;
+		size = (call->count + 3) & ~3; /* It's padded */
+		afs_extract_begin(call, call->reply[2], size);
 		call->unmarshall++;
 
 		/* extract the offline message */
-	case 6:
+	case 5:
 		_debug("extract offline");
-		if (call->count > 0) {
-			ret = afs_extract_data(call, call->reply[2],
-					       call->count, true);
-			if (ret < 0)
-				return ret;
-		}
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
 
 		p = call->reply[2];
 		p[call->count] = 0;
 		_debug("offline '%s'", p);
 
-		call->offset = 0;
+		afs_extract_to_tmp(call);
 		call->unmarshall++;
 
-		/* extract the offline message padding */
-		if ((call->count & 3) == 0) {
-			call->unmarshall++;
-			goto no_offline_padding;
-		}
-		call->count = 4 - (call->count & 3);
-
-	case 7:
-		ret = afs_extract_data(call, call->buffer,
-				       call->count, true);
-		if (ret < 0)
-			return ret;
-
-		call->offset = 0;
-		call->unmarshall++;
-	no_offline_padding:
-
 		/* extract the message of the day length */
-	case 8:
-		ret = afs_extract_data(call, &call->tmp, 4, true);
+	case 6:
+		ret = afs_extract_data(call, true);
 		if (ret < 0)
 			return ret;
 
@@ -1727,38 +1660,24 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
 		if (call->count >= AFSNAMEMAX)
 			return afs_protocol_error(call, -EBADMSG,
 						  afs_eproto_motd_len);
-		call->offset = 0;
+		size = (call->count + 3) & ~3; /* It's padded */
+		afs_extract_begin(call, call->reply[2], size);
 		call->unmarshall++;
 
 		/* extract the message of the day */
-	case 9:
+	case 7:
 		_debug("extract motd");
-		if (call->count > 0) {
-			ret = afs_extract_data(call, call->reply[2],
-					       call->count, true);
-			if (ret < 0)
-				return ret;
-		}
+		ret = afs_extract_data(call, false);
+		if (ret < 0)
+			return ret;
 
 		p = call->reply[2];
 		p[call->count] = 0;
 		_debug("motd '%s'", p);
 
-		call->offset = 0;
 		call->unmarshall++;
 
-		/* extract the message of the day padding */
-		call->count = (4 - (call->count & 3)) & 3;
-
-	case 10:
-		ret = afs_extract_data(call, call->buffer,
-				       call->count, false);
-		if (ret < 0)
-			return ret;
-
-		call->offset = 0;
-		call->unmarshall++;
-	case 11:
+	case 8:
 		break;
 	}
 
@@ -2024,19 +1943,16 @@ static int afs_deliver_fs_get_capabilities(struct afs_call *call)
 	u32 count;
 	int ret;
 
-	_enter("{%u,%zu/%u}", call->unmarshall, call->offset, call->count);
+	_enter("{%u,%zu}", call->unmarshall, iov_iter_count(&call->iter));
 
-again:
 	switch (call->unmarshall) {
 	case 0:
-		call->offset = 0;
+		afs_extract_to_tmp(call);
 		call->unmarshall++;
 
 		/* Extract the capabilities word count */
 	case 1:
-		ret = afs_extract_data(call, &call->tmp,
-				       1 * sizeof(__be32),
-				       true);
+		ret = afs_extract_data(call, true);
 		if (ret < 0)
 			return ret;
 
@@ -2044,24 +1960,17 @@ static int afs_deliver_fs_get_capabilities(struct afs_call *call)
 
 		call->count = count;
 		call->count2 = count;
-		call->offset = 0;
+		iov_iter_discard(&call->iter, READ, count * sizeof(__be32));
 		call->unmarshall++;
 
 		/* Extract capabilities words */
 	case 2:
-		count = min(call->count, 16U);
-		ret = afs_extract_data(call, call->buffer,
-				       count * sizeof(__be32),
-				       call->count > 16);
+		ret = afs_extract_data(call, false);
 		if (ret < 0)
 			return ret;
 
 		/* TODO: Examine capabilities */
 
-		call->count -= count;
-		if (call->count > 0)
-			goto again;
-		call->offset = 0;
 		call->unmarshall++;
 		break;
 	}
@@ -2215,13 +2124,13 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
 
 	switch (call->unmarshall) {
 	case 0:
-		call->offset = 0;
+		afs_extract_to_tmp(call);
 		call->unmarshall++;
 
 		/* Extract the file status count and array in two steps */
 	case 1:
 		_debug("extract status count");
-		ret = afs_extract_data(call, &call->tmp, 4, true);
+		ret = afs_extract_data(call, true);
 		if (ret < 0)
 			return ret;
 
@@ -2234,11 +2143,11 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
 		call->count = 0;
 		call->unmarshall++;
 	more_counts:
-		call->offset = 0;
+		afs_extract_to_buf(call, 21 * sizeof(__be32));
 
 	case 2:
 		_debug("extract status array %u", call->count);
-		ret = afs_extract_data(call, call->buffer, 21 * 4, true);
+		ret = afs_extract_data(call, true);
 		if (ret < 0)
 			return ret;
 
@@ -2256,12 +2165,12 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
 
 		call->count = 0;
 		call->unmarshall++;
-		call->offset = 0;
+		afs_extract_to_tmp(call);
 
 		/* Extract the callback count and array in two steps */
 	case 3:
 		_debug("extract CB count");
-		ret = afs_extract_data(call, &call->tmp, 4, true);
+		ret = afs_extract_data(call, true);
 		if (ret < 0)
 			return ret;
 
@@ -2273,11 +2182,11 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
 		call->count = 0;
 		call->unmarshall++;
 	more_cbs:
-		call->offset = 0;
+		afs_extract_to_buf(call, 3 * sizeof(__be32));
 
 	case 4:
 		_debug("extract CB array");
-		ret = afs_extract_data(call, call->buffer, 3 * 4, true);
+		ret = afs_extract_data(call, true);
 		if (ret < 0)
 			return ret;
 
@@ -2294,11 +2203,11 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
 		if (call->count < call->count2)
 			goto more_cbs;
 
-		call->offset = 0;
+		afs_extract_to_buf(call, 6 * sizeof(__be32));
 		call->unmarshall++;
 
 	case 5:
-		ret = afs_extract_data(call, call->buffer, 6 * 4, false);
+		ret = afs_extract_data(call, false);
 		if (ret < 0)
 			return ret;
 
@@ -2306,7 +2215,6 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
 		if (call->reply[3])
 			xdr_decode_AFSVolSync(&bp, call->reply[3]);
 
-		call->offset = 0;
 		call->unmarshall++;
 
 	case 6:
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 73770a2a23112..36e9cc74ac115 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -98,11 +98,16 @@ struct afs_call {
 	struct afs_cb_interest	*cbi;		/* Callback interest for server used */
 	void			*request;	/* request data (first part) */
 	struct address_space	*mapping;	/* Pages being written from */
+	struct iov_iter		iter;		/* Buffer iterator */
+	struct iov_iter		*_iter;		/* Iterator currently in use */
+	union {	/* Convenience for ->iter */
+		struct kvec	kvec[1];
+		struct bio_vec	bvec[1];
+	};
 	void			*buffer;	/* reply receive buffer */
 	void			*reply[4];	/* Where to put the reply */
 	pgoff_t			first;		/* first page in mapping to deal with */
 	pgoff_t			last;		/* last page in mapping to deal with */
-	size_t			offset;		/* offset into received data store */
 	atomic_t		usage;
 	enum afs_call_state	state;
 	spinlock_t		state_lock;
@@ -127,7 +132,13 @@ struct afs_call {
 	unsigned int		debug_id;	/* Trace ID */
 	u32			operation_ID;	/* operation ID for an incoming call */
 	u32			count;		/* count for use in unmarshalling */
-	__be32			tmp;		/* place to extract temporary data */
+	union {					/* place to extract temporary data */
+		struct {
+			__be32	tmp_u;
+			__be32	tmp;
+		} __attribute__((packed));
+		__be64		tmp64;
+	};
 	afs_dataversion_t	expected_version; /* Updated version expected from store */
 	afs_dataversion_t	expected_version_2; /* 2nd updated version expected from store */
 };
@@ -185,6 +196,7 @@ struct afs_read {
 	refcount_t		usage;
 	unsigned int		index;		/* Which page we're reading into */
 	unsigned int		nr_pages;
+	unsigned int		offset;		/* offset into current page */
 	void (*page_done)(struct afs_call *, struct afs_read *);
 	struct page		**pages;
 	struct page		*array[];
@@ -550,6 +562,15 @@ struct afs_vnode {
 	afs_callback_type_t	cb_type;	/* type of callback */
 };
 
+static inline struct fscache_cookie *afs_vnode_cache(struct afs_vnode *vnode)
+{
+#ifdef CONFIG_AFS_FSCACHE
+	return vnode->cache;
+#else
+	return NULL;
+#endif
+}
+
 /*
  * cached security record for one user's attempt to access a vnode
  */
@@ -930,12 +951,39 @@ extern struct afs_call *afs_alloc_flat_call(struct afs_net *,
 extern void afs_flat_call_destructor(struct afs_call *);
 extern void afs_send_empty_reply(struct afs_call *);
 extern void afs_send_simple_reply(struct afs_call *, const void *, size_t);
-extern int afs_extract_data(struct afs_call *, void *, size_t, bool);
+extern int afs_extract_data(struct afs_call *, bool);
 extern int afs_protocol_error(struct afs_call *, int, enum afs_eproto_cause);
 
+static inline void afs_extract_begin(struct afs_call *call, void *buf, size_t size)
+{
+	call->kvec[0].iov_base = buf;
+	call->kvec[0].iov_len = size;
+	iov_iter_kvec(&call->iter, READ, call->kvec, 1, size);
+}
+
+static inline void afs_extract_to_tmp(struct afs_call *call)
+{
+	afs_extract_begin(call, &call->tmp, sizeof(call->tmp));
+}
+
+static inline void afs_extract_to_tmp64(struct afs_call *call)
+{
+	afs_extract_begin(call, &call->tmp64, sizeof(call->tmp64));
+}
+
+static inline void afs_extract_discard(struct afs_call *call, size_t size)
+{
+	iov_iter_discard(&call->iter, READ, size);
+}
+
+static inline void afs_extract_to_buf(struct afs_call *call, size_t size)
+{
+	afs_extract_begin(call, call->buffer, size);
+}
+
 static inline int afs_transfer_reply(struct afs_call *call)
 {
-	return afs_extract_data(call, call->buffer, call->reply_max, false);
+	return afs_extract_data(call, false);
 }
 
 static inline bool afs_check_call_state(struct afs_call *call,
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 9757c2d953f13..a3904a8315de6 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -143,6 +143,7 @@ static struct afs_call *afs_alloc_call(struct afs_net *net,
 	INIT_WORK(&call->async_work, afs_process_async_call);
 	init_waitqueue_head(&call->waitq);
 	spin_lock_init(&call->state_lock);
+	call->_iter = &call->iter;
 
 	o = atomic_inc_return(&net->nr_outstanding_calls);
 	trace_afs_call(call, afs_call_trace_alloc, 1, o,
@@ -233,6 +234,7 @@ struct afs_call *afs_alloc_flat_call(struct afs_net *net,
 			goto nomem_free;
 	}
 
+	afs_extract_to_buf(call, call->reply_max);
 	call->operation_ID = type->op;
 	init_waitqueue_head(&call->waitq);
 	return call;
@@ -465,14 +467,12 @@ static void afs_deliver_to_call(struct afs_call *call)
 	       state == AFS_CALL_SV_AWAIT_ACK
 	       ) {
 		if (state == AFS_CALL_SV_AWAIT_ACK) {
-			struct iov_iter iter;
-
-			iov_iter_kvec(&iter, READ, NULL, 0, 0);
+			iov_iter_kvec(&call->iter, READ, NULL, 0, 0);
 			ret = rxrpc_kernel_recv_data(call->net->socket,
-						     call->rxcall, &iter, false,
-						     &remote_abort,
+						     call->rxcall, &call->iter,
+						     false, &remote_abort,
 						     &call->service_id);
-			trace_afs_recv_data(call, 0, 0, false, ret);
+			trace_afs_receive_data(call, &call->iter, false, ret);
 
 			if (ret == -EINPROGRESS || ret == -EAGAIN)
 				return;
@@ -516,7 +516,7 @@ static void afs_deliver_to_call(struct afs_call *call)
 			if (state != AFS_CALL_CL_AWAIT_REPLY)
 				abort_code = RXGEN_SS_UNMARSHAL;
 			rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
-						abort_code, -EBADMSG, "KUM");
+						abort_code, ret, "KUM");
 			goto local_abort;
 		}
 	}
@@ -727,6 +727,7 @@ void afs_charge_preallocation(struct work_struct *work)
 			call->async = true;
 			call->state = AFS_CALL_SV_AWAIT_OP_ID;
 			init_waitqueue_head(&call->waitq);
+			afs_extract_to_tmp(call);
 		}
 
 		if (rxrpc_kernel_charge_accept(net->socket,
@@ -772,18 +773,15 @@ static int afs_deliver_cm_op_id(struct afs_call *call)
 {
 	int ret;
 
-	_enter("{%zu}", call->offset);
-
-	ASSERTCMP(call->offset, <, 4);
+	_enter("{%zu}", iov_iter_count(call->_iter));
 
 	/* the operation ID forms the first four bytes of the request data */
-	ret = afs_extract_data(call, &call->tmp, 4, true);
+	ret = afs_extract_data(call, true);
 	if (ret < 0)
 		return ret;
 
 	call->operation_ID = ntohl(call->tmp);
 	afs_set_call_state(call, AFS_CALL_SV_AWAIT_OP_ID, AFS_CALL_SV_AWAIT_REQUEST);
-	call->offset = 0;
 
 	/* ask the cache manager to route the call (it'll change the call type
 	 * if successful) */
@@ -887,30 +885,19 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
 /*
  * Extract a piece of data from the received data socket buffers.
  */
-int afs_extract_data(struct afs_call *call, void *buf, size_t count,
-		     bool want_more)
+int afs_extract_data(struct afs_call *call, bool want_more)
 {
 	struct afs_net *net = call->net;
-	struct iov_iter iter;
-	struct kvec iov;
+	struct iov_iter *iter = call->_iter;
 	enum afs_call_state state;
 	u32 remote_abort = 0;
 	int ret;
 
-	_enter("{%s,%zu},,%zu,%d",
-	       call->type->name, call->offset, count, want_more);
-
-	ASSERTCMP(call->offset, <=, count);
-
-	iov.iov_base = buf + call->offset;
-	iov.iov_len = count - call->offset;
-	iov_iter_kvec(&iter, READ, &iov, 1, count - call->offset);
+	_enter("{%s,%zu},%d", call->type->name, iov_iter_count(iter), want_more);
 
-	ret = rxrpc_kernel_recv_data(net->socket, call->rxcall, &iter,
+	ret = rxrpc_kernel_recv_data(net->socket, call->rxcall, iter,
 				     want_more, &remote_abort,
 				     &call->service_id);
-	call->offset += (count - call->offset) - iov_iter_count(&iter);
-	trace_afs_recv_data(call, count, call->offset, want_more, ret);
 	if (ret == 0 || ret == -EAGAIN)
 		return ret;
 
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index d0f95c4ab05ed..e18c51742daaa 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -187,19 +187,18 @@ static int afs_deliver_vl_get_addrs_u(struct afs_call *call)
 	u32 uniquifier, nentries, count;
 	int i, ret;
 
-	_enter("{%u,%zu/%u}", call->unmarshall, call->offset, call->count);
+	_enter("{%u,%zu/%u}",
+	       call->unmarshall, iov_iter_count(call->_iter), call->count);
 
-again:
 	switch (call->unmarshall) {
 	case 0:
-		call->offset = 0;
+		afs_extract_to_buf(call,
+				   sizeof(struct afs_uuid__xdr) + 3 * sizeof(__be32));
 		call->unmarshall++;
 
 		/* Extract the returned uuid, uniquifier, nentries and blkaddrs size */
 	case 1:
-		ret = afs_extract_data(call, call->buffer,
-				       sizeof(struct afs_uuid__xdr) + 3 * sizeof(__be32),
-				       true);
+		ret = afs_extract_data(call, true);
 		if (ret < 0)
 			return ret;
 
@@ -216,28 +215,28 @@ static int afs_deliver_vl_get_addrs_u(struct afs_call *call)
 		call->reply[0] = alist;
 		call->count = count;
 		call->count2 = nentries;
-		call->offset = 0;
 		call->unmarshall++;
 
+	more_entries:
+		count = min(call->count, 4U);
+		afs_extract_to_buf(call, count * sizeof(__be32));
+
 		/* Extract entries */
 	case 2:
-		count = min(call->count, 4U);
-		ret = afs_extract_data(call, call->buffer,
-				       count * sizeof(__be32),
-				       call->count > 4);
+		ret = afs_extract_data(call, call->count > 4);
 		if (ret < 0)
 			return ret;
 
 		alist = call->reply[0];
 		bp = call->buffer;
+		count = min(call->count, 4U);
 		for (i = 0; i < count; i++)
 			if (alist->nr_addrs < call->count2)
 				afs_merge_fs_addr4(alist, *bp++, AFS_FS_PORT);
 
 		call->count -= count;
 		if (call->count > 0)
-			goto again;
-		call->offset = 0;
+			goto more_entries;
 		call->unmarshall++;
 		break;
 	}
@@ -318,44 +317,35 @@ static int afs_deliver_vl_get_capabilities(struct afs_call *call)
 	u32 count;
 	int ret;
 
-	_enter("{%u,%zu/%u}", call->unmarshall, call->offset, call->count);
+	_enter("{%u,%zu/%u}",
+	       call->unmarshall, iov_iter_count(call->_iter), call->count);
 
-again:
 	switch (call->unmarshall) {
 	case 0:
-		call->offset = 0;
+		afs_extract_to_tmp(call);
 		call->unmarshall++;
 
 		/* Extract the capabilities word count */
 	case 1:
-		ret = afs_extract_data(call, &call->tmp,
-				       1 * sizeof(__be32),
-				       true);
+		ret = afs_extract_data(call, true);
 		if (ret < 0)
 			return ret;
 
 		count = ntohl(call->tmp);
-
 		call->count = count;
 		call->count2 = count;
-		call->offset = 0;
+
 		call->unmarshall++;
+		afs_extract_discard(call, count * sizeof(__be32));
 
 		/* Extract capabilities words */
 	case 2:
-		count = min(call->count, 16U);
-		ret = afs_extract_data(call, call->buffer,
-				       count * sizeof(__be32),
-				       call->count > 16);
+		ret = afs_extract_data(call, false);
 		if (ret < 0)
 			return ret;
 
 		/* TODO: Examine capabilities */
 
-		call->count -= count;
-		if (call->count > 0)
-			goto again;
-		call->offset = 0;
 		call->unmarshall++;
 		break;
 	}
@@ -426,22 +416,19 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
 	u32 uniquifier, size;
 	int ret;
 
-	_enter("{%u,%zu/%u,%u}", call->unmarshall, call->offset, call->count, call->count2);
+	_enter("{%u,%zu,%u}",
+	       call->unmarshall, iov_iter_count(call->_iter), call->count2);
 
-again:
 	switch (call->unmarshall) {
 	case 0:
-		call->offset = 0;
+		afs_extract_to_buf(call, sizeof(uuid_t) + 3 * sizeof(__be32));
 		call->unmarshall = 1;
 
 		/* Extract the returned uuid, uniquifier, fsEndpoints count and
 		 * either the first fsEndpoint type or the volEndpoints
 		 * count if there are no fsEndpoints. */
 	case 1:
-		ret = afs_extract_data(call, call->buffer,
-				       sizeof(uuid_t) +
-				       3 * sizeof(__be32),
-				       true);
+		ret = afs_extract_data(call, true);
 		if (ret < 0)
 			return ret;
 
@@ -459,15 +446,11 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
 			return -ENOMEM;
 		alist->version = uniquifier;
 		call->reply[0] = alist;
-		call->offset = 0;
 
 		if (call->count == 0)
 			goto extract_volendpoints;
 
-		call->unmarshall = 2;
-
-		/* Extract fsEndpoints[] entries */
-	case 2:
+	next_fsendpoint:
 		switch (call->count2) {
 		case YFS_ENDPOINT_IPV4:
 			size = sizeof(__be32) * (1 + 1 + 1);
@@ -481,7 +464,12 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
 		}
 
 		size += sizeof(__be32);
-		ret = afs_extract_data(call, call->buffer, size, true);
+		afs_extract_to_buf(call, size);
+		call->unmarshall = 2;
+
+		/* Extract fsEndpoints[] entries */
+	case 2:
+		ret = afs_extract_data(call, true);
 		if (ret < 0)
 			return ret;
 
@@ -512,10 +500,9 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
 		 */
 		call->count2 = ntohl(*bp++);
 
-		call->offset = 0;
 		call->count--;
 		if (call->count > 0)
-			goto again;
+			goto next_fsendpoint;
 
 	extract_volendpoints:
 		/* Extract the list of volEndpoints. */
@@ -526,6 +513,7 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
 			return afs_protocol_error(call, -EBADMSG,
 						  afs_eproto_yvl_vlendpt_type);
 
+		afs_extract_to_buf(call, 1 * sizeof(__be32));
 		call->unmarshall = 3;
 
 		/* Extract the type of volEndpoints[0].  Normally we would
@@ -533,17 +521,14 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
 		 * data of the current one, but this is the first...
 		 */
 	case 3:
-		ret = afs_extract_data(call, call->buffer, sizeof(__be32), true);
+		ret = afs_extract_data(call, true);
 		if (ret < 0)
 			return ret;
 
 		bp = call->buffer;
-		call->count2 = ntohl(*bp++);
-		call->offset = 0;
-		call->unmarshall = 4;
 
-		/* Extract volEndpoints[] entries */
-	case 4:
+	next_volendpoint:
+		call->count2 = ntohl(*bp++);
 		switch (call->count2) {
 		case YFS_ENDPOINT_IPV4:
 			size = sizeof(__be32) * (1 + 1 + 1);
@@ -557,8 +542,13 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
 		}
 
 		if (call->count > 1)
-			size += sizeof(__be32);
-		ret = afs_extract_data(call, call->buffer, size, true);
+			size += sizeof(__be32); /* Get next type too */
+		afs_extract_to_buf(call, size);
+		call->unmarshall = 4;
+
+		/* Extract volEndpoints[] entries */
+	case 4:
+		ret = afs_extract_data(call, true);
 		if (ret < 0)
 			return ret;
 
@@ -584,19 +574,17 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
 		/* Got either the type of the next entry or the count of
 		 * volEndpoints if no more fsEndpoints.
 		 */
-		call->offset = 0;
 		call->count--;
-		if (call->count > 0) {
-			call->count2 = ntohl(*bp++);
-			goto again;
-		}
+		if (call->count > 0)
+			goto next_volendpoint;
 
 	end:
+		afs_extract_discard(call, 0);
 		call->unmarshall = 5;
 
 		/* Done */
 	case 5:
-		ret = afs_extract_data(call, call->buffer, 0, false);
+		ret = afs_extract_data(call, false);
 		if (ret < 0)
 			return ret;
 		call->unmarshall = 6;
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index 5c60ade2c7d88..5e0f8dcede261 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -207,17 +207,16 @@ afs_edit_dir_reasons;
 #define EM(a, b)	{ a, b },
 #define E_(a, b)	{ a, b }
 
-TRACE_EVENT(afs_recv_data,
-	    TP_PROTO(struct afs_call *call, unsigned count, unsigned offset,
+TRACE_EVENT(afs_receive_data,
+	    TP_PROTO(struct afs_call *call, struct iov_iter *iter,
 		     bool want_more, int ret),
 
-	    TP_ARGS(call, count, offset, want_more, ret),
+	    TP_ARGS(call, iter, want_more, ret),
 
 	    TP_STRUCT__entry(
+		    __field(loff_t,			remain		)
 		    __field(unsigned int,		call		)
 		    __field(enum afs_call_state,	state		)
-		    __field(unsigned int,		count		)
-		    __field(unsigned int,		offset		)
 		    __field(unsigned short,		unmarshall	)
 		    __field(bool,			want_more	)
 		    __field(int,			ret		)
@@ -227,17 +226,18 @@ TRACE_EVENT(afs_recv_data,
 		    __entry->call	= call->debug_id;
 		    __entry->state	= call->state;
 		    __entry->unmarshall	= call->unmarshall;
-		    __entry->count	= count;
-		    __entry->offset	= offset;
+		    __entry->remain	= iov_iter_count(iter);
 		    __entry->want_more	= want_more;
 		    __entry->ret	= ret;
 			   ),
 
-	    TP_printk("c=%08x s=%u u=%u %u/%u wm=%u ret=%d",
+	    TP_printk("c=%08x r=%llu u=%u w=%u s=%u ret=%d",
 		      __entry->call,
-		      __entry->state, __entry->unmarshall,
-		      __entry->offset, __entry->count,
-		      __entry->want_more, __entry->ret)
+		      __entry->remain,
+		      __entry->unmarshall,
+		      __entry->want_more,
+		      __entry->state,
+		      __entry->ret)
 	    );
 
 TRACE_EVENT(afs_notify_call,
-- 
GitLab


From e7f680f45bd1deb4ca479c2348b395e1a4d44b17 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sat, 20 Oct 2018 00:57:57 +0100
Subject: [PATCH 0149/1890] afs: Improve FS server rotation error handling

Improve the error handling in FS server rotation by:

 (1) Cache the latest useful error value for the fs operation as a whole in
     struct afs_fs_cursor separately from the error cached in the
     afs_addr_cursor struct.  The one in the address cursor gets clobbered
     occasionally.  Copy over the error to the fs operation only when it's
     something we'd be interested in passing to userspace.

 (2) Make it so that EDESTADDRREQ is the default that is seen only if no
     addresses are available to be accessed.

 (3) When calling utility functions, such as checking a volume status or
     probing a fileserver, don't let a successful result clobber the cached
     error in the cursor; instead, stash the result in a temporary variable
     until it has been assessed.

 (4) Don't return ETIMEDOUT or ETIME if a better error, such as
     ENETUNREACH, is already cached.

 (5) On leaving the rotation loop, turn any remote abort code into a more
     useful error than ECONNABORTED.

Fixes: d2ddc776a458 ("afs: Overhaul volume and server record caching and fileserver rotation")
Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/addr_list.c |  4 +-
 fs/afs/internal.h  |  1 +
 fs/afs/rotate.c    | 95 ++++++++++++++++++++++++++--------------------
 3 files changed, 55 insertions(+), 45 deletions(-)

diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c
index 55a756c60746c..7b34fad4f8f5e 100644
--- a/fs/afs/addr_list.c
+++ b/fs/afs/addr_list.c
@@ -318,10 +318,8 @@ bool afs_iterate_addresses(struct afs_addr_cursor *ac)
 		if (ac->index == ac->alist->nr_addrs)
 			ac->index = 0;
 
-		if (ac->index == ac->start) {
-			ac->error = -EDESTADDRREQ;
+		if (ac->index == ac->start)
 			return false;
-		}
 	}
 
 	ac->begun = true;
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 36e9cc74ac115..81936a4d5035e 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -629,6 +629,7 @@ struct afs_fs_cursor {
 	unsigned int		cb_break_2;	/* cb_break + cb_s_break (2nd vnode) */
 	unsigned char		start;		/* Initial index in server list */
 	unsigned char		index;		/* Number of servers tried beyond start */
+	short			error;
 	unsigned short		flags;
 #define AFS_FS_CURSOR_STOP	0x0001		/* Set to cease iteration */
 #define AFS_FS_CURSOR_VBUSY	0x0002		/* Set if seen VBUSY */
diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c
index 1faef56b12bd3..d7cbc3c230ee0 100644
--- a/fs/afs/rotate.c
+++ b/fs/afs/rotate.c
@@ -39,9 +39,10 @@ bool afs_begin_vnode_operation(struct afs_fs_cursor *fc, struct afs_vnode *vnode
 	fc->vnode = vnode;
 	fc->key = key;
 	fc->ac.error = SHRT_MAX;
+	fc->error = -EDESTADDRREQ;
 
 	if (mutex_lock_interruptible(&vnode->io_lock) < 0) {
-		fc->ac.error = -EINTR;
+		fc->error = -EINTR;
 		fc->flags |= AFS_FS_CURSOR_STOP;
 		return false;
 	}
@@ -80,7 +81,7 @@ static bool afs_start_fs_iteration(struct afs_fs_cursor *fc,
 		 * and have to return an error.
 		 */
 		if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) {
-			fc->ac.error = -ESTALE;
+			fc->error = -ESTALE;
 			return false;
 		}
 
@@ -127,7 +128,7 @@ static bool afs_sleep_and_retry(struct afs_fs_cursor *fc)
 {
 	msleep_interruptible(1000);
 	if (signal_pending(current)) {
-		fc->ac.error = -ERESTARTSYS;
+		fc->error = -ERESTARTSYS;
 		return false;
 	}
 
@@ -143,11 +144,12 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
 	struct afs_addr_list *alist;
 	struct afs_server *server;
 	struct afs_vnode *vnode = fc->vnode;
+	int error = fc->ac.error;
 
 	_enter("%u/%u,%u/%u,%d,%d",
 	       fc->index, fc->start,
 	       fc->ac.index, fc->ac.start,
-	       fc->ac.error, fc->ac.abort_code);
+	       error, fc->ac.abort_code);
 
 	if (fc->flags & AFS_FS_CURSOR_STOP) {
 		_leave(" = f [stopped]");
@@ -155,15 +157,16 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
 	}
 
 	/* Evaluate the result of the previous operation, if there was one. */
-	switch (fc->ac.error) {
+	switch (error) {
 	case SHRT_MAX:
 		goto start;
 
 	case 0:
 	default:
 		/* Success or local failure.  Stop. */
+		fc->error = error;
 		fc->flags |= AFS_FS_CURSOR_STOP;
-		_leave(" = f [okay/local %d]", fc->ac.error);
+		_leave(" = f [okay/local %d]", error);
 		return false;
 
 	case -ECONNABORTED:
@@ -178,7 +181,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
 			 * - May indicate that the fileserver couldn't attach to the vol.
 			 */
 			if (fc->flags & AFS_FS_CURSOR_VNOVOL) {
-				fc->ac.error = -EREMOTEIO;
+				fc->error = -EREMOTEIO;
 				goto next_server;
 			}
 
@@ -187,12 +190,12 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
 			write_unlock(&vnode->volume->servers_lock);
 
 			set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags);
-			fc->ac.error = afs_check_volume_status(vnode->volume, fc->key);
-			if (fc->ac.error < 0)
-				goto failed;
+			error = afs_check_volume_status(vnode->volume, fc->key);
+			if (error < 0)
+				goto failed_set_error;
 
 			if (test_bit(AFS_VOLUME_DELETED, &vnode->volume->flags)) {
-				fc->ac.error = -ENOMEDIUM;
+				fc->error = -ENOMEDIUM;
 				goto failed;
 			}
 
@@ -200,7 +203,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
 			 * it's the fileserver having trouble.
 			 */
 			if (vnode->volume->servers == fc->server_list) {
-				fc->ac.error = -EREMOTEIO;
+				fc->error = -EREMOTEIO;
 				goto next_server;
 			}
 
@@ -215,7 +218,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
 		case VONLINE:
 		case VDISKFULL:
 		case VOVERQUOTA:
-			fc->ac.error = afs_abort_to_error(fc->ac.abort_code);
+			fc->error = afs_abort_to_error(fc->ac.abort_code);
 			goto next_server;
 
 		case VOFFLINE:
@@ -224,11 +227,11 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
 				clear_bit(AFS_VOLUME_BUSY, &vnode->volume->flags);
 			}
 			if (fc->flags & AFS_FS_CURSOR_NO_VSLEEP) {
-				fc->ac.error = -EADV;
+				fc->error = -EADV;
 				goto failed;
 			}
 			if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) {
-				fc->ac.error = -ESTALE;
+				fc->error = -ESTALE;
 				goto failed;
 			}
 			goto busy;
@@ -240,7 +243,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
 			 * have a file lock we need to maintain.
 			 */
 			if (fc->flags & AFS_FS_CURSOR_NO_VSLEEP) {
-				fc->ac.error = -EBUSY;
+				fc->error = -EBUSY;
 				goto failed;
 			}
 			if (!test_and_set_bit(AFS_VOLUME_BUSY, &vnode->volume->flags)) {
@@ -269,16 +272,16 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
 			 * honour, just in case someone sets up a loop.
 			 */
 			if (fc->flags & AFS_FS_CURSOR_VMOVED) {
-				fc->ac.error = -EREMOTEIO;
+				fc->error = -EREMOTEIO;
 				goto failed;
 			}
 			fc->flags |= AFS_FS_CURSOR_VMOVED;
 
 			set_bit(AFS_VOLUME_WAIT, &vnode->volume->flags);
 			set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags);
-			fc->ac.error = afs_check_volume_status(vnode->volume, fc->key);
-			if (fc->ac.error < 0)
-				goto failed;
+			error = afs_check_volume_status(vnode->volume, fc->key);
+			if (error < 0)
+				goto failed_set_error;
 
 			/* If the server list didn't change, then the VLDB is
 			 * out of sync with the fileservers.  This is hopefully
@@ -290,7 +293,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
 			 * TODO: Retry a few times with sleeps.
 			 */
 			if (vnode->volume->servers == fc->server_list) {
-				fc->ac.error = -ENOMEDIUM;
+				fc->error = -ENOMEDIUM;
 				goto failed;
 			}
 
@@ -299,20 +302,25 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
 		default:
 			clear_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags);
 			clear_bit(AFS_VOLUME_BUSY, &vnode->volume->flags);
-			fc->ac.error = afs_abort_to_error(fc->ac.abort_code);
+			fc->error = afs_abort_to_error(fc->ac.abort_code);
 			goto failed;
 		}
 
+	case -ETIMEDOUT:
+	case -ETIME:
+		if (fc->error != -EDESTADDRREQ)
+			goto iterate_address;
+		/* Fall through */
 	case -ENETUNREACH:
 	case -EHOSTUNREACH:
 	case -ECONNREFUSED:
-	case -ETIMEDOUT:
-	case -ETIME:
 		_debug("no conn");
+		fc->error = error;
 		goto iterate_address;
 
 	case -ECONNRESET:
 		_debug("call reset");
+		fc->error = error;
 		goto failed;
 	}
 
@@ -328,9 +336,9 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
 	/* See if we need to do an update of the volume record.  Note that the
 	 * volume may have moved or even have been deleted.
 	 */
-	fc->ac.error = afs_check_volume_status(vnode->volume, fc->key);
-	if (fc->ac.error < 0)
-		goto failed;
+	error = afs_check_volume_status(vnode->volume, fc->key);
+	if (error < 0)
+		goto failed_set_error;
 
 	if (!afs_start_fs_iteration(fc, vnode))
 		goto failed;
@@ -354,10 +362,10 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
 	 * break request before we've finished decoding the reply and
 	 * installing the vnode.
 	 */
-	fc->ac.error = afs_register_server_cb_interest(vnode, fc->server_list,
-						       fc->index);
-	if (fc->ac.error < 0)
-		goto failed;
+	error = afs_register_server_cb_interest(vnode, fc->server_list,
+						fc->index);
+	if (error < 0)
+		goto failed_set_error;
 
 	fc->cbi = afs_get_cb_interest(vnode->cb_interest);
 
@@ -422,13 +430,14 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
 	if (fc->flags & AFS_FS_CURSOR_VBUSY)
 		goto restart_from_beginning;
 
-	fc->ac.error = -EDESTADDRREQ;
 	goto failed;
 
+failed_set_error:
+	fc->error = error;
 failed:
 	fc->flags |= AFS_FS_CURSOR_STOP;
 	afs_end_cursor(&fc->ac);
-	_leave(" = f [failed %d]", fc->ac.error);
+	_leave(" = f [failed %d]", fc->error);
 	return false;
 }
 
@@ -442,13 +451,14 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
 	struct afs_vnode *vnode = fc->vnode;
 	struct afs_cb_interest *cbi = vnode->cb_interest;
 	struct afs_addr_list *alist;
+	int error = fc->ac.error;
 
 	_enter("");
 
-	switch (fc->ac.error) {
+	switch (error) {
 	case SHRT_MAX:
 		if (!cbi) {
-			fc->ac.error = -ESTALE;
+			fc->error = -ESTALE;
 			fc->flags |= AFS_FS_CURSOR_STOP;
 			return false;
 		}
@@ -461,7 +471,7 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
 		afs_get_addrlist(alist);
 		read_unlock(&cbi->server->fs_lock);
 		if (!alist) {
-			fc->ac.error = -ESTALE;
+			fc->error = -ESTALE;
 			fc->flags |= AFS_FS_CURSOR_STOP;
 			return false;
 		}
@@ -475,11 +485,13 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
 	case 0:
 	default:
 		/* Success or local failure.  Stop. */
+		fc->error = error;
 		fc->flags |= AFS_FS_CURSOR_STOP;
-		_leave(" = f [okay/local %d]", fc->ac.error);
+		_leave(" = f [okay/local %d]", error);
 		return false;
 
 	case -ECONNABORTED:
+		fc->error = afs_abort_to_error(fc->ac.abort_code);
 		fc->flags |= AFS_FS_CURSOR_STOP;
 		_leave(" = f [abort]");
 		return false;
@@ -490,6 +502,7 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
 	case -ETIMEDOUT:
 	case -ETIME:
 		_debug("no conn");
+		fc->error = error;
 		goto iterate_address;
 	}
 
@@ -512,7 +525,6 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
 int afs_end_vnode_operation(struct afs_fs_cursor *fc)
 {
 	struct afs_net *net = afs_v2net(fc->vnode);
-	int ret;
 
 	mutex_unlock(&fc->vnode->io_lock);
 
@@ -520,9 +532,8 @@ int afs_end_vnode_operation(struct afs_fs_cursor *fc)
 	afs_put_cb_interest(net, fc->cbi);
 	afs_put_serverlist(net, fc->server_list);
 
-	ret = fc->ac.error;
-	if (ret == -ECONNABORTED)
-		afs_abort_to_error(fc->ac.abort_code);
+	if (fc->error == -ECONNABORTED)
+		fc->error = afs_abort_to_error(fc->ac.abort_code);
 
-	return fc->ac.error;
+	return fc->error;
 }
-- 
GitLab


From 0a5143f2f89cc88d8a3eada8e8ccd86c1e988257 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sat, 20 Oct 2018 00:57:57 +0100
Subject: [PATCH 0150/1890] afs: Implement VL server rotation

Track VL servers as independent entities rather than lumping all their
addresses together into one set and implement server-level rotation by:

 (1) Add the concept of a VL server list, where each server has its own
     separate address list.  This code is similar to the FS server list.

 (2) Use the DNS resolver to retrieve a set of servers and their associated
     addresses, ports, preference and weight ratings.

 (3) In the case of a legacy DNS resolver or an address list given directly
     through /proc/net/afs/cells, create a list containing just a dummy
     server record and attach all the addresses to that.

 (4) Implement a simple rotation policy, for the moment ignoring the
     priorities and weights assigned to the servers.

 (5) Show the address list through /proc/net/afs/<cell>/vlservers.  This
     also displays the source and status of the data as indicated by the
     upcall.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/Makefile    |   2 +
 fs/afs/addr_list.c | 163 +++++++++++-----------
 fs/afs/cell.c      |  39 +++---
 fs/afs/dynroot.c   |   2 +-
 fs/afs/internal.h  | 114 +++++++++++++--
 fs/afs/proc.c      |  90 +++++++++---
 fs/afs/server.c    |  42 ++----
 fs/afs/vl_list.c   | 336 +++++++++++++++++++++++++++++++++++++++++++++
 fs/afs/vl_rotate.c | 251 +++++++++++++++++++++++++++++++++
 fs/afs/vlclient.c  |  32 ++---
 fs/afs/volume.c    |  52 +++----
 11 files changed, 905 insertions(+), 218 deletions(-)
 create mode 100644 fs/afs/vl_list.c
 create mode 100644 fs/afs/vl_rotate.c

diff --git a/fs/afs/Makefile b/fs/afs/Makefile
index 546874057bd35..03e9f7afea1b5 100644
--- a/fs/afs/Makefile
+++ b/fs/afs/Makefile
@@ -29,6 +29,8 @@ kafs-y := \
 	super.o \
 	netdevices.o \
 	vlclient.o \
+	vl_rotate.o \
+	vl_list.o \
 	volume.o \
 	write.o \
 	xattr.o
diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c
index 7b34fad4f8f5e..3f60b40125873 100644
--- a/fs/afs/addr_list.c
+++ b/fs/afs/addr_list.c
@@ -64,19 +64,25 @@ struct afs_addr_list *afs_alloc_addrlist(unsigned int nr,
 /*
  * Parse a text string consisting of delimited addresses.
  */
-struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
-					   char delim,
-					   unsigned short service,
-					   unsigned short port)
+struct afs_vlserver_list *afs_parse_text_addrs(struct afs_net *net,
+					       const char *text, size_t len,
+					       char delim,
+					       unsigned short service,
+					       unsigned short port)
 {
+	struct afs_vlserver_list *vllist;
 	struct afs_addr_list *alist;
 	const char *p, *end = text + len;
+	const char *problem;
 	unsigned int nr = 0;
+	int ret = -ENOMEM;
 
 	_enter("%*.*s,%c", (int)len, (int)len, text, delim);
 
-	if (!len)
+	if (!len) {
+		_leave(" = -EDESTADDRREQ [empty]");
 		return ERR_PTR(-EDESTADDRREQ);
+	}
 
 	if (delim == ':' && (memchr(text, ',', len) || !memchr(text, '.', len)))
 		delim = ',';
@@ -84,18 +90,24 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
 	/* Count the addresses */
 	p = text;
 	do {
-		if (!*p)
-			return ERR_PTR(-EINVAL);
+		if (!*p) {
+			problem = "nul";
+			goto inval;
+		}
 		if (*p == delim)
 			continue;
 		nr++;
 		if (*p == '[') {
 			p++;
-			if (p == end)
-				return ERR_PTR(-EINVAL);
+			if (p == end) {
+				problem = "brace1";
+				goto inval;
+			}
 			p = memchr(p, ']', end - p);
-			if (!p)
-				return ERR_PTR(-EINVAL);
+			if (!p) {
+				problem = "brace2";
+				goto inval;
+			}
 			p++;
 			if (p >= end)
 				break;
@@ -109,10 +121,19 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
 
 	_debug("%u/%u addresses", nr, AFS_MAX_ADDRESSES);
 
-	alist = afs_alloc_addrlist(nr, service, port);
-	if (!alist)
+	vllist = afs_alloc_vlserver_list(1);
+	if (!vllist)
 		return ERR_PTR(-ENOMEM);
 
+	vllist->nr_servers = 1;
+	vllist->servers[0].server = afs_alloc_vlserver("<dummy>", 7, AFS_VL_PORT);
+	if (!vllist->servers[0].server)
+		goto error_vl;
+
+	alist = afs_alloc_addrlist(nr, service, AFS_VL_PORT);
+	if (!alist)
+		goto error;
+
 	/* Extract the addresses */
 	p = text;
 	do {
@@ -135,17 +156,21 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
 					break;
 		}
 
-		if (in4_pton(p, q - p, (u8 *)&x[0], -1, &stop))
+		if (in4_pton(p, q - p, (u8 *)&x[0], -1, &stop)) {
 			family = AF_INET;
-		else if (in6_pton(p, q - p, (u8 *)x, -1, &stop))
+		} else if (in6_pton(p, q - p, (u8 *)x, -1, &stop)) {
 			family = AF_INET6;
-		else
+		} else {
+			problem = "family";
 			goto bad_address;
+		}
 
-		if (stop != q)
+		p = q;
+		if (stop != p) {
+			problem = "nostop";
 			goto bad_address;
+		}
 
-		p = q;
 		if (q < end && *q == ']')
 			p++;
 
@@ -154,18 +179,23 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
 				/* Port number specification "+1234" */
 				xport = 0;
 				p++;
-				if (p >= end || !isdigit(*p))
+				if (p >= end || !isdigit(*p)) {
+					problem = "port";
 					goto bad_address;
+				}
 				do {
 					xport *= 10;
 					xport += *p - '0';
-					if (xport > 65535)
+					if (xport > 65535) {
+						problem = "pval";
 						goto bad_address;
+					}
 					p++;
 				} while (p < end && isdigit(*p));
 			} else if (*p == delim) {
 				p++;
 			} else {
+				problem = "weird";
 				goto bad_address;
 			}
 		}
@@ -177,12 +207,23 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len,
 
 	} while (p < end);
 
+	rcu_assign_pointer(vllist->servers[0].server->addresses, alist);
 	_leave(" = [nr %u]", alist->nr_addrs);
-	return alist;
+	return vllist;
 
-bad_address:
-	kfree(alist);
+inval:
+	_leave(" = -EINVAL [%s %zu %*.*s]",
+	       problem, p - text, (int)len, (int)len, text);
 	return ERR_PTR(-EINVAL);
+bad_address:
+	_leave(" = -EINVAL [%s %zu %*.*s]",
+	       problem, p - text, (int)len, (int)len, text);
+	ret = -EINVAL;
+error:
+	afs_put_addrlist(alist);
+error_vl:
+	afs_put_vlserverlist(net, vllist);
+	return ERR_PTR(ret);
 }
 
 /*
@@ -201,30 +242,34 @@ static int afs_cmp_addr_list(const struct afs_addr_list *a1,
 /*
  * Perform a DNS query for VL servers and build a up an address list.
  */
-struct afs_addr_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry)
+struct afs_vlserver_list *afs_dns_query(struct afs_cell *cell, time64_t *_expiry)
 {
-	struct afs_addr_list *alist;
-	char *vllist = NULL;
+	struct afs_vlserver_list *vllist;
+	char *result = NULL;
 	int ret;
 
 	_enter("%s", cell->name);
 
-	ret = dns_query("afsdb", cell->name, cell->name_len,
-			"", &vllist, _expiry);
-	if (ret < 0)
+	ret = dns_query("afsdb", cell->name, cell->name_len, "srv=1",
+			&result, _expiry);
+	if (ret < 0) {
+		_leave(" = %d [dns]", ret);
 		return ERR_PTR(ret);
-
-	alist = afs_parse_text_addrs(vllist, strlen(vllist), ',',
-				     VL_SERVICE, AFS_VL_PORT);
-	if (IS_ERR(alist)) {
-		kfree(vllist);
-		if (alist != ERR_PTR(-ENOMEM))
-			pr_err("Failed to parse DNS data\n");
-		return alist;
 	}
 
-	kfree(vllist);
-	return alist;
+	if (*_expiry == 0)
+		*_expiry = ktime_get_real_seconds() + 60;
+
+	if (ret > 1 && result[0] == 0)
+		vllist = afs_extract_vlserver_list(cell, result, ret);
+	else
+		vllist = afs_parse_text_addrs(cell->net, result, ret, ',',
+					      VL_SERVICE, AFS_VL_PORT);
+	kfree(result);
+	if (IS_ERR(vllist) && vllist != ERR_PTR(-ENOMEM))
+		pr_err("Failed to parse DNS data %ld\n", PTR_ERR(vllist));
+
+	return vllist;
 }
 
 /*
@@ -347,43 +392,3 @@ int afs_end_cursor(struct afs_addr_cursor *ac)
 	ac->begun = false;
 	return ac->error;
 }
-
-/*
- * Set the address cursor for iterating over VL servers.
- */
-int afs_set_vl_cursor(struct afs_addr_cursor *ac, struct afs_cell *cell)
-{
-	struct afs_addr_list *alist;
-	int ret;
-
-	if (!rcu_access_pointer(cell->vl_addrs)) {
-		ret = wait_on_bit(&cell->flags, AFS_CELL_FL_NO_LOOKUP_YET,
-				  TASK_INTERRUPTIBLE);
-		if (ret < 0)
-			return ret;
-
-		if (!rcu_access_pointer(cell->vl_addrs) &&
-		    ktime_get_real_seconds() < cell->dns_expiry)
-			return cell->error;
-	}
-
-	read_lock(&cell->vl_addrs_lock);
-	alist = rcu_dereference_protected(cell->vl_addrs,
-					  lockdep_is_held(&cell->vl_addrs_lock));
-	if (alist->nr_addrs > 0)
-		afs_get_addrlist(alist);
-	else
-		alist = NULL;
-	read_unlock(&cell->vl_addrs_lock);
-
-	if (!alist)
-		return -EDESTADDRREQ;
-
-	ac->alist = alist;
-	ac->addr = NULL;
-	ac->start = READ_ONCE(alist->index);
-	ac->index = ac->start;
-	ac->error = 0;
-	ac->begun = false;
-	return 0;
-}
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 6127f0fcd62c4..963b6fa51fdf8 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -119,7 +119,7 @@ struct afs_cell *afs_lookup_cell_rcu(struct afs_net *net,
  */
 static struct afs_cell *afs_alloc_cell(struct afs_net *net,
 				       const char *name, unsigned int namelen,
-				       const char *vllist)
+				       const char *addresses)
 {
 	struct afs_cell *cell;
 	int i, ret;
@@ -134,7 +134,7 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net,
 	if (namelen == 5 && memcmp(name, "@cell", 5) == 0)
 		return ERR_PTR(-EINVAL);
 
-	_enter("%*.*s,%s", namelen, namelen, name, vllist);
+	_enter("%*.*s,%s", namelen, namelen, name, addresses);
 
 	cell = kzalloc(sizeof(struct afs_cell), GFP_KERNEL);
 	if (!cell) {
@@ -153,22 +153,23 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net,
 		       (1 << AFS_CELL_FL_NO_LOOKUP_YET));
 	INIT_LIST_HEAD(&cell->proc_volumes);
 	rwlock_init(&cell->proc_lock);
-	rwlock_init(&cell->vl_addrs_lock);
+	rwlock_init(&cell->vl_servers_lock);
 
 	/* Fill in the VL server list if we were given a list of addresses to
 	 * use.
 	 */
-	if (vllist) {
-		struct afs_addr_list *alist;
-
-		alist = afs_parse_text_addrs(vllist, strlen(vllist), ':',
-					     VL_SERVICE, AFS_VL_PORT);
-		if (IS_ERR(alist)) {
-			ret = PTR_ERR(alist);
+	if (addresses) {
+		struct afs_vlserver_list *vllist;
+
+		vllist = afs_parse_text_addrs(net,
+					      addresses, strlen(addresses), ':',
+					      VL_SERVICE, AFS_VL_PORT);
+		if (IS_ERR(vllist)) {
+			ret = PTR_ERR(vllist);
 			goto parse_failed;
 		}
 
-		rcu_assign_pointer(cell->vl_addrs, alist);
+		rcu_assign_pointer(cell->vl_servers, vllist);
 		cell->dns_expiry = TIME64_MAX;
 	}
 
@@ -356,14 +357,14 @@ int afs_cell_init(struct afs_net *net, const char *rootcell)
  */
 static void afs_update_cell(struct afs_cell *cell)
 {
-	struct afs_addr_list *alist, *old;
+	struct afs_vlserver_list *vllist, *old;
 	time64_t now, expiry;
 
 	_enter("%s", cell->name);
 
-	alist = afs_dns_query(cell, &expiry);
-	if (IS_ERR(alist)) {
-		switch (PTR_ERR(alist)) {
+	vllist = afs_dns_query(cell, &expiry);
+	if (IS_ERR(vllist)) {
+		switch (PTR_ERR(vllist)) {
 		case -ENODATA:
 			/* The DNS said that the cell does not exist */
 			set_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags);
@@ -387,12 +388,12 @@ static void afs_update_cell(struct afs_cell *cell)
 		/* Exclusion on changing vl_addrs is achieved by a
 		 * non-reentrant work item.
 		 */
-		old = rcu_dereference_protected(cell->vl_addrs, true);
-		rcu_assign_pointer(cell->vl_addrs, alist);
+		old = rcu_dereference_protected(cell->vl_servers, true);
+		rcu_assign_pointer(cell->vl_servers, vllist);
 		cell->dns_expiry = expiry;
 
 		if (old)
-			afs_put_addrlist(old);
+			afs_put_vlserverlist(cell->net, old);
 	}
 
 	if (test_and_clear_bit(AFS_CELL_FL_NO_LOOKUP_YET, &cell->flags))
@@ -414,7 +415,7 @@ static void afs_cell_destroy(struct rcu_head *rcu)
 
 	ASSERTCMP(atomic_read(&cell->usage), ==, 0);
 
-	afs_put_addrlist(rcu_access_pointer(cell->vl_addrs));
+	afs_put_vlserverlist(cell->net, rcu_access_pointer(cell->vl_servers));
 	key_put(cell->anonymous_key);
 	kfree(cell);
 
diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c
index f29c6dade7f62..0efed0a63080c 100644
--- a/fs/afs/dynroot.c
+++ b/fs/afs/dynroot.c
@@ -46,7 +46,7 @@ static int afs_probe_cell_name(struct dentry *dentry)
 		return 0;
 	}
 
-	ret = dns_query("afsdb", name, len, "", NULL, NULL);
+	ret = dns_query("afsdb", name, len, "srv=1", NULL, NULL);
 	if (ret == -ENODATA)
 		ret = -EDESTADDRREQ;
 	return ret;
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 81936a4d5035e..7e264cb9b4f7e 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -22,6 +22,7 @@
 #include <linux/backing-dev.h>
 #include <linux/uuid.h>
 #include <linux/mm_types.h>
+#include <linux/dns_resolver.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
 #include <net/sock.h>
@@ -77,6 +78,8 @@ struct afs_addr_list {
 	unsigned char		nr_addrs;
 	unsigned char		index;		/* Address currently in use */
 	unsigned char		nr_ipv4;	/* Number of IPv4 addresses */
+	enum dns_record_source	source:8;
+	enum dns_lookup_status	status:8;
 	unsigned long		probed;		/* Mask of servers that have been probed */
 	unsigned long		yfs;		/* Mask of servers that are YFS */
 	struct sockaddr_rxrpc	addrs[];
@@ -355,12 +358,51 @@ struct afs_cell {
 	rwlock_t		proc_lock;
 
 	/* VL server list. */
-	rwlock_t		vl_addrs_lock;	/* Lock on vl_addrs */
-	struct afs_addr_list	__rcu *vl_addrs; /* List of VL servers */
+	rwlock_t		vl_servers_lock; /* Lock on vl_servers */
+	struct afs_vlserver_list __rcu *vl_servers;
+
 	u8			name_len;	/* Length of name */
 	char			name[64 + 1];	/* Cell name, case-flattened and NUL-padded */
 };
 
+/*
+ * Volume Location server record.
+ */
+struct afs_vlserver {
+	struct rcu_head		rcu;
+	struct afs_addr_list	__rcu *addresses; /* List of addresses for this VL server */
+	unsigned long		flags;
+#define AFS_VLSERVER_FL_PROBED	0		/* The VL server has been probed */
+#define AFS_VLSERVER_FL_PROBING	1		/* VL server is being probed */
+	rwlock_t		lock;		/* Lock on addresses */
+	atomic_t		usage;
+	u16			name_len;	/* Length of name */
+	u16			port;
+	char			name[];		/* Server name, case-flattened */
+};
+
+/*
+ * Weighted list of Volume Location servers.
+ */
+struct afs_vlserver_entry {
+	u16			priority;	/* Preference (as SRV) */
+	u16			weight;		/* Weight (as SRV) */
+	enum dns_record_source	source:8;
+	enum dns_lookup_status	status:8;
+	struct afs_vlserver	*server;
+};
+
+struct afs_vlserver_list {
+	struct rcu_head		rcu;
+	atomic_t		usage;
+	u8			nr_servers;
+	u8			index;		/* Server currently in use */
+	enum dns_record_source	source:8;
+	enum dns_lookup_status	status:8;
+	rwlock_t		lock;
+	struct afs_vlserver_entry servers[];
+};
+
 /*
  * Cached VLDB entry.
  *
@@ -616,6 +658,23 @@ struct afs_addr_cursor {
 	bool			responded;	/* T if the current address responded */
 };
 
+/*
+ * Cursor for iterating over a set of volume location servers.
+ */
+struct afs_vl_cursor {
+	struct afs_addr_cursor	ac;
+	struct afs_cell		*cell;		/* The cell we're querying */
+	struct afs_vlserver_list *server_list;	/* Current server list (pins ref) */
+	struct key		*key;		/* Key for the server */
+	unsigned char		start;		/* Initial index in server list */
+	unsigned char		index;		/* Number of servers tried beyond start */
+	short			error;
+	unsigned short		flags;
+#define AFS_VL_CURSOR_STOP	0x0001		/* Set to cease iteration */
+#define AFS_VL_CURSOR_RETRY	0x0002		/* Set to do a retry */
+#define AFS_VL_CURSOR_RETRIED	0x0004		/* Set if started a retry */
+};
+
 /*
  * Cursor for iterating over a set of fileservers.
  */
@@ -662,12 +721,12 @@ extern struct afs_addr_list *afs_alloc_addrlist(unsigned int,
 						unsigned short,
 						unsigned short);
 extern void afs_put_addrlist(struct afs_addr_list *);
-extern struct afs_addr_list *afs_parse_text_addrs(const char *, size_t, char,
-						  unsigned short, unsigned short);
-extern struct afs_addr_list *afs_dns_query(struct afs_cell *, time64_t *);
+extern struct afs_vlserver_list *afs_parse_text_addrs(struct afs_net *,
+						      const char *, size_t, char,
+						      unsigned short, unsigned short);
+extern struct afs_vlserver_list *afs_dns_query(struct afs_cell *, time64_t *);
 extern bool afs_iterate_addresses(struct afs_addr_cursor *);
 extern int afs_end_cursor(struct afs_addr_cursor *);
-extern int afs_set_vl_cursor(struct afs_addr_cursor *, struct afs_cell *);
 
 extern void afs_merge_fs_addr4(struct afs_addr_list *, __be32, u16);
 extern void afs_merge_fs_addr6(struct afs_addr_list *, __be32 *, u16);
@@ -1088,14 +1147,43 @@ extern void afs_fs_exit(void);
 /*
  * vlclient.c
  */
-extern struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *,
-							 struct afs_addr_cursor *,
-							 struct key *, const char *, int);
-extern struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *, struct afs_addr_cursor *,
-						struct key *, const uuid_t *);
+extern struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_vl_cursor *,
+							 const char *, int);
+extern struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *, const uuid_t *);
 extern int afs_vl_get_capabilities(struct afs_net *, struct afs_addr_cursor *, struct key *);
-extern struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_net *, struct afs_addr_cursor *,
-						     struct key *, const uuid_t *);
+extern struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *, const uuid_t *);
+
+/*
+ * vl_rotate.c
+ */
+extern bool afs_begin_vlserver_operation(struct afs_vl_cursor *,
+					 struct afs_cell *, struct key *);
+extern bool afs_select_vlserver(struct afs_vl_cursor *);
+extern bool afs_select_current_vlserver(struct afs_vl_cursor *);
+extern int afs_end_vlserver_operation(struct afs_vl_cursor *);
+
+/*
+ * vlserver_list.c
+ */
+static inline struct afs_vlserver *afs_get_vlserver(struct afs_vlserver *vlserver)
+{
+	atomic_inc(&vlserver->usage);
+	return vlserver;
+}
+
+static inline struct afs_vlserver_list *afs_get_vlserverlist(struct afs_vlserver_list *vllist)
+{
+	if (vllist)
+		atomic_inc(&vllist->usage);
+	return vllist;
+}
+
+extern struct afs_vlserver *afs_alloc_vlserver(const char *, size_t, unsigned short);
+extern void afs_put_vlserver(struct afs_net *, struct afs_vlserver *);
+extern struct afs_vlserver_list *afs_alloc_vlserver_list(unsigned int);
+extern void afs_put_vlserverlist(struct afs_net *, struct afs_vlserver_list *);
+extern struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *,
+							   const void *, size_t);
 
 /*
  * volume.c
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 9101f62707af2..6585f4bec0d37 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -17,6 +17,11 @@
 #include <linux/uaccess.h>
 #include "internal.h"
 
+struct afs_vl_seq_net_private {
+	struct seq_net_private		seq;	/* Must be first */
+	struct afs_vlserver_list	*vllist;
+};
+
 static inline struct afs_net *afs_seq2net(struct seq_file *m)
 {
 	return afs_net(seq_file_net(m));
@@ -247,61 +252,102 @@ static const struct seq_operations afs_proc_cell_volumes_ops = {
 	.show	= afs_proc_cell_volumes_show,
 };
 
+static const char *const dns_record_sources[NR__dns_record_source + 1] = {
+	[DNS_RECORD_UNAVAILABLE]	= "unav",
+	[DNS_RECORD_FROM_CONFIG]	= "cfg",
+	[DNS_RECORD_FROM_DNS_A]		= "A",
+	[DNS_RECORD_FROM_DNS_AFSDB]	= "AFSDB",
+	[DNS_RECORD_FROM_DNS_SRV]	= "SRV",
+	[DNS_RECORD_FROM_NSS]		= "nss",
+	[NR__dns_record_source]		= "[weird]"
+};
+
+static const char *const dns_lookup_statuses[NR__dns_lookup_status + 1] = {
+	[DNS_LOOKUP_NOT_DONE]		= "no-lookup",
+	[DNS_LOOKUP_GOOD]		= "good",
+	[DNS_LOOKUP_GOOD_WITH_BAD]	= "good/bad",
+	[DNS_LOOKUP_BAD]		= "bad",
+	[DNS_LOOKUP_GOT_NOT_FOUND]	= "not-found",
+	[DNS_LOOKUP_GOT_LOCAL_FAILURE]	= "local-failure",
+	[DNS_LOOKUP_GOT_TEMP_FAILURE]	= "temp-failure",
+	[DNS_LOOKUP_GOT_NS_FAILURE]	= "ns-failure",
+	[NR__dns_lookup_status]		= "[weird]"
+};
+
 /*
  * Display the list of Volume Location servers we're using for a cell.
  */
 static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v)
 {
-	struct sockaddr_rxrpc *addr = v;
+	const struct afs_vl_seq_net_private *priv = m->private;
+	const struct afs_vlserver_list *vllist = priv->vllist;
+	const struct afs_vlserver_entry *entry;
+	const struct afs_vlserver *vlserver;
+	const struct afs_addr_list *alist;
+	int i;
 
-	/* display header on line 1 */
-	if (v == (void *)1) {
-		seq_puts(m, "ADDRESS\n");
+	if (v == SEQ_START_TOKEN) {
+		seq_printf(m, "# source %s, status %s\n",
+			   dns_record_sources[vllist->source],
+			   dns_lookup_statuses[vllist->status]);
 		return 0;
 	}
 
-	/* display one cell per line on subsequent lines */
-	seq_printf(m, "%pISp\n", &addr->transport);
+	entry = v;
+	vlserver = entry->server;
+	alist = rcu_dereference(vlserver->addresses);
+
+	seq_printf(m, "%s [p=%hu w=%hu s=%s,%s]:\n",
+		   vlserver->name, entry->priority, entry->weight,
+		   dns_record_sources[alist ? alist->source : entry->source],
+		   dns_lookup_statuses[alist ? alist->status : entry->status]);
+	if (alist) {
+		for (i = 0; i < alist->nr_addrs; i++)
+			seq_printf(m, " %c %pISpc\n",
+				   alist->index == i ? '>' : '-',
+				   &alist->addrs[i].transport);
+	}
 	return 0;
 }
 
 static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos)
 	__acquires(rcu)
 {
-	struct afs_addr_list *alist;
+	struct afs_vl_seq_net_private *priv = m->private;
+	struct afs_vlserver_list *vllist;
 	struct afs_cell *cell = PDE_DATA(file_inode(m->file));
 	loff_t pos = *_pos;
 
 	rcu_read_lock();
 
-	alist = rcu_dereference(cell->vl_addrs);
+	vllist = rcu_dereference(cell->vl_servers);
+	priv->vllist = vllist;
 
-	/* allow for the header line */
-	if (!pos)
-		return (void *) 1;
-	pos--;
+	if (pos < 0)
+		*_pos = pos = 0;
+	if (pos == 0)
+		return SEQ_START_TOKEN;
 
-	if (!alist || pos >= alist->nr_addrs)
+	if (!vllist || pos - 1 >= vllist->nr_servers)
 		return NULL;
 
-	return alist->addrs + pos;
+	return &vllist->servers[pos - 1];
 }
 
 static void *afs_proc_cell_vlservers_next(struct seq_file *m, void *v,
 					  loff_t *_pos)
 {
-	struct afs_addr_list *alist;
-	struct afs_cell *cell = PDE_DATA(file_inode(m->file));
+	struct afs_vl_seq_net_private *priv = m->private;
+	struct afs_vlserver_list *vllist = priv->vllist;
 	loff_t pos;
 
-	alist = rcu_dereference(cell->vl_addrs);
-
 	pos = *_pos;
-	(*_pos)++;
-	if (!alist || pos >= alist->nr_addrs)
+	pos++;
+	*_pos = pos;
+	if (!vllist || pos - 1 >= vllist->nr_servers)
 		return NULL;
 
-	return alist->addrs + pos;
+	return &vllist->servers[pos - 1];
 }
 
 static void afs_proc_cell_vlservers_stop(struct seq_file *m, void *v)
@@ -562,7 +608,7 @@ int afs_proc_cell_setup(struct afs_cell *cell)
 
 	if (!proc_create_net_data("vlservers", 0444, dir,
 				  &afs_proc_cell_vlservers_ops,
-				  sizeof(struct seq_net_private),
+				  sizeof(struct afs_vl_seq_net_private),
 				  cell) ||
 	    !proc_create_net_data("volumes", 0444, dir,
 				  &afs_proc_cell_volumes_ops,
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 1d329e6981d51..6102ea9ee3fbf 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -246,41 +246,23 @@ static struct afs_server *afs_alloc_server(struct afs_net *net,
 static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell,
 						 struct key *key, const uuid_t *uuid)
 {
-	struct afs_addr_cursor ac;
-	struct afs_addr_list *alist;
+	struct afs_vl_cursor vc;
+	struct afs_addr_list *alist = NULL;
 	int ret;
 
-	ret = afs_set_vl_cursor(&ac, cell);
-	if (ret < 0)
-		return ERR_PTR(ret);
-
-	while (afs_iterate_addresses(&ac)) {
-		if (test_bit(ac.index, &ac.alist->yfs))
-			alist = afs_yfsvl_get_endpoints(cell->net, &ac, key, uuid);
-		else
-			alist = afs_vl_get_addrs_u(cell->net, &ac, key, uuid);
-		switch (ac.error) {
-		case 0:
-			afs_end_cursor(&ac);
-			return alist;
-		case -ECONNABORTED:
-			ac.error = afs_abort_to_error(ac.abort_code);
-			goto error;
-		case -ENOMEM:
-		case -ENONET:
-			goto error;
-		case -ENETUNREACH:
-		case -EHOSTUNREACH:
-		case -ECONNREFUSED:
-			break;
-		default:
-			ac.error = -EIO;
-			goto error;
+	ret = -ERESTARTSYS;
+	if (afs_begin_vlserver_operation(&vc, cell, key)) {
+		while (afs_select_vlserver(&vc)) {
+			if (test_bit(vc.ac.index, &vc.ac.alist->yfs))
+				alist = afs_yfsvl_get_endpoints(&vc, uuid);
+			else
+				alist = afs_vl_get_addrs_u(&vc, uuid);
 		}
+
+		ret = afs_end_vlserver_operation(&vc);
 	}
 
-error:
-	return ERR_PTR(afs_end_cursor(&ac));
+	return ret < 0 ? ERR_PTR(ret) : alist;
 }
 
 /*
diff --git a/fs/afs/vl_list.c b/fs/afs/vl_list.c
new file mode 100644
index 0000000000000..c1e316ba105a9
--- /dev/null
+++ b/fs/afs/vl_list.c
@@ -0,0 +1,336 @@
+/* AFS vlserver list management.
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include "internal.h"
+
+struct afs_vlserver *afs_alloc_vlserver(const char *name, size_t name_len,
+					unsigned short port)
+{
+	struct afs_vlserver *vlserver;
+
+	vlserver = kzalloc(struct_size(vlserver, name, name_len + 1),
+			   GFP_KERNEL);
+	if (vlserver) {
+		atomic_set(&vlserver->usage, 1);
+		rwlock_init(&vlserver->lock);
+		vlserver->name_len = name_len;
+		vlserver->port = port;
+		memcpy(vlserver->name, name, name_len);
+	}
+	return vlserver;
+}
+
+static void afs_vlserver_rcu(struct rcu_head *rcu)
+{
+	struct afs_vlserver *vlserver = container_of(rcu, struct afs_vlserver, rcu);
+
+	afs_put_addrlist(rcu_access_pointer(vlserver->addresses));
+	kfree_rcu(vlserver, rcu);
+}
+
+void afs_put_vlserver(struct afs_net *net, struct afs_vlserver *vlserver)
+{
+	if (vlserver) {
+		unsigned int u = atomic_dec_return(&vlserver->usage);
+		//_debug("VL PUT %p{%u}", vlserver, u);
+
+		if (u == 0)
+			call_rcu(&vlserver->rcu, afs_vlserver_rcu);
+	}
+}
+
+struct afs_vlserver_list *afs_alloc_vlserver_list(unsigned int nr_servers)
+{
+	struct afs_vlserver_list *vllist;
+
+	vllist = kzalloc(struct_size(vllist, servers, nr_servers), GFP_KERNEL);
+	if (vllist) {
+		atomic_set(&vllist->usage, 1);
+		rwlock_init(&vllist->lock);
+	}
+
+	return vllist;
+}
+
+void afs_put_vlserverlist(struct afs_net *net, struct afs_vlserver_list *vllist)
+{
+	if (vllist) {
+		unsigned int u = atomic_dec_return(&vllist->usage);
+
+		//_debug("VLLS PUT %p{%u}", vllist, u);
+		if (u == 0) {
+			int i;
+
+			for (i = 0; i < vllist->nr_servers; i++) {
+				afs_put_vlserver(net, vllist->servers[i].server);
+			}
+			kfree_rcu(vllist, rcu);
+		}
+	}
+}
+
+static u16 afs_extract_le16(const u8 **_b)
+{
+	u16 val;
+
+	val  = (u16)*(*_b)++ << 0;
+	val |= (u16)*(*_b)++ << 8;
+	return val;
+}
+
+/*
+ * Build a VL server address list from a DNS queried server list.
+ */
+static struct afs_addr_list *afs_extract_vl_addrs(const u8 **_b, const u8 *end,
+						  u8 nr_addrs, u16 port)
+{
+	struct afs_addr_list *alist;
+	const u8 *b = *_b;
+	int ret = -EINVAL;
+
+	alist = afs_alloc_addrlist(nr_addrs, VL_SERVICE, port);
+	if (!alist)
+		return ERR_PTR(-ENOMEM);
+	if (nr_addrs == 0)
+		return alist;
+
+	for (; nr_addrs > 0 && end - b >= nr_addrs; nr_addrs--) {
+		struct dns_server_list_v1_address hdr;
+		__be32 x[4];
+
+		hdr.address_type = *b++;
+
+		switch (hdr.address_type) {
+		case DNS_ADDRESS_IS_IPV4:
+			if (end - b < 4) {
+				_leave(" = -EINVAL [short inet]");
+				goto error;
+			}
+			memcpy(x, b, 4);
+			afs_merge_fs_addr4(alist, x[0], port);
+			b += 4;
+			break;
+
+		case DNS_ADDRESS_IS_IPV6:
+			if (end - b < 16) {
+				_leave(" = -EINVAL [short inet6]");
+				goto error;
+			}
+			memcpy(x, b, 16);
+			afs_merge_fs_addr6(alist, x, port);
+			b += 16;
+			break;
+
+		default:
+			_leave(" = -EADDRNOTAVAIL [unknown af %u]",
+			       hdr.address_type);
+			ret = -EADDRNOTAVAIL;
+			goto error;
+		}
+	}
+
+	/* Start with IPv6 if available. */
+	if (alist->nr_ipv4 < alist->nr_addrs)
+		alist->index = alist->nr_ipv4;
+
+	*_b = b;
+	return alist;
+
+error:
+	*_b = b;
+	afs_put_addrlist(alist);
+	return ERR_PTR(ret);
+}
+
+/*
+ * Build a VL server list from a DNS queried server list.
+ */
+struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *cell,
+						    const void *buffer,
+						    size_t buffer_size)
+{
+	const struct dns_server_list_v1_header *hdr = buffer;
+	struct dns_server_list_v1_server bs;
+	struct afs_vlserver_list *vllist, *previous;
+	struct afs_addr_list *addrs;
+	struct afs_vlserver *server;
+	const u8 *b = buffer, *end = buffer + buffer_size;
+	int ret = -ENOMEM, nr_servers, i, j;
+
+	_enter("");
+
+	/* Check that it's a server list, v1 */
+	if (end - b < sizeof(*hdr) ||
+	    hdr->hdr.content != DNS_PAYLOAD_IS_SERVER_LIST ||
+	    hdr->hdr.version != 1) {
+		pr_notice("kAFS: Got DNS record [%u,%u] len %zu\n",
+			  hdr->hdr.content, hdr->hdr.version, end - b);
+		ret = -EDESTADDRREQ;
+		goto dump;
+	}
+
+	nr_servers = hdr->nr_servers;
+
+	vllist = afs_alloc_vlserver_list(nr_servers);
+	if (!vllist)
+		return ERR_PTR(-ENOMEM);
+
+	vllist->source = (hdr->source < NR__dns_record_source) ?
+		hdr->source : NR__dns_record_source;
+	vllist->status = (hdr->status < NR__dns_lookup_status) ?
+		hdr->status : NR__dns_lookup_status;
+
+	read_lock(&cell->vl_servers_lock);
+	previous = afs_get_vlserverlist(
+		rcu_dereference_protected(cell->vl_servers,
+					  lockdep_is_held(&cell->vl_servers_lock)));
+	read_unlock(&cell->vl_servers_lock);
+
+	b += sizeof(*hdr);
+	while (end - b >= sizeof(bs)) {
+		bs.name_len	= afs_extract_le16(&b);
+		bs.priority	= afs_extract_le16(&b);
+		bs.weight	= afs_extract_le16(&b);
+		bs.port		= afs_extract_le16(&b);
+		bs.source	= *b++;
+		bs.status	= *b++;
+		bs.protocol	= *b++;
+		bs.nr_addrs	= *b++;
+
+		_debug("extract %u %u %u %u %u %u %*.*s",
+		       bs.name_len, bs.priority, bs.weight,
+		       bs.port, bs.protocol, bs.nr_addrs,
+		       bs.name_len, bs.name_len, b);
+
+		if (end - b < bs.name_len)
+			break;
+
+		ret = -EPROTONOSUPPORT;
+		if (bs.protocol == DNS_SERVER_PROTOCOL_UNSPECIFIED) {
+			bs.protocol = DNS_SERVER_PROTOCOL_UDP;
+		} else if (bs.protocol != DNS_SERVER_PROTOCOL_UDP) {
+			_leave(" = [proto %u]", bs.protocol);
+			goto error;
+		}
+
+		if (bs.port == 0)
+			bs.port = AFS_VL_PORT;
+		if (bs.source > NR__dns_record_source)
+			bs.source = NR__dns_record_source;
+		if (bs.status > NR__dns_lookup_status)
+			bs.status = NR__dns_lookup_status;
+
+		server = NULL;
+		if (previous) {
+			/* See if we can update an old server record */
+			for (i = 0; i < previous->nr_servers; i++) {
+				struct afs_vlserver *p = previous->servers[i].server;
+
+				if (p->name_len == bs.name_len &&
+				    p->port == bs.port &&
+				    strncasecmp(b, p->name, bs.name_len) == 0) {
+					server = afs_get_vlserver(p);
+					break;
+				}
+			}
+		}
+
+		if (!server) {
+			ret = -ENOMEM;
+			server = afs_alloc_vlserver(b, bs.name_len, bs.port);
+			if (!server)
+				goto error;
+		}
+
+		b += bs.name_len;
+
+		/* Extract the addresses - note that we can't skip this as we
+		 * have to advance the payload pointer.
+		 */
+		addrs = afs_extract_vl_addrs(&b, end, bs.nr_addrs, bs.port);
+		if (IS_ERR(addrs)) {
+			ret = PTR_ERR(addrs);
+			goto error_2;
+		}
+
+		if (vllist->nr_servers >= nr_servers) {
+			_debug("skip %u >= %u", vllist->nr_servers, nr_servers);
+			afs_put_addrlist(addrs);
+			afs_put_vlserver(cell->net, server);
+			continue;
+		}
+
+		addrs->source = bs.source;
+		addrs->status = bs.status;
+
+		if (addrs->nr_addrs == 0) {
+			afs_put_addrlist(addrs);
+			if (!rcu_access_pointer(server->addresses)) {
+				afs_put_vlserver(cell->net, server);
+				continue;
+			}
+		} else {
+			struct afs_addr_list *old = addrs;
+
+			write_lock(&server->lock);
+			rcu_swap_protected(server->addresses, old,
+					   lockdep_is_held(&server->lock));
+			write_unlock(&server->lock);
+			afs_put_addrlist(old);
+		}
+
+
+		/* TODO: Might want to check for duplicates */
+
+		/* Insertion-sort by priority and weight */
+		for (j = 0; j < vllist->nr_servers; j++) {
+			if (bs.priority < vllist->servers[j].priority)
+				break; /* Lower preferable */
+			if (bs.priority == vllist->servers[j].priority &&
+			    bs.weight > vllist->servers[j].weight)
+				break; /* Higher preferable */
+		}
+
+		if (j < vllist->nr_servers) {
+			memmove(vllist->servers + j + 1,
+				vllist->servers + j,
+				(vllist->nr_servers - j) * sizeof(struct afs_vlserver_entry));
+		}
+
+		vllist->servers[j].priority = bs.priority;
+		vllist->servers[j].weight = bs.weight;
+		vllist->servers[j].server = server;
+		vllist->nr_servers++;
+	}
+
+	if (b != end) {
+		_debug("parse error %zd", b - end);
+		goto error;
+	}
+
+	afs_put_vlserverlist(cell->net, previous);
+	_leave(" = ok [%u]", vllist->nr_servers);
+	return vllist;
+
+error_2:
+	afs_put_vlserver(cell->net, server);
+error:
+	afs_put_vlserverlist(cell->net, vllist);
+	afs_put_vlserverlist(cell->net, previous);
+dump:
+	if (ret != -ENOMEM) {
+		printk(KERN_DEBUG "DNS: at %zu\n", (const void *)b - buffer);
+		print_hex_dump_bytes("DNS: ", DUMP_PREFIX_NONE, buffer, buffer_size);
+	}
+	return ERR_PTR(ret);
+}
diff --git a/fs/afs/vl_rotate.c b/fs/afs/vl_rotate.c
new file mode 100644
index 0000000000000..44a936ad9c7aa
--- /dev/null
+++ b/fs/afs/vl_rotate.c
@@ -0,0 +1,251 @@
+/* Handle vlserver selection and rotation.
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/sched/signal.h>
+#include "internal.h"
+#include "afs_vl.h"
+
+/*
+ * Begin an operation on a volume location server.
+ */
+bool afs_begin_vlserver_operation(struct afs_vl_cursor *vc, struct afs_cell *cell,
+				  struct key *key)
+{
+	memset(vc, 0, sizeof(*vc));
+	vc->cell = cell;
+	vc->key = key;
+	vc->error = -EDESTADDRREQ;
+	vc->ac.error = SHRT_MAX;
+
+	if (signal_pending(current)) {
+		vc->error = -EINTR;
+		vc->flags |= AFS_VL_CURSOR_STOP;
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Begin iteration through a server list, starting with the last used server if
+ * possible, or the last recorded good server if not.
+ */
+static bool afs_start_vl_iteration(struct afs_vl_cursor *vc)
+{
+	struct afs_cell *cell = vc->cell;
+
+	if (wait_on_bit(&cell->flags, AFS_CELL_FL_NO_LOOKUP_YET,
+			TASK_INTERRUPTIBLE)) {
+		vc->error = -ERESTARTSYS;
+		return false;
+	}
+
+	read_lock(&cell->vl_servers_lock);
+	vc->server_list = afs_get_vlserverlist(
+		rcu_dereference_protected(cell->vl_servers,
+					  lockdep_is_held(&cell->vl_servers_lock)));
+	read_unlock(&cell->vl_servers_lock);
+	if (!vc->server_list || !vc->server_list->nr_servers)
+		return false;
+
+	vc->start = READ_ONCE(vc->server_list->index);
+	vc->index = vc->start;
+	return true;
+}
+
+/*
+ * Select the vlserver to use.  May be called multiple times to rotate
+ * through the vlservers.
+ */
+bool afs_select_vlserver(struct afs_vl_cursor *vc)
+{
+	struct afs_addr_list *alist;
+	struct afs_vlserver *vlserver;
+	int error = vc->ac.error;
+
+	_enter("%u/%u,%u/%u,%d,%d",
+	       vc->index, vc->start,
+	       vc->ac.index, vc->ac.start,
+	       error, vc->ac.abort_code);
+
+	if (vc->flags & AFS_VL_CURSOR_STOP) {
+		_leave(" = f [stopped]");
+		return false;
+	}
+
+	/* Evaluate the result of the previous operation, if there was one. */
+	switch (error) {
+	case SHRT_MAX:
+		goto start;
+
+	default:
+	case 0:
+		/* Success or local failure.  Stop. */
+		vc->error = error;
+		vc->flags |= AFS_VL_CURSOR_STOP;
+		_leave(" = f [okay/local %d]", vc->ac.error);
+		return false;
+
+	case -ECONNABORTED:
+		/* The far side rejected the operation on some grounds.  This
+		 * might involve the server being busy or the volume having been moved.
+		 */
+		switch (vc->ac.abort_code) {
+		case AFSVL_IO:
+		case AFSVL_BADVOLOPER:
+		case AFSVL_NOMEM:
+			/* The server went weird. */
+			vc->error = -EREMOTEIO;
+			//write_lock(&vc->cell->vl_servers_lock);
+			//vc->server_list->weird_mask |= 1 << vc->index;
+			//write_unlock(&vc->cell->vl_servers_lock);
+			goto next_server;
+
+		default:
+			vc->error = afs_abort_to_error(vc->ac.abort_code);
+			goto failed;
+		}
+
+	case -ENETUNREACH:
+	case -EHOSTUNREACH:
+	case -ECONNREFUSED:
+	case -ETIMEDOUT:
+	case -ETIME:
+		_debug("no conn %d", error);
+		vc->error = error;
+		goto iterate_address;
+
+	case -ECONNRESET:
+		_debug("call reset");
+		vc->error = error;
+		vc->flags |= AFS_VL_CURSOR_RETRY;
+		goto next_server;
+	}
+
+restart_from_beginning:
+	_debug("restart");
+	afs_end_cursor(&vc->ac);
+	afs_put_vlserverlist(vc->cell->net, vc->server_list);
+	vc->server_list = NULL;
+	if (vc->flags & AFS_VL_CURSOR_RETRIED)
+		goto failed;
+	vc->flags |= AFS_VL_CURSOR_RETRIED;
+start:
+	_debug("start");
+
+	/* TODO: Consider checking the VL server list */
+
+	if (!afs_start_vl_iteration(vc))
+		goto failed;
+
+use_server:
+	_debug("use");
+	/* We're starting on a different vlserver from the list.  We need to
+	 * check it, find its address list and probe its capabilities before we
+	 * use it.
+	 */
+	ASSERTCMP(vc->ac.alist, ==, NULL);
+	vlserver = vc->server_list->servers[vc->index].server;
+
+	// TODO: Check the vlserver occasionally
+	//if (!afs_check_vlserver_record(vc, vlserver))
+	//	goto failed;
+
+	_debug("USING VLSERVER: %s", vlserver->name);
+
+	read_lock(&vlserver->lock);
+	alist = rcu_dereference_protected(vlserver->addresses,
+					  lockdep_is_held(&vlserver->lock));
+	afs_get_addrlist(alist);
+	read_unlock(&vlserver->lock);
+
+	memset(&vc->ac, 0, sizeof(vc->ac));
+
+	/* Probe the current vlserver if we haven't done so yet. */
+#if 0 // TODO
+	if (!test_bit(AFS_VLSERVER_FL_PROBED, &vlserver->flags)) {
+		vc->ac.alist = afs_get_addrlist(alist);
+
+		if (!afs_probe_vlserver(vc)) {
+			error = vc->ac.error;
+			switch (error) {
+			case -ENOMEM:
+			case -ERESTARTSYS:
+			case -EINTR:
+				goto failed_set_error;
+			default:
+				goto next_server;
+			}
+		}
+	}
+#endif
+
+	if (!vc->ac.alist)
+		vc->ac.alist = alist;
+	else
+		afs_put_addrlist(alist);
+
+	vc->ac.start = READ_ONCE(alist->index);
+	vc->ac.index = vc->ac.start;
+
+iterate_address:
+	ASSERT(vc->ac.alist);
+	_debug("iterate %d/%d", vc->ac.index, vc->ac.alist->nr_addrs);
+	/* Iterate over the current server's address list to try and find an
+	 * address on which it will respond to us.
+	 */
+	if (!afs_iterate_addresses(&vc->ac))
+		goto next_server;
+
+	_leave(" = t %pISpc", &vc->ac.addr->transport);
+	return true;
+
+next_server:
+	_debug("next");
+	afs_end_cursor(&vc->ac);
+	vc->index++;
+	if (vc->index >= vc->server_list->nr_servers)
+		vc->index = 0;
+	if (vc->index != vc->start)
+		goto use_server;
+
+	/* That's all the servers poked to no good effect.  Try again if some
+	 * of them were busy.
+	 */
+	if (vc->flags & AFS_VL_CURSOR_RETRY)
+		goto restart_from_beginning;
+
+	goto failed;
+
+failed:
+	vc->flags |= AFS_VL_CURSOR_STOP;
+	afs_end_cursor(&vc->ac);
+	_leave(" = f [failed %d]", vc->error);
+	return false;
+}
+
+/*
+ * Tidy up a volume location server cursor and unlock the vnode.
+ */
+int afs_end_vlserver_operation(struct afs_vl_cursor *vc)
+{
+	struct afs_net *net = vc->cell->net;
+
+	afs_end_cursor(&vc->ac);
+	afs_put_vlserverlist(net, vc->server_list);
+
+	if (vc->error == -ECONNABORTED)
+		vc->error = afs_abort_to_error(vc->ac.abort_code);
+
+	return vc->error;
+}
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index e18c51742daaa..3127ab9b55210 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -128,14 +128,13 @@ static const struct afs_call_type afs_RXVLGetEntryByNameU = {
  * Dispatch a get volume entry by name or ID operation (uuid variant).  If the
  * volname is a decimal number then it's a volume ID not a volume name.
  */
-struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *net,
-						  struct afs_addr_cursor *ac,
-						  struct key *key,
+struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_vl_cursor *vc,
 						  const char *volname,
 						  int volnamesz)
 {
 	struct afs_vldb_entry *entry;
 	struct afs_call *call;
+	struct afs_net *net = vc->cell->net;
 	size_t reqsz, padsz;
 	__be32 *bp;
 
@@ -155,7 +154,7 @@ struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *net,
 		return ERR_PTR(-ENOMEM);
 	}
 
-	call->key = key;
+	call->key = vc->key;
 	call->reply[0] = entry;
 	call->ret_reply0 = true;
 
@@ -168,7 +167,7 @@ struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_net *net,
 		memset((void *)bp + volnamesz, 0, padsz);
 
 	trace_afs_make_vl_call(call);
-	return (struct afs_vldb_entry *)afs_make_call(ac, call, GFP_KERNEL, false);
+	return (struct afs_vldb_entry *)afs_make_call(&vc->ac, call, GFP_KERNEL, false);
 }
 
 /*
@@ -266,14 +265,13 @@ static const struct afs_call_type afs_RXVLGetAddrsU = {
  * Dispatch an operation to get the addresses for a server, where the server is
  * nominated by UUID.
  */
-struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *net,
-					 struct afs_addr_cursor *ac,
-					 struct key *key,
+struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *vc,
 					 const uuid_t *uuid)
 {
 	struct afs_ListAddrByAttributes__xdr *r;
 	const struct afs_uuid *u = (const struct afs_uuid *)uuid;
 	struct afs_call *call;
+	struct afs_net *net = vc->cell->net;
 	__be32 *bp;
 	int i;
 
@@ -285,7 +283,7 @@ struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *net,
 	if (!call)
 		return ERR_PTR(-ENOMEM);
 
-	call->key = key;
+	call->key = vc->key;
 	call->reply[0] = NULL;
 	call->ret_reply0 = true;
 
@@ -306,7 +304,7 @@ struct afs_addr_list *afs_vl_get_addrs_u(struct afs_net *net,
 		r->uuid.node[i] = htonl(u->node[i]);
 
 	trace_afs_make_vl_call(call);
-	return (struct afs_addr_list *)afs_make_call(ac, call, GFP_KERNEL, false);
+	return (struct afs_addr_list *)afs_make_call(&vc->ac, call, GFP_KERNEL, false);
 }
 
 /*
@@ -367,14 +365,13 @@ static const struct afs_call_type afs_RXVLGetCapabilities = {
 };
 
 /*
- * Probe a fileserver for the capabilities that it supports.  This can
+ * Probe a volume server for the capabilities that it supports.  This can
  * return up to 196 words.
  *
  * We use this to probe for service upgrade to determine what the server at the
  * other end supports.
  */
-int afs_vl_get_capabilities(struct afs_net *net,
-			    struct afs_addr_cursor *ac,
+int afs_vl_get_capabilities(struct afs_net *net, struct afs_addr_cursor *ac,
 			    struct key *key)
 {
 	struct afs_call *call;
@@ -617,12 +614,11 @@ static const struct afs_call_type afs_YFSVLGetEndpoints = {
  * Dispatch an operation to get the addresses for a server, where the server is
  * nominated by UUID.
  */
-struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_net *net,
-					      struct afs_addr_cursor *ac,
-					      struct key *key,
+struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *vc,
 					      const uuid_t *uuid)
 {
 	struct afs_call *call;
+	struct afs_net *net = vc->cell->net;
 	__be32 *bp;
 
 	_enter("");
@@ -633,7 +629,7 @@ struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_net *net,
 	if (!call)
 		return ERR_PTR(-ENOMEM);
 
-	call->key = key;
+	call->key = vc->key;
 	call->reply[0] = NULL;
 	call->ret_reply0 = true;
 
@@ -644,5 +640,5 @@ struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_net *net,
 	memcpy(bp, uuid, sizeof(*uuid)); /* Type opr_uuid */
 
 	trace_afs_make_vl_call(call);
-	return (struct afs_addr_list *)afs_make_call(ac, call, GFP_KERNEL, false);
+	return (struct afs_addr_list *)afs_make_call(&vc->ac, call, GFP_KERNEL, false);
 }
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index 3037bd01f617d..1cd263fa60282 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -74,55 +74,35 @@ static struct afs_vldb_entry *afs_vl_lookup_vldb(struct afs_cell *cell,
 						 const char *volname,
 						 size_t volnamesz)
 {
-	struct afs_addr_cursor ac;
-	struct afs_vldb_entry *vldb;
+	struct afs_vldb_entry *vldb = ERR_PTR(-EDESTADDRREQ);
+	struct afs_vl_cursor vc;
 	int ret;
 
-	ret = afs_set_vl_cursor(&ac, cell);
-	if (ret < 0)
-		return ERR_PTR(ret);
+	if (!afs_begin_vlserver_operation(&vc, cell, key))
+		return ERR_PTR(-ERESTARTSYS);
 
-	while (afs_iterate_addresses(&ac)) {
-		if (!test_bit(ac.index, &ac.alist->probed)) {
-			ret = afs_vl_get_capabilities(cell->net, &ac, key);
+	while (afs_select_vlserver(&vc)) {
+		if (!test_bit(vc.ac.index, &vc.ac.alist->probed)) {
+			ret = afs_vl_get_capabilities(cell->net, &vc.ac, key);
 			switch (ret) {
 			case VL_SERVICE:
-				clear_bit(ac.index, &ac.alist->yfs);
-				set_bit(ac.index, &ac.alist->probed);
-				ac.addr->srx_service = ret;
+				clear_bit(vc.ac.index, &vc.ac.alist->yfs);
+				set_bit(vc.ac.index, &vc.ac.alist->probed);
+				vc.ac.addr->srx_service = ret;
 				break;
 			case YFS_VL_SERVICE:
-				set_bit(ac.index, &ac.alist->yfs);
-				set_bit(ac.index, &ac.alist->probed);
-				ac.addr->srx_service = ret;
+				set_bit(vc.ac.index, &vc.ac.alist->yfs);
+				set_bit(vc.ac.index, &vc.ac.alist->probed);
+				vc.ac.addr->srx_service = ret;
 				break;
 			}
 		}
 		
-		vldb = afs_vl_get_entry_by_name_u(cell->net, &ac, key,
-						  volname, volnamesz);
-		switch (ac.error) {
-		case 0:
-			afs_end_cursor(&ac);
-			return vldb;
-		case -ECONNABORTED:
-			ac.error = afs_abort_to_error(ac.abort_code);
-			goto error;
-		case -ENOMEM:
-		case -ENONET:
-			goto error;
-		case -ENETUNREACH:
-		case -EHOSTUNREACH:
-		case -ECONNREFUSED:
-			break;
-		default:
-			ac.error = -EIO;
-			goto error;
-		}
+		vldb = afs_vl_get_entry_by_name_u(&vc, volname, volnamesz);
 	}
 
-error:
-	return ERR_PTR(afs_end_cursor(&ac));
+	ret = afs_end_vlserver_operation(&vc);
+	return ret < 0 ? ERR_PTR(ret) : vldb;
 }
 
 /*
-- 
GitLab


From ded2f4c58ac24083c536aa7d2ff2b73752a88612 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sat, 20 Oct 2018 00:57:57 +0100
Subject: [PATCH 0151/1890] afs: Fix TTL on VL server and address lists

Currently the TTL on VL server and address lists isn't set in all
circumstances and may be set to poor choices in others, since the TTL is
derived from the SRV/AFSDB DNS record if and when available.

Fix the TTL by limiting the range to a minimum and maximum from the current
time.  At some point these can be made into sysctl knobs.  Further, use the
TTL we obtained from the upcall to set the expiry on negative results too;
in future a mechanism can be added to force reloading of such data.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/cell.c | 26 ++++++++++++++++++++++----
 fs/afs/proc.c | 14 +++++++++++---
 2 files changed, 33 insertions(+), 7 deletions(-)

diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index 963b6fa51fdf8..cf445dbd5f2e0 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -20,6 +20,8 @@
 #include "internal.h"
 
 static unsigned __read_mostly afs_cell_gc_delay = 10;
+static unsigned __read_mostly afs_cell_min_ttl = 10 * 60;
+static unsigned __read_mostly afs_cell_max_ttl = 24 * 60 * 60;
 
 static void afs_manage_cell(struct work_struct *);
 
@@ -171,6 +173,8 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net,
 
 		rcu_assign_pointer(cell->vl_servers, vllist);
 		cell->dns_expiry = TIME64_MAX;
+	} else {
+		cell->dns_expiry = ktime_get_real_seconds();
 	}
 
 	_leave(" = %p", cell);
@@ -358,25 +362,39 @@ int afs_cell_init(struct afs_net *net, const char *rootcell)
 static void afs_update_cell(struct afs_cell *cell)
 {
 	struct afs_vlserver_list *vllist, *old;
-	time64_t now, expiry;
+	unsigned int min_ttl = READ_ONCE(afs_cell_min_ttl);
+	unsigned int max_ttl = READ_ONCE(afs_cell_max_ttl);
+	time64_t now, expiry = 0;
 
 	_enter("%s", cell->name);
 
 	vllist = afs_dns_query(cell, &expiry);
+
+	now = ktime_get_real_seconds();
+	if (min_ttl > max_ttl)
+		max_ttl = min_ttl;
+	if (expiry < now + min_ttl)
+		expiry = now + min_ttl;
+	else if (expiry > now + max_ttl)
+		expiry = now + max_ttl;
+
 	if (IS_ERR(vllist)) {
 		switch (PTR_ERR(vllist)) {
 		case -ENODATA:
-			/* The DNS said that the cell does not exist */
+		case -EDESTADDRREQ:
+			/* The DNS said that the cell does not exist or there
+			 * weren't any addresses to be had.
+			 */
 			set_bit(AFS_CELL_FL_NOT_FOUND, &cell->flags);
 			clear_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags);
-			cell->dns_expiry = ktime_get_real_seconds() + 61;
+			cell->dns_expiry = expiry;
 			break;
 
 		case -EAGAIN:
 		case -ECONNREFUSED:
 		default:
 			set_bit(AFS_CELL_FL_DNS_FAIL, &cell->flags);
-			cell->dns_expiry = ktime_get_real_seconds() + 10;
+			cell->dns_expiry = now + 10;
 			break;
 		}
 
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 6585f4bec0d37..fc36c41641ab1 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -37,16 +37,24 @@ static inline struct afs_net *afs_seq2net_single(struct seq_file *m)
  */
 static int afs_proc_cells_show(struct seq_file *m, void *v)
 {
-	struct afs_cell *cell = list_entry(v, struct afs_cell, proc_link);
+	struct afs_vlserver_list *vllist;
+	struct afs_cell *cell;
 
 	if (v == SEQ_START_TOKEN) {
 		/* display header on line 1 */
-		seq_puts(m, "USE NAME\n");
+		seq_puts(m, "USE    TTL SV NAME\n");
 		return 0;
 	}
 
+	cell = list_entry(v, struct afs_cell, proc_link);
+	vllist = rcu_dereference(cell->vl_servers);
+
 	/* display one cell per line on subsequent lines */
-	seq_printf(m, "%3u %s\n", atomic_read(&cell->usage), cell->name);
+	seq_printf(m, "%3u %6lld %2u %s\n",
+		   atomic_read(&cell->usage),
+		   cell->dns_expiry - ktime_get_real_seconds(),
+		   vllist ? vllist->nr_servers : 0,
+		   cell->name);
 	return 0;
 }
 
-- 
GitLab


From 4ac15ea53622272c01954461b4814892b7481b40 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sat, 20 Oct 2018 00:57:57 +0100
Subject: [PATCH 0152/1890] afs: Handle EIO from delivery function

Fix afs_deliver_to_call() to handle -EIO being returned by the operation
delivery function, indicating that the call found itself in the wrong
state, by printing an error and aborting the call.

Currently, an assertion failure will occur.  This can happen, say, if the
delivery function falls off the end without calling afs_extract_data() with
the want_more parameter set to false to collect the end of the Rx phase of
a call.

The assertion failure looks like:

	AFS: Assertion failed
	4 == 7 is false
	0x4 == 0x7 is false
	------------[ cut here ]------------
	kernel BUG at fs/afs/rxrpc.c:462!

and is matched in the trace buffer by a line like:

kworker/7:3-3226 [007] ...1 85158.030203: afs_io_error: c=0003be0c r=-5 CM_REPLY

Fixes: 98bf40cd99fc ("afs: Protect call->state changes against signals")
Reported-by: Marc Dionne <marc.dionne@auristor.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/rxrpc.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index a3904a8315de6..947ae3ab389bb 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -499,7 +499,6 @@ static void afs_deliver_to_call(struct afs_call *call)
 		case -EINPROGRESS:
 		case -EAGAIN:
 			goto out;
-		case -EIO:
 		case -ECONNABORTED:
 			ASSERTCMP(state, ==, AFS_CALL_COMPLETE);
 			goto done;
@@ -508,6 +507,10 @@ static void afs_deliver_to_call(struct afs_call *call)
 			rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
 						abort_code, ret, "KIV");
 			goto local_abort;
+		case -EIO:
+			pr_err("kAFS: Call %u in bad state %u\n",
+			       call->debug_id, state);
+			/* Fall through */
 		case -ENODATA:
 		case -EBADMSG:
 		case -EMSGSIZE:
-- 
GitLab


From f51375cd9e1ad75e9e38186aa0d3749ade7d52a5 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sat, 20 Oct 2018 00:57:57 +0100
Subject: [PATCH 0153/1890] afs: Add a couple of tracepoints to log I/O errors

Add a couple of tracepoints to log the production of I/O errors within the AFS
filesystem.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/cmservice.c         | 10 ++---
 fs/afs/dir.c               | 18 +++++----
 fs/afs/internal.h          | 11 ++++++
 fs/afs/mntpt.c             |  5 ++-
 fs/afs/rxrpc.c             |  2 +-
 fs/afs/server.c            |  2 +-
 fs/afs/write.c             |  1 +
 include/trace/events/afs.h | 81 ++++++++++++++++++++++++++++++++++++++
 8 files changed, 114 insertions(+), 16 deletions(-)

diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 4db62ae8dc1a6..186f621f87221 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -260,7 +260,7 @@ static int afs_deliver_cb_callback(struct afs_call *call)
 	}
 
 	if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
-		return -EIO;
+		return afs_io_error(call, afs_io_error_cm_reply);
 
 	/* we'll need the file server record as that tells us which set of
 	 * vnodes to operate upon */
@@ -368,7 +368,7 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
 	}
 
 	if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
-		return -EIO;
+		return afs_io_error(call, afs_io_error_cm_reply);
 
 	/* we'll need the file server record as that tells us which set of
 	 * vnodes to operate upon */
@@ -409,7 +409,7 @@ static int afs_deliver_cb_probe(struct afs_call *call)
 		return ret;
 
 	if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
-		return -EIO;
+		return afs_io_error(call, afs_io_error_cm_reply);
 
 	return afs_queue_call_work(call);
 }
@@ -490,7 +490,7 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call)
 	}
 
 	if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
-		return -EIO;
+		return afs_io_error(call, afs_io_error_cm_reply);
 
 	return afs_queue_call_work(call);
 }
@@ -573,7 +573,7 @@ static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call)
 		return ret;
 
 	if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
-		return -EIO;
+		return afs_io_error(call, afs_io_error_cm_reply);
 
 	return afs_queue_call_work(call);
 }
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 855bf2b79fed4..78f9754fd03d9 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -138,6 +138,7 @@ static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page,
 			       ntohs(dbuf->blocks[tmp].hdr.magic));
 			trace_afs_dir_check_failed(dvnode, off, i_size);
 			kunmap(page);
+			trace_afs_file_error(dvnode, -EIO, afs_file_error_dir_bad_magic);
 			goto error;
 		}
 
@@ -190,9 +191,11 @@ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
 retry:
 	i_size = i_size_read(&dvnode->vfs_inode);
 	if (i_size < 2048)
-		return ERR_PTR(-EIO);
-	if (i_size > 2048 * 1024)
+		return ERR_PTR(afs_bad(dvnode, afs_file_error_dir_small));
+	if (i_size > 2048 * 1024) {
+		trace_afs_file_error(dvnode, -EFBIG, afs_file_error_dir_big);
 		return ERR_PTR(-EFBIG);
+	}
 
 	_enter("%llu", i_size);
 
@@ -315,7 +318,8 @@ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
 /*
  * deal with one block in an AFS directory
  */
-static int afs_dir_iterate_block(struct dir_context *ctx,
+static int afs_dir_iterate_block(struct afs_vnode *dvnode,
+				 struct dir_context *ctx,
 				 union afs_xdr_dir_block *block,
 				 unsigned blkoff)
 {
@@ -365,7 +369,7 @@ static int afs_dir_iterate_block(struct dir_context *ctx,
 				       " (len %u/%zu)",
 				       blkoff / sizeof(union afs_xdr_dir_block),
 				       offset, next, tmp, nlen);
-				return -EIO;
+				return afs_bad(dvnode, afs_file_error_dir_over_end);
 			}
 			if (!(block->hdr.bitmap[next / 8] &
 			      (1 << (next % 8)))) {
@@ -373,7 +377,7 @@ static int afs_dir_iterate_block(struct dir_context *ctx,
 				       " %u unmarked extension (len %u/%zu)",
 				       blkoff / sizeof(union afs_xdr_dir_block),
 				       offset, next, tmp, nlen);
-				return -EIO;
+				return afs_bad(dvnode, afs_file_error_dir_unmarked_ext);
 			}
 
 			_debug("ENT[%zu.%u]: ext %u/%zu",
@@ -442,7 +446,7 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
 		 */
 		page = req->pages[blkoff / PAGE_SIZE];
 		if (!page) {
-			ret = -EIO;
+			ret = afs_bad(dvnode, afs_file_error_dir_missing_page);
 			break;
 		}
 		mark_page_accessed(page);
@@ -455,7 +459,7 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
 		do {
 			dblock = &dbuf->blocks[(blkoff % PAGE_SIZE) /
 					       sizeof(union afs_xdr_dir_block)];
-			ret = afs_dir_iterate_block(ctx, dblock, blkoff);
+			ret = afs_dir_iterate_block(dvnode, ctx, dblock, blkoff);
 			if (ret != 1) {
 				kunmap(page);
 				goto out;
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 7e264cb9b4f7e..28d08aac515da 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -1257,6 +1257,17 @@ static inline void afs_check_for_remote_deletion(struct afs_fs_cursor *fc,
 	}
 }
 
+static inline int afs_io_error(struct afs_call *call, enum afs_io_error where)
+{
+	trace_afs_io_error(call->debug_id, -EIO, where);
+	return -EIO;
+}
+
+static inline int afs_bad(struct afs_vnode *vnode, enum afs_file_error where)
+{
+	trace_afs_file_error(vnode, -EIO, where);
+	return -EIO;
+}
 
 /*****************************************************************************/
 /*
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 99fd13500a97f..2e51c6994148f 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -130,9 +130,10 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
 			goto error_no_page;
 		}
 
-		ret = -EIO;
-		if (PageError(page))
+		if (PageError(page)) {
+			ret = afs_bad(AFS_FS_I(d_inode(mntpt)), afs_file_error_mntpt);
 			goto error;
+		}
 
 		buf = kmap_atomic(page);
 		memcpy(devname, buf, size);
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 947ae3ab389bb..e6ab824ab7613 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -915,7 +915,7 @@ int afs_extract_data(struct afs_call *call, bool want_more)
 			break;
 		case AFS_CALL_COMPLETE:
 			kdebug("prem complete %d", call->error);
-			return -EIO;
+			return afs_io_error(call, afs_io_error_extract);
 		default:
 			break;
 		}
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 6102ea9ee3fbf..1a087eb8f2d72 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -542,7 +542,7 @@ static bool afs_do_probe_fileserver(struct afs_fs_cursor *fc)
 		case -ETIME:
 			break;
 		default:
-			fc->ac.error = -EIO;
+			fc->ac.error = afs_io_error(NULL, afs_io_error_fs_probe_fail);
 			goto error;
 		}
 	}
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 19c04caf3c012..fdb9d6024126e 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -533,6 +533,7 @@ static int afs_write_back_from_locked_page(struct address_space *mapping,
 	case -ENOENT:
 	case -ENOMEDIUM:
 	case -ENXIO:
+		trace_afs_file_error(vnode, ret, afs_file_error_writeback_fail);
 		afs_kill_pages(mapping, first, last);
 		mapping_set_error(mapping, ret);
 		break;
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index 5e0f8dcede261..48b20a261d395 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -103,6 +103,24 @@ enum afs_eproto_cause {
 	afs_eproto_yvl_vlendpt_type,
 };
 
+enum afs_io_error {
+	afs_io_error_cm_reply,
+	afs_io_error_extract,
+	afs_io_error_fs_probe_fail,
+	afs_io_error_vl_lookup_fail,
+};
+
+enum afs_file_error {
+	afs_file_error_dir_bad_magic,
+	afs_file_error_dir_big,
+	afs_file_error_dir_missing_page,
+	afs_file_error_dir_over_end,
+	afs_file_error_dir_small,
+	afs_file_error_dir_unmarked_ext,
+	afs_file_error_mntpt,
+	afs_file_error_writeback_fail,
+};
+
 #endif /* end __AFS_DECLARE_TRACE_ENUMS_ONCE_ONLY */
 
 /*
@@ -183,6 +201,21 @@ enum afs_eproto_cause {
 	EM(afs_eproto_yvl_vlendpt6_len,	"YVL.VlEnd6Len") \
 	E_(afs_eproto_yvl_vlendpt_type,	"YVL.VlEndType")
 
+#define afs_io_errors							\
+	EM(afs_io_error_cm_reply,		"CM_REPLY")		\
+	EM(afs_io_error_extract,		"EXTRACT")		\
+	EM(afs_io_error_fs_probe_fail,		"FS_PROBE_FAIL")	\
+	E_(afs_io_error_vl_lookup_fail,		"VL_LOOKUP_FAIL")
+
+#define afs_file_errors							\
+	EM(afs_file_error_dir_bad_magic,	"DIR_BAD_MAGIC")	\
+	EM(afs_file_error_dir_big,		"DIR_BIG")		\
+	EM(afs_file_error_dir_missing_page,	"DIR_MISSING_PAGE")	\
+	EM(afs_file_error_dir_over_end,		"DIR_ENT_OVER_END")	\
+	EM(afs_file_error_dir_small,		"DIR_SMALL")		\
+	EM(afs_file_error_dir_unmarked_ext,	"DIR_UNMARKED_EXT")	\
+	EM(afs_file_error_mntpt,		"MNTPT_READ_FAILED")	\
+	E_(afs_file_error_writeback_fail,	"WRITEBACK_FAILED")
 
 /*
  * Export enum symbols via userspace.
@@ -197,6 +230,9 @@ afs_fs_operations;
 afs_vl_operations;
 afs_edit_dir_ops;
 afs_edit_dir_reasons;
+afs_eproto_causes;
+afs_io_errors;
+afs_file_errors;
 
 /*
  * Now redefine the EM() and E_() macros to map the enums to the strings that
@@ -613,6 +649,51 @@ TRACE_EVENT(afs_protocol_error,
 		      __print_symbolic(__entry->cause, afs_eproto_causes))
 	    );
 
+TRACE_EVENT(afs_io_error,
+	    TP_PROTO(unsigned int call, int error, enum afs_io_error where),
+
+	    TP_ARGS(call, error, where),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int,	call		)
+		    __field(int,		error		)
+		    __field(enum afs_io_error,	where		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->call = call;
+		    __entry->error = error;
+		    __entry->where = where;
+			   ),
+
+	    TP_printk("c=%08x r=%d %s",
+		      __entry->call, __entry->error,
+		      __print_symbolic(__entry->where, afs_io_errors))
+	    );
+
+TRACE_EVENT(afs_file_error,
+	    TP_PROTO(struct afs_vnode *vnode, int error, enum afs_file_error where),
+
+	    TP_ARGS(vnode, error, where),
+
+	    TP_STRUCT__entry(
+		    __field_struct(struct afs_fid,	fid		)
+		    __field(int,			error		)
+		    __field(enum afs_file_error,	where		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->fid = vnode->fid;
+		    __entry->error = error;
+		    __entry->where = where;
+			   ),
+
+	    TP_printk("%x:%x:%x r=%d %s",
+		      __entry->fid.vid, __entry->fid.vnode, __entry->fid.unique,
+		      __entry->error,
+		      __print_symbolic(__entry->where, afs_file_errors))
+	    );
+
 TRACE_EVENT(afs_cm_no_server,
 	    TP_PROTO(struct afs_call *call, struct sockaddr_rxrpc *srx),
 
-- 
GitLab


From 2a0b4f64c9edcdcb67306f26d9d08ef982cb0ccd Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sat, 20 Oct 2018 00:57:57 +0100
Subject: [PATCH 0154/1890] afs: Don't invoke the server to read data beyond
 EOF

When writing a new page, clear space in the page rather than attempting to
load it from the server if the space is beyond the EOF.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/write.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/fs/afs/write.c b/fs/afs/write.c
index fdb9d6024126e..11066a3248baf 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -33,10 +33,21 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
 			 loff_t pos, unsigned int len, struct page *page)
 {
 	struct afs_read *req;
+	size_t p;
+	void *data;
 	int ret;
 
 	_enter(",,%llu", (unsigned long long)pos);
 
+	if (pos >= vnode->vfs_inode.i_size) {
+		p = pos & ~PAGE_MASK;
+		ASSERTCMP(p + len, <=, PAGE_SIZE);
+		data = kmap(page);
+		memset(data + p, 0, len);
+		kunmap(page);
+		return 0;
+	}
+
 	req = kzalloc(sizeof(struct afs_read) + sizeof(struct page *),
 		      GFP_KERNEL);
 	if (!req)
-- 
GitLab


From 3b6492df4153b8550d347dfc581856138678a231 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sat, 20 Oct 2018 00:57:57 +0100
Subject: [PATCH 0155/1890] afs: Increase to 64-bit volume ID and 96-bit vnode
 ID for YFS

Increase the sizes of the volume ID to 64 bits and the vnode ID (inode
number equivalent) to 96 bits to allow the support of YFS.

This requires the iget comparator to check the vnode->fid rather than i_ino
and i_generation as i_ino is not sufficiently capacious.  It also requires
this data to be placed into the vnode cache key for fscache.

For the moment, just discard the top 32 bits of the vnode ID when returning
it though stat.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/afs.h               | 11 ++++++-----
 fs/afs/cache.c             |  2 +-
 fs/afs/callback.c          |  2 +-
 fs/afs/dir.c               | 24 ++++++++++++------------
 fs/afs/dynroot.c           |  2 +-
 fs/afs/file.c              |  8 ++++----
 fs/afs/flock.c             | 22 +++++++++++-----------
 fs/afs/fsclient.c          | 24 ++++++++++++------------
 fs/afs/inode.c             | 31 +++++++++++++++++--------------
 fs/afs/proc.c              |  2 +-
 fs/afs/rotate.c            |  2 +-
 fs/afs/security.c          |  6 +++---
 fs/afs/super.c             |  5 +++--
 fs/afs/volume.c            |  2 +-
 fs/afs/write.c             | 18 +++++++++---------
 fs/afs/xattr.c             |  2 +-
 include/trace/events/afs.h |  4 ++--
 17 files changed, 86 insertions(+), 81 deletions(-)

diff --git a/fs/afs/afs.h b/fs/afs/afs.h
index b4ff1f7ae4ab0..c23b31b742fa8 100644
--- a/fs/afs/afs.h
+++ b/fs/afs/afs.h
@@ -23,9 +23,9 @@
 #define AFSPATHMAX		1024	/* Maximum length of a pathname plus NUL */
 #define AFSOPAQUEMAX		1024	/* Maximum length of an opaque field */
 
-typedef unsigned			afs_volid_t;
-typedef unsigned			afs_vnodeid_t;
-typedef unsigned long long		afs_dataversion_t;
+typedef u64			afs_volid_t;
+typedef u64			afs_vnodeid_t;
+typedef u64			afs_dataversion_t;
 
 typedef enum {
 	AFSVL_RWVOL,			/* read/write volume */
@@ -52,8 +52,9 @@ typedef enum {
  */
 struct afs_fid {
 	afs_volid_t	vid;		/* volume ID */
-	afs_vnodeid_t	vnode;		/* file index within volume */
-	unsigned	unique;		/* unique ID number (file index version) */
+	afs_vnodeid_t	vnode;		/* Lower 64-bits of file index within volume */
+	u32		vnode_hi;	/* Upper 32-bits of file index */
+	u32		unique;		/* unique ID number (file index version) */
 };
 
 /*
diff --git a/fs/afs/cache.c b/fs/afs/cache.c
index b1c31ec4523a8..f6d0a21e8052f 100644
--- a/fs/afs/cache.c
+++ b/fs/afs/cache.c
@@ -49,7 +49,7 @@ static enum fscache_checkaux afs_vnode_cache_check_aux(void *cookie_netfs_data,
 	struct afs_vnode *vnode = cookie_netfs_data;
 	struct afs_vnode_cache_aux aux;
 
-	_enter("{%x,%x,%llx},%p,%u",
+	_enter("{%llx,%x,%llx},%p,%u",
 	       vnode->fid.vnode, vnode->fid.unique, vnode->status.data_version,
 	       buffer, buflen);
 
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index 5f261fbf2182b..8698198ad4279 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -310,7 +310,7 @@ void afs_break_callbacks(struct afs_server *server, size_t count,
 	/* TODO: Sort the callback break list by volume ID */
 
 	for (; count > 0; callbacks++, count--) {
-		_debug("- Fid { vl=%08x n=%u u=%u }  CB { v=%u x=%u t=%u }",
+		_debug("- Fid { vl=%08llx n=%llu u=%u }  CB { v=%u x=%u t=%u }",
 		       callbacks->fid.vid,
 		       callbacks->fid.vnode,
 		       callbacks->fid.unique,
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 78f9754fd03d9..024b7cf7441c1 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -552,7 +552,7 @@ static int afs_do_lookup_one(struct inode *dir, struct dentry *dentry,
 	}
 
 	*fid = cookie.fid;
-	_leave(" = 0 { vn=%u u=%u }", fid->vnode, fid->unique);
+	_leave(" = 0 { vn=%llu u=%u }", fid->vnode, fid->unique);
 	return 0;
 }
 
@@ -830,7 +830,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
 	struct key *key;
 	int ret;
 
-	_enter("{%x:%u},%p{%pd},",
+	_enter("{%llx:%llu},%p{%pd},",
 	       dvnode->fid.vid, dvnode->fid.vnode, dentry, dentry);
 
 	ASSERTCMP(d_inode(dentry), ==, NULL);
@@ -900,7 +900,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
 
 	if (d_really_is_positive(dentry)) {
 		vnode = AFS_FS_I(d_inode(dentry));
-		_enter("{v={%x:%u} n=%pd fl=%lx},",
+		_enter("{v={%llx:%llu} n=%pd fl=%lx},",
 		       vnode->fid.vid, vnode->fid.vnode, dentry,
 		       vnode->flags);
 	} else {
@@ -969,7 +969,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
 		/* if the vnode ID has changed, then the dirent points to a
 		 * different file */
 		if (fid.vnode != vnode->fid.vnode) {
-			_debug("%pd: dirent changed [%u != %u]",
+			_debug("%pd: dirent changed [%llu != %llu]",
 			       dentry, fid.vnode,
 			       vnode->fid.vnode);
 			goto not_found;
@@ -1108,7 +1108,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 
 	mode |= S_IFDIR;
 
-	_enter("{%x:%u},{%pd},%ho",
+	_enter("{%llx:%llu},{%pd},%ho",
 	       dvnode->fid.vid, dvnode->fid.vnode, dentry, mode);
 
 	key = afs_request_key(dvnode->volume->cell);
@@ -1178,7 +1178,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
 	u64 data_version = dvnode->status.data_version;
 	int ret;
 
-	_enter("{%x:%u},{%pd}",
+	_enter("{%llx:%llu},{%pd}",
 	       dvnode->fid.vid, dvnode->fid.vnode, dentry);
 
 	key = afs_request_key(dvnode->volume->cell);
@@ -1270,7 +1270,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
 	u64 data_version = dvnode->status.data_version;
 	int ret;
 
-	_enter("{%x:%u},{%pd}",
+	_enter("{%llx:%llu},{%pd}",
 	       dvnode->fid.vid, dvnode->fid.vnode, dentry);
 
 	if (dentry->d_name.len >= AFSNAMEMAX)
@@ -1334,7 +1334,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 
 	mode |= S_IFREG;
 
-	_enter("{%x:%u},{%pd},%ho,",
+	_enter("{%llx:%llu},{%pd},%ho,",
 	       dvnode->fid.vid, dvnode->fid.vnode, dentry, mode);
 
 	ret = -ENAMETOOLONG;
@@ -1397,7 +1397,7 @@ static int afs_link(struct dentry *from, struct inode *dir,
 	dvnode = AFS_FS_I(dir);
 	data_version = dvnode->status.data_version;
 
-	_enter("{%x:%u},{%x:%u},{%pd}",
+	_enter("{%llx:%llu},{%llx:%llu},{%pd}",
 	       vnode->fid.vid, vnode->fid.vnode,
 	       dvnode->fid.vid, dvnode->fid.vnode,
 	       dentry);
@@ -1468,7 +1468,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry,
 	u64 data_version = dvnode->status.data_version;
 	int ret;
 
-	_enter("{%x:%u},{%pd},%s",
+	_enter("{%llx:%llu},{%pd},%s",
 	       dvnode->fid.vid, dvnode->fid.vnode, dentry,
 	       content);
 
@@ -1544,7 +1544,7 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	orig_data_version = orig_dvnode->status.data_version;
 	new_data_version = new_dvnode->status.data_version;
 
-	_enter("{%x:%u},{%x:%u},{%x:%u},{%pd}",
+	_enter("{%llx:%llu},{%llx:%llu},{%llx:%llu},{%pd}",
 	       orig_dvnode->fid.vid, orig_dvnode->fid.vnode,
 	       vnode->fid.vid, vnode->fid.vnode,
 	       new_dvnode->fid.vid, new_dvnode->fid.vnode,
@@ -1611,7 +1611,7 @@ static int afs_dir_releasepage(struct page *page, gfp_t gfp_flags)
 {
 	struct afs_vnode *dvnode = AFS_FS_I(page->mapping->host);
 
-	_enter("{{%x:%u}[%lu]}", dvnode->fid.vid, dvnode->fid.vnode, page->index);
+	_enter("{{%llx:%llu}[%lu]}", dvnode->fid.vid, dvnode->fid.vnode, page->index);
 
 	set_page_private(page, 0);
 	ClearPagePrivate(page);
diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c
index 0efed0a63080c..a9ba81ddf1546 100644
--- a/fs/afs/dynroot.c
+++ b/fs/afs/dynroot.c
@@ -62,7 +62,7 @@ struct inode *afs_try_auto_mntpt(struct dentry *dentry, struct inode *dir)
 	struct inode *inode;
 	int ret = -ENOENT;
 
-	_enter("%p{%pd}, {%x:%u}",
+	_enter("%p{%pd}, {%llx:%llu}",
 	       dentry, dentry, vnode->fid.vid, vnode->fid.vnode);
 
 	if (!test_bit(AFS_VNODE_AUTOCELL, &vnode->flags))
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 7d4f26198573d..d6bc3f5d784b5 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -121,7 +121,7 @@ int afs_open(struct inode *inode, struct file *file)
 	struct key *key;
 	int ret;
 
-	_enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode);
+	_enter("{%llx:%llu},", vnode->fid.vid, vnode->fid.vnode);
 
 	key = afs_request_key(vnode->volume->cell);
 	if (IS_ERR(key)) {
@@ -170,7 +170,7 @@ int afs_release(struct inode *inode, struct file *file)
 	struct afs_vnode *vnode = AFS_FS_I(inode);
 	struct afs_file *af = file->private_data;
 
-	_enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode);
+	_enter("{%llx:%llu},", vnode->fid.vid, vnode->fid.vnode);
 
 	if ((file->f_mode & FMODE_WRITE))
 		return vfs_fsync(file, 0);
@@ -228,7 +228,7 @@ int afs_fetch_data(struct afs_vnode *vnode, struct key *key, struct afs_read *de
 	struct afs_fs_cursor fc;
 	int ret;
 
-	_enter("%s{%x:%u.%u},%x,,,",
+	_enter("%s{%llx:%llu.%u},%x,,,",
 	       vnode->volume->name,
 	       vnode->fid.vid,
 	       vnode->fid.vnode,
@@ -634,7 +634,7 @@ static int afs_releasepage(struct page *page, gfp_t gfp_flags)
 	struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
 	unsigned long priv;
 
-	_enter("{{%x:%u}[%lu],%lx},%x",
+	_enter("{{%llx:%llu}[%lu],%lx},%x",
 	       vnode->fid.vid, vnode->fid.vnode, page->index, page->flags,
 	       gfp_flags);
 
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index dc62d15a964b8..0568fd9868210 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -29,7 +29,7 @@ static const struct file_lock_operations afs_lock_ops = {
  */
 void afs_lock_may_be_available(struct afs_vnode *vnode)
 {
-	_enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode);
+	_enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode);
 
 	queue_delayed_work(afs_lock_manager, &vnode->lock_work, 0);
 }
@@ -76,7 +76,7 @@ static int afs_set_lock(struct afs_vnode *vnode, struct key *key,
 	struct afs_fs_cursor fc;
 	int ret;
 
-	_enter("%s{%x:%u.%u},%x,%u",
+	_enter("%s{%llx:%llu.%u},%x,%u",
 	       vnode->volume->name,
 	       vnode->fid.vid,
 	       vnode->fid.vnode,
@@ -107,7 +107,7 @@ static int afs_extend_lock(struct afs_vnode *vnode, struct key *key)
 	struct afs_fs_cursor fc;
 	int ret;
 
-	_enter("%s{%x:%u.%u},%x",
+	_enter("%s{%llx:%llu.%u},%x",
 	       vnode->volume->name,
 	       vnode->fid.vid,
 	       vnode->fid.vnode,
@@ -138,7 +138,7 @@ static int afs_release_lock(struct afs_vnode *vnode, struct key *key)
 	struct afs_fs_cursor fc;
 	int ret;
 
-	_enter("%s{%x:%u.%u},%x",
+	_enter("%s{%llx:%llu.%u},%x",
 	       vnode->volume->name,
 	       vnode->fid.vid,
 	       vnode->fid.vnode,
@@ -175,7 +175,7 @@ void afs_lock_work(struct work_struct *work)
 	struct key *key;
 	int ret;
 
-	_enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode);
+	_enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode);
 
 	spin_lock(&vnode->lock);
 
@@ -192,7 +192,7 @@ void afs_lock_work(struct work_struct *work)
 		ret = afs_release_lock(vnode, vnode->lock_key);
 		if (ret < 0)
 			printk(KERN_WARNING "AFS:"
-			       " Failed to release lock on {%x:%x} error %d\n",
+			       " Failed to release lock on {%llx:%llx} error %d\n",
 			       vnode->fid.vid, vnode->fid.vnode, ret);
 
 		spin_lock(&vnode->lock);
@@ -229,7 +229,7 @@ void afs_lock_work(struct work_struct *work)
 		key_put(key);
 
 		if (ret < 0)
-			pr_warning("AFS: Failed to extend lock on {%x:%x} error %d\n",
+			pr_warning("AFS: Failed to extend lock on {%llx:%llx} error %d\n",
 				   vnode->fid.vid, vnode->fid.vnode, ret);
 
 		spin_lock(&vnode->lock);
@@ -430,7 +430,7 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
 	struct key *key = afs_file_key(file);
 	int ret;
 
-	_enter("{%x:%u},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type);
+	_enter("{%llx:%llu},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type);
 
 	/* only whole-file locks are supported */
 	if (fl->fl_start != 0 || fl->fl_end != OFFSET_MAX)
@@ -582,7 +582,7 @@ static int afs_do_unlk(struct file *file, struct file_lock *fl)
 	struct afs_vnode *vnode = AFS_FS_I(locks_inode(file));
 	int ret;
 
-	_enter("{%x:%u},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type);
+	_enter("{%llx:%llu},%u", vnode->fid.vid, vnode->fid.vnode, fl->fl_type);
 
 	/* Flush all pending writes before doing anything with locks. */
 	vfs_fsync(file, 0);
@@ -639,7 +639,7 @@ int afs_lock(struct file *file, int cmd, struct file_lock *fl)
 {
 	struct afs_vnode *vnode = AFS_FS_I(locks_inode(file));
 
-	_enter("{%x:%u},%d,{t=%x,fl=%x,r=%Ld:%Ld}",
+	_enter("{%llx:%llu},%d,{t=%x,fl=%x,r=%Ld:%Ld}",
 	       vnode->fid.vid, vnode->fid.vnode, cmd,
 	       fl->fl_type, fl->fl_flags,
 	       (long long) fl->fl_start, (long long) fl->fl_end);
@@ -662,7 +662,7 @@ int afs_flock(struct file *file, int cmd, struct file_lock *fl)
 {
 	struct afs_vnode *vnode = AFS_FS_I(locks_inode(file));
 
-	_enter("{%x:%u},%d,{t=%x,fl=%x}",
+	_enter("{%llx:%llu},%d,{t=%x,fl=%x}",
 	       vnode->fid.vid, vnode->fid.vnode, cmd,
 	       fl->fl_type, fl->fl_flags);
 
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index fe2e9e39b388d..5e3027f213909 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -90,7 +90,7 @@ void afs_update_inode_from_status(struct afs_vnode *vnode,
 	if (!(flags & AFS_VNODE_NOT_YET_SET)) {
 		if (expected_version &&
 		    *expected_version != status->data_version) {
-			_debug("vnode modified %llx on {%x:%u} [exp %llx]",
+			_debug("vnode modified %llx on {%llx:%llu} [exp %llx]",
 			       (unsigned long long) status->data_version,
 			       vnode->fid.vid, vnode->fid.vnode,
 			       (unsigned long long) *expected_version);
@@ -164,7 +164,7 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call,
 		if (type != status->type &&
 		    vnode &&
 		    !test_bit(AFS_VNODE_UNSET, &vnode->flags)) {
-			pr_warning("Vnode %x:%x:%x changed type %u to %u\n",
+			pr_warning("Vnode %llx:%llx:%x changed type %u to %u\n",
 				   vnode->fid.vid,
 				   vnode->fid.vnode,
 				   vnode->fid.unique,
@@ -389,7 +389,7 @@ static int afs_deliver_fs_fetch_status_vnode(struct afs_call *call)
 	if (ret < 0)
 		return ret;
 
-	_enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode);
+	_enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode);
 
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
@@ -426,7 +426,7 @@ int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsy
 	struct afs_net *net = afs_v2net(vnode);
 	__be32 *bp;
 
-	_enter(",%x,{%x:%u},,",
+	_enter(",%x,{%llx:%llu},,",
 	       key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
 
 	call = afs_alloc_flat_call(net, &afs_RXFSFetchStatus_vnode,
@@ -1261,7 +1261,7 @@ static int afs_fs_store_data64(struct afs_fs_cursor *fc,
 	struct afs_net *net = afs_v2net(vnode);
 	__be32 *bp;
 
-	_enter(",%x,{%x:%u},,",
+	_enter(",%x,{%llx:%llu},,",
 	       key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
 
 	call = afs_alloc_flat_call(net, &afs_RXFSStoreData64,
@@ -1318,7 +1318,7 @@ int afs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping,
 	loff_t size, pos, i_size;
 	__be32 *bp;
 
-	_enter(",%x,{%x:%u},,",
+	_enter(",%x,{%llx:%llu},,",
 	       key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
 
 	size = (loff_t)to - (loff_t)offset;
@@ -1440,7 +1440,7 @@ static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct iattr *attr)
 	struct afs_net *net = afs_v2net(vnode);
 	__be32 *bp;
 
-	_enter(",%x,{%x:%u},,",
+	_enter(",%x,{%llx:%llu},,",
 	       key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
 
 	ASSERT(attr->ia_valid & ATTR_SIZE);
@@ -1487,7 +1487,7 @@ static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr)
 	struct afs_net *net = afs_v2net(vnode);
 	__be32 *bp;
 
-	_enter(",%x,{%x:%u},,",
+	_enter(",%x,{%llx:%llu},,",
 	       key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
 
 	ASSERT(attr->ia_valid & ATTR_SIZE);
@@ -1536,7 +1536,7 @@ int afs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr)
 	if (attr->ia_valid & ATTR_SIZE)
 		return afs_fs_setattr_size(fc, attr);
 
-	_enter(",%x,{%x:%u},,",
+	_enter(",%x,{%llx:%llu},,",
 	       key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
 
 	call = afs_alloc_flat_call(net, &afs_RXFSStoreStatus,
@@ -2034,7 +2034,7 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call)
 	if (ret < 0)
 		return ret;
 
-	_enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode);
+	_enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode);
 
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
@@ -2079,7 +2079,7 @@ int afs_fs_fetch_status(struct afs_fs_cursor *fc,
 	struct afs_call *call;
 	__be32 *bp;
 
-	_enter(",%x,{%x:%u},,",
+	_enter(",%x,{%llx:%llu},,",
 	       key_serial(fc->key), fid->vid, fid->vnode);
 
 	call = afs_alloc_flat_call(net, &afs_RXFSFetchStatus, 16, (21 + 3 + 6) * 4);
@@ -2250,7 +2250,7 @@ int afs_fs_inline_bulk_status(struct afs_fs_cursor *fc,
 	__be32 *bp;
 	int i;
 
-	_enter(",%x,{%x:%u},%u",
+	_enter(",%x,{%llx:%llu},%u",
 	       key_serial(fc->key), fids[0].vid, fids[1].vnode, nr_fids);
 
 	call = afs_alloc_flat_call(net, &afs_RXFSInlineBulkStatus,
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index ab4e7a15c205a..4ba47efe96682 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -100,7 +100,7 @@ int afs_fetch_status(struct afs_vnode *vnode, struct key *key, bool new_inode)
 	struct afs_fs_cursor fc;
 	int ret;
 
-	_enter("%s,{%x:%u.%u,S=%lx}",
+	_enter("%s,{%llx:%llu.%u,S=%lx}",
 	       vnode->volume->name,
 	       vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique,
 	       vnode->flags);
@@ -127,9 +127,9 @@ int afs_fetch_status(struct afs_vnode *vnode, struct key *key, bool new_inode)
 int afs_iget5_test(struct inode *inode, void *opaque)
 {
 	struct afs_iget_data *data = opaque;
+	struct afs_vnode *vnode = AFS_FS_I(inode);
 
-	return inode->i_ino == data->fid.vnode &&
-		inode->i_generation == data->fid.unique;
+	return memcmp(&vnode->fid, &data->fid, sizeof(data->fid)) == 0;
 }
 
 /*
@@ -150,11 +150,14 @@ static int afs_iget5_set(struct inode *inode, void *opaque)
 	struct afs_iget_data *data = opaque;
 	struct afs_vnode *vnode = AFS_FS_I(inode);
 
-	inode->i_ino = data->fid.vnode;
-	inode->i_generation = data->fid.unique;
 	vnode->fid = data->fid;
 	vnode->volume = data->volume;
 
+	/* YFS supports 96-bit vnode IDs, but Linux only supports
+	 * 64-bit inode numbers.
+	 */
+	inode->i_ino = data->fid.vnode;
+	inode->i_generation = data->fid.unique;
 	return 0;
 }
 
@@ -193,7 +196,7 @@ struct inode *afs_iget_pseudo_dir(struct super_block *sb, bool root)
 		return ERR_PTR(-ENOMEM);
 	}
 
-	_debug("GOT INODE %p { ino=%lu, vl=%x, vn=%x, u=%x }",
+	_debug("GOT INODE %p { ino=%lu, vl=%llx, vn=%llx, u=%x }",
 	       inode, inode->i_ino, data.fid.vid, data.fid.vnode,
 	       data.fid.unique);
 
@@ -252,8 +255,8 @@ static void afs_get_inode_cache(struct afs_vnode *vnode)
 
 	key.vnode_id		= vnode->fid.vnode;
 	key.unique		= vnode->fid.unique;
-	key.vnode_id_ext[0]	= 0;
-	key.vnode_id_ext[1]	= 0;
+	key.vnode_id_ext[0]	= vnode->fid.vnode >> 32;
+	key.vnode_id_ext[1]	= vnode->fid.vnode_hi;
 	aux.data_version	= vnode->status.data_version;
 
 	vnode->cache = fscache_acquire_cookie(vnode->volume->cache,
@@ -277,7 +280,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
 	struct inode *inode;
 	int ret;
 
-	_enter(",{%x:%u.%u},,", fid->vid, fid->vnode, fid->unique);
+	_enter(",{%llx:%llu.%u},,", fid->vid, fid->vnode, fid->unique);
 
 	as = sb->s_fs_info;
 	data.volume = as->volume;
@@ -289,7 +292,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
 		return ERR_PTR(-ENOMEM);
 	}
 
-	_debug("GOT INODE %p { vl=%x vn=%x, u=%x }",
+	_debug("GOT INODE %p { vl=%llx vn=%llx, u=%x }",
 	       inode, fid->vid, fid->vnode, fid->unique);
 
 	vnode = AFS_FS_I(inode);
@@ -352,7 +355,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
  */
 void afs_zap_data(struct afs_vnode *vnode)
 {
-	_enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode);
+	_enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode);
 
 #ifdef CONFIG_AFS_FSCACHE
 	fscache_invalidate(vnode->cache);
@@ -382,7 +385,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
 	bool valid = false;
 	int ret;
 
-	_enter("{v={%x:%u} fl=%lx},%x",
+	_enter("{v={%llx:%llu} fl=%lx},%x",
 	       vnode->fid.vid, vnode->fid.vnode, vnode->flags,
 	       key_serial(key));
 
@@ -501,7 +504,7 @@ void afs_evict_inode(struct inode *inode)
 
 	vnode = AFS_FS_I(inode);
 
-	_enter("{%x:%u.%d}",
+	_enter("{%llx:%llu.%d}",
 	       vnode->fid.vid,
 	       vnode->fid.vnode,
 	       vnode->fid.unique);
@@ -550,7 +553,7 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr)
 	struct key *key;
 	int ret;
 
-	_enter("{%x:%u},{n=%pd},%x",
+	_enter("{%llx:%llu},{n=%pd},%x",
 	       vnode->fid.vid, vnode->fid.vnode, dentry,
 	       attr->ia_valid);
 
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index fc36c41641ab1..d887f822f4eb5 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -221,7 +221,7 @@ static int afs_proc_cell_volumes_show(struct seq_file *m, void *v)
 		return 0;
 	}
 
-	seq_printf(m, "%3d %08x %s\n",
+	seq_printf(m, "%3d %08llx %s\n",
 		   atomic_read(&vol->usage), vol->vid,
 		   afs_vol_types[vol->type]);
 
diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c
index d7cbc3c230ee0..41405dde01133 100644
--- a/fs/afs/rotate.c
+++ b/fs/afs/rotate.c
@@ -118,7 +118,7 @@ static void afs_busy(struct afs_volume *volume, u32 abort_code)
 	default:		m = "busy";		break;
 	}
 
-	pr_notice("kAFS: Volume %u '%s' is %s\n", volume->vid, volume->name, m);
+	pr_notice("kAFS: Volume %llu '%s' is %s\n", volume->vid, volume->name, m);
 }
 
 /*
diff --git a/fs/afs/security.c b/fs/afs/security.c
index 81dfedb7879ff..d1ae53fd37399 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -126,7 +126,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
 	bool changed = false;
 	int i, j;
 
-	_enter("{%x:%u},%x,%x",
+	_enter("{%llx:%llu},%x,%x",
 	       vnode->fid.vid, vnode->fid.vnode, key_serial(key), caller_access);
 
 	rcu_read_lock();
@@ -289,7 +289,7 @@ int afs_check_permit(struct afs_vnode *vnode, struct key *key,
 	bool valid = false;
 	int i, ret;
 
-	_enter("{%x:%u},%x",
+	_enter("{%llx:%llu},%x",
 	       vnode->fid.vid, vnode->fid.vnode, key_serial(key));
 
 	/* check the permits to see if we've got one yet */
@@ -349,7 +349,7 @@ int afs_permission(struct inode *inode, int mask)
 	if (mask & MAY_NOT_BLOCK)
 		return -ECHILD;
 
-	_enter("{{%x:%u},%lx},%x,",
+	_enter("{{%llx:%llu},%lx},%x,",
 	       vnode->fid.vid, vnode->fid.vnode, vnode->flags, mask);
 
 	key = afs_request_key(vnode->volume->cell);
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 4d3e274207fb7..dcd07fe99871b 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -406,10 +406,11 @@ static int afs_fill_super(struct super_block *sb,
 		inode = afs_iget_pseudo_dir(sb, true);
 		sb->s_flags	|= SB_RDONLY;
 	} else {
-		sprintf(sb->s_id, "%u", as->volume->vid);
+		sprintf(sb->s_id, "%llu", as->volume->vid);
 		afs_activate_volume(as->volume);
 		fid.vid		= as->volume->vid;
 		fid.vnode	= 1;
+		fid.vnode_hi	= 0;
 		fid.unique	= 1;
 		inode = afs_iget(sb, params->key, &fid, NULL, NULL, NULL);
 	}
@@ -663,7 +664,7 @@ static void afs_destroy_inode(struct inode *inode)
 {
 	struct afs_vnode *vnode = AFS_FS_I(inode);
 
-	_enter("%p{%x:%u}", inode, vnode->fid.vid, vnode->fid.vnode);
+	_enter("%p{%llx:%llu}", inode, vnode->fid.vid, vnode->fid.vnode);
 
 	_debug("DESTROY INODE %p", inode);
 
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index 1cd263fa60282..f0020e35bf6fe 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -250,7 +250,7 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key)
 	/* We look up an ID by passing it as a decimal string in the
 	 * operation's name parameter.
 	 */
-	idsz = sprintf(idbuf, "%u", volume->vid);
+	idsz = sprintf(idbuf, "%llu", volume->vid);
 
 	vldb = afs_vl_lookup_vldb(volume->cell, key, idbuf, idsz);
 	if (IS_ERR(vldb)) {
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 11066a3248baf..72efcfcf9f95e 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -92,7 +92,7 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
 	pgoff_t index = pos >> PAGE_SHIFT;
 	int ret;
 
-	_enter("{%x:%u},{%lx},%u,%u",
+	_enter("{%llx:%llu},{%lx},%u,%u",
 	       vnode->fid.vid, vnode->fid.vnode, index, from, to);
 
 	/* We want to store information about how much of a page is altered in
@@ -192,7 +192,7 @@ int afs_write_end(struct file *file, struct address_space *mapping,
 	loff_t i_size, maybe_i_size;
 	int ret;
 
-	_enter("{%x:%u},{%lx}",
+	_enter("{%llx:%llu},{%lx}",
 	       vnode->fid.vid, vnode->fid.vnode, page->index);
 
 	maybe_i_size = pos + copied;
@@ -241,7 +241,7 @@ static void afs_kill_pages(struct address_space *mapping,
 	struct pagevec pv;
 	unsigned count, loop;
 
-	_enter("{%x:%u},%lx-%lx",
+	_enter("{%llx:%llu},%lx-%lx",
 	       vnode->fid.vid, vnode->fid.vnode, first, last);
 
 	pagevec_init(&pv);
@@ -283,7 +283,7 @@ static void afs_redirty_pages(struct writeback_control *wbc,
 	struct pagevec pv;
 	unsigned count, loop;
 
-	_enter("{%x:%u},%lx-%lx",
+	_enter("{%llx:%llu},%lx-%lx",
 	       vnode->fid.vid, vnode->fid.vnode, first, last);
 
 	pagevec_init(&pv);
@@ -325,7 +325,7 @@ static int afs_store_data(struct address_space *mapping,
 	struct list_head *p;
 	int ret = -ENOKEY, ret2;
 
-	_enter("%s{%x:%u.%u},%lx,%lx,%x,%x",
+	_enter("%s{%llx:%llu.%u},%lx,%lx,%x,%x",
 	       vnode->volume->name,
 	       vnode->fid.vid,
 	       vnode->fid.vnode,
@@ -687,7 +687,7 @@ void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call)
 	unsigned count, loop;
 	pgoff_t first = call->first, last = call->last;
 
-	_enter("{%x:%u},{%lx-%lx}",
+	_enter("{%llx:%llu},{%lx-%lx}",
 	       vnode->fid.vid, vnode->fid.vnode, first, last);
 
 	pagevec_init(&pv);
@@ -726,7 +726,7 @@ ssize_t afs_file_write(struct kiocb *iocb, struct iov_iter *from)
 	ssize_t result;
 	size_t count = iov_iter_count(from);
 
-	_enter("{%x.%u},{%zu},",
+	_enter("{%llx:%llu},{%zu},",
 	       vnode->fid.vid, vnode->fid.vnode, count);
 
 	if (IS_SWAPFILE(&vnode->vfs_inode)) {
@@ -754,7 +754,7 @@ int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 	struct inode *inode = file_inode(file);
 	struct afs_vnode *vnode = AFS_FS_I(inode);
 
-	_enter("{%x:%u},{n=%pD},%d",
+	_enter("{%llx:%llu},{n=%pD},%d",
 	       vnode->fid.vid, vnode->fid.vnode, file,
 	       datasync);
 
@@ -772,7 +772,7 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
 	struct afs_vnode *vnode = AFS_FS_I(inode);
 	unsigned long priv;
 
-	_enter("{{%x:%u}},{%lx}",
+	_enter("{{%llx:%llu}},{%lx}",
 	       vnode->fid.vid, vnode->fid.vnode, vmf->page->index);
 
 	sb_start_pagefault(inode->i_sb);
diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c
index cfcc674e64a55..a2cdf25573e24 100644
--- a/fs/afs/xattr.c
+++ b/fs/afs/xattr.c
@@ -72,7 +72,7 @@ static int afs_xattr_get_fid(const struct xattr_handler *handler,
 	char text[8 + 1 + 8 + 1 + 8 + 1];
 	size_t len;
 
-	len = sprintf(text, "%x:%x:%x",
+	len = sprintf(text, "%llx:%llx:%x",
 		      vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique);
 	if (size == 0)
 		return len;
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index 48b20a261d395..8acd56e20a371 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -374,7 +374,7 @@ TRACE_EVENT(afs_make_fs_call,
 		    }
 			   ),
 
-	    TP_printk("c=%08x %06x:%06x:%06x %s",
+	    TP_printk("c=%08x %06llx:%06llx:%06x %s",
 		      __entry->call,
 		      __entry->fid.vid,
 		      __entry->fid.vnode,
@@ -688,7 +688,7 @@ TRACE_EVENT(afs_file_error,
 		    __entry->where = where;
 			   ),
 
-	    TP_printk("%x:%x:%x r=%d %s",
+	    TP_printk("%llx:%llx:%x r=%d %s",
 		      __entry->fid.vid, __entry->fid.vnode, __entry->fid.unique,
 		      __entry->error,
 		      __print_symbolic(__entry->where, afs_file_errors))
-- 
GitLab


From 0067191201de0d1870ff692368900da5207daf2c Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sat, 20 Oct 2018 00:57:57 +0100
Subject: [PATCH 0156/1890] afs: Commit the status on a new file/dir/symlink

Call the function to commit the status on a new file, dir or symlink so
that the access rights for the caller's key are cached for that object.

Without this, the next access to the file will cause a FetchStatus
operation to be emitted to retrieve the access rights.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/dir.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 024b7cf7441c1..8936731c59ff8 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -1089,6 +1089,7 @@ static void afs_vnode_new_inode(struct afs_fs_cursor *fc,
 
 	vnode = AFS_FS_I(inode);
 	set_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
+	afs_vnode_commit_status(fc, vnode, 0);
 	d_add(new_dentry, inode);
 }
 
-- 
GitLab


From 06aeb2971457b33c1123af9f307a55f3dc4052c9 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sat, 20 Oct 2018 00:57:58 +0100
Subject: [PATCH 0157/1890] afs: Remove callback details from
 afs_callback_break struct

Remove unnecessary details of a broken callback, such as version, expiry
and type, from the afs_callback_break struct as they're not actually used
and make the list take more memory.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/afs.h       |  2 +-
 fs/afs/callback.c  |  8 ++------
 fs/afs/cmservice.c | 17 +++++------------
 3 files changed, 8 insertions(+), 19 deletions(-)

diff --git a/fs/afs/afs.h b/fs/afs/afs.h
index c23b31b742fa8..fb9bcb8758eaa 100644
--- a/fs/afs/afs.h
+++ b/fs/afs/afs.h
@@ -75,7 +75,7 @@ struct afs_callback {
 
 struct afs_callback_break {
 	struct afs_fid		fid;		/* File identifier */
-	struct afs_callback	cb;		/* Callback details */
+	//struct afs_callback	cb;		/* Callback details */
 };
 
 #define AFSCBMAX 50	/* maximum callbacks transferred per bulk op */
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index 8698198ad4279..df9bfee698ad3 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -310,14 +310,10 @@ void afs_break_callbacks(struct afs_server *server, size_t count,
 	/* TODO: Sort the callback break list by volume ID */
 
 	for (; count > 0; callbacks++, count--) {
-		_debug("- Fid { vl=%08llx n=%llu u=%u }  CB { v=%u x=%u t=%u }",
+		_debug("- Fid { vl=%08llx n=%llu u=%u }",
 		       callbacks->fid.vid,
 		       callbacks->fid.vnode,
-		       callbacks->fid.unique,
-		       callbacks->cb.version,
-		       callbacks->cb.expiry,
-		       callbacks->cb.type
-		       );
+		       callbacks->fid.unique);
 		afs_break_one_callback(server, &callbacks->fid);
 	}
 
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 186f621f87221..fc0010d800a0d 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -218,7 +218,6 @@ static int afs_deliver_cb_callback(struct afs_call *call)
 			cb->fid.vid	= ntohl(*bp++);
 			cb->fid.vnode	= ntohl(*bp++);
 			cb->fid.unique	= ntohl(*bp++);
-			cb->cb.type	= AFSCM_CB_UNTYPED;
 		}
 
 		afs_extract_to_tmp(call);
@@ -236,24 +235,18 @@ static int afs_deliver_cb_callback(struct afs_call *call)
 		if (call->count2 != call->count && call->count2 != 0)
 			return afs_protocol_error(call, -EBADMSG,
 						  afs_eproto_cb_count);
-		afs_extract_to_buf(call, call->count2 * 3 * 4);
+		call->_iter = &call->iter;
+		iov_iter_discard(&call->iter, READ, call->count2 * 3 * 4);
 		call->unmarshall++;
 
 	case 4:
-		_debug("extract CB array");
+		_debug("extract discard %zu/%u",
+		       iov_iter_count(&call->iter), call->count2 * 3 * 4);
+
 		ret = afs_extract_data(call, false);
 		if (ret < 0)
 			return ret;
 
-		_debug("unmarshall CB array");
-		cb = call->request;
-		bp = call->buffer;
-		for (loop = call->count2; loop > 0; loop--, cb++) {
-			cb->cb.version	= ntohl(*bp++);
-			cb->cb.expiry	= ntohl(*bp++);
-			cb->cb.type	= ntohl(*bp++);
-		}
-
 		call->unmarshall++;
 	case 5:
 		break;
-- 
GitLab


From 35dbfba3111a5ef0663bb89185ce8dfdbef63f8d Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sat, 20 Oct 2018 00:57:58 +0100
Subject: [PATCH 0158/1890] afs: Implement the YFS cache manager service

Implement the YFS cache manager service which gives extra capabilities on
top of AFS.  This is done by listening for an additional service on the
same port and indicating that anyone requesting an upgrade should be
upgraded to the YFS port.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/cmservice.c    | 103 +++++++++++++++++++++++++++++++++++++++++-
 fs/afs/protocol_yfs.h |  57 +++++++++++++++++++++++
 fs/afs/rxrpc.c        |  15 ++++++
 3 files changed, 174 insertions(+), 1 deletion(-)
 create mode 100644 fs/afs/protocol_yfs.h

diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index fc0010d800a0d..8cf8d10daa6c1 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -16,6 +16,7 @@
 #include <linux/ip.h>
 #include "internal.h"
 #include "afs_cm.h"
+#include "protocol_yfs.h"
 
 static int afs_deliver_cb_init_call_back_state(struct afs_call *);
 static int afs_deliver_cb_init_call_back_state3(struct afs_call *);
@@ -30,6 +31,8 @@ static void SRXAFSCB_Probe(struct work_struct *);
 static void SRXAFSCB_ProbeUuid(struct work_struct *);
 static void SRXAFSCB_TellMeAboutYourself(struct work_struct *);
 
+static int afs_deliver_yfs_cb_callback(struct afs_call *);
+
 #define CM_NAME(name) \
 	const char afs_SRXCB##name##_name[] __tracepoint_string =	\
 		"CB." #name
@@ -100,13 +103,24 @@ static const struct afs_call_type afs_SRXCBTellMeAboutYourself = {
 	.work		= SRXAFSCB_TellMeAboutYourself,
 };
 
+/*
+ * YFS CB.CallBack operation type
+ */
+static CM_NAME(YFS_CallBack);
+static const struct afs_call_type afs_SRXYFSCB_CallBack = {
+	.name		= afs_SRXCBYFS_CallBack_name,
+	.deliver	= afs_deliver_yfs_cb_callback,
+	.destructor	= afs_cm_destructor,
+	.work		= SRXAFSCB_CallBack,
+};
+
 /*
  * route an incoming cache manager call
  * - return T if supported, F if not
  */
 bool afs_cm_incoming_call(struct afs_call *call)
 {
-	_enter("{CB.OP %u}", call->operation_ID);
+	_enter("{%u, CB.OP %u}", call->service_id, call->operation_ID);
 
 	switch (call->operation_ID) {
 	case CBCallBack:
@@ -127,6 +141,11 @@ bool afs_cm_incoming_call(struct afs_call *call)
 	case CBTellMeAboutYourself:
 		call->type = &afs_SRXCBTellMeAboutYourself;
 		return true;
+	case YFSCBCallBack:
+		if (call->service_id != YFS_CM_SERVICE)
+			return false;
+		call->type = &afs_SRXYFSCB_CallBack;
+		return true;
 	default:
 		return false;
 	}
@@ -570,3 +589,85 @@ static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call)
 
 	return afs_queue_call_work(call);
 }
+
+/*
+ * deliver request data to a YFS CB.CallBack call
+ */
+static int afs_deliver_yfs_cb_callback(struct afs_call *call)
+{
+	struct afs_callback_break *cb;
+	struct sockaddr_rxrpc srx;
+	struct yfs_xdr_YFSFid *bp;
+	size_t size;
+	int ret, loop;
+
+	_enter("{%u}", call->unmarshall);
+
+	switch (call->unmarshall) {
+	case 0:
+		afs_extract_to_tmp(call);
+		call->unmarshall++;
+
+		/* extract the FID array and its count in two steps */
+	case 1:
+		_debug("extract FID count");
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		call->count = ntohl(call->tmp);
+		_debug("FID count: %u", call->count);
+		if (call->count > YFSCBMAX)
+			return afs_protocol_error(call, -EBADMSG,
+						  afs_eproto_cb_fid_count);
+
+		size = array_size(call->count, sizeof(struct yfs_xdr_YFSFid));
+		call->buffer = kmalloc(size, GFP_KERNEL);
+		if (!call->buffer)
+			return -ENOMEM;
+		afs_extract_to_buf(call, size);
+		call->unmarshall++;
+
+	case 2:
+		_debug("extract FID array");
+		ret = afs_extract_data(call, false);
+		if (ret < 0)
+			return ret;
+
+		_debug("unmarshall FID array");
+		call->request = kcalloc(call->count,
+					sizeof(struct afs_callback_break),
+					GFP_KERNEL);
+		if (!call->request)
+			return -ENOMEM;
+
+		cb = call->request;
+		bp = call->buffer;
+		for (loop = call->count; loop > 0; loop--, cb++) {
+			cb->fid.vid	= xdr_to_u64(bp->volume);
+			cb->fid.vnode	= xdr_to_u64(bp->vnode.lo);
+			cb->fid.vnode_hi = ntohl(bp->vnode.hi);
+			cb->fid.unique	= ntohl(bp->vnode.unique);
+			bp++;
+		}
+
+		afs_extract_to_tmp(call);
+		call->unmarshall++;
+
+	case 3:
+		break;
+	}
+
+	if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
+		return afs_io_error(call, afs_io_error_cm_reply);
+
+	/* We'll need the file server record as that tells us which set of
+	 * vnodes to operate upon.
+	 */
+	rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
+	call->cm_server = afs_find_server(call->net, &srx);
+	if (!call->cm_server)
+		trace_afs_cm_no_server(call, &srx);
+
+	return afs_queue_call_work(call);
+}
diff --git a/fs/afs/protocol_yfs.h b/fs/afs/protocol_yfs.h
new file mode 100644
index 0000000000000..b7b211dd98576
--- /dev/null
+++ b/fs/afs/protocol_yfs.h
@@ -0,0 +1,57 @@
+/* YFS protocol bits
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#define YFS_FS_SERVICE	2500
+#define YFS_CM_SERVICE	2501
+
+#define YFSCBMAX 1024
+
+enum YFS_CM_Operations {
+	YFSCBProbe		= 206,	/* probe client */
+	YFSCBGetLock		= 207,	/* get contents of CM lock table */
+	YFSCBXStatsVersion	= 209,	/* get version of extended statistics */
+	YFSCBGetXStats		= 210,	/* get contents of extended statistics data */
+	YFSCBInitCallBackState3	= 213,	/* initialise callback state, version 3 */
+	YFSCBProbeUuid		= 214,	/* check the client hasn't rebooted */
+	YFSCBGetServerPrefs	= 215,
+	YFSCBGetCellServDV	= 216,
+	YFSCBGetLocalCell	= 217,
+	YFSCBGetCacheConfig	= 218,
+	YFSCBGetCellByNum	= 65537,
+	YFSCBTellMeAboutYourself = 65538, /* get client capabilities */
+	YFSCBCallBack		= 64204,
+};
+
+struct yfs_xdr_u64 {
+	__be32			msw;
+	__be32			lsw;
+} __packed;
+
+static inline u64 xdr_to_u64(const struct yfs_xdr_u64 x)
+{
+	return ((u64)ntohl(x.msw) << 32) | ntohl(x.lsw);
+}
+
+static inline struct yfs_xdr_u64 u64_to_xdr(const u64 x)
+{
+	return (struct yfs_xdr_u64){ .msw = htonl(x >> 32), .lsw = htonl(x) };
+}
+
+struct yfs_xdr_vnode {
+	struct yfs_xdr_u64	lo;
+	__be32			hi;
+	__be32			unique;
+} __packed;
+
+struct yfs_xdr_YFSFid {
+	struct yfs_xdr_u64	volume;
+	struct yfs_xdr_vnode	vnode;
+} __packed;
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index e6ab824ab7613..ce98e133caa6d 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -16,6 +16,7 @@
 #include <net/af_rxrpc.h>
 #include "internal.h"
 #include "afs_cm.h"
+#include "protocol_yfs.h"
 
 struct workqueue_struct *afs_async_calls;
 
@@ -42,6 +43,7 @@ int afs_open_socket(struct afs_net *net)
 	struct sockaddr_rxrpc srx;
 	struct socket *socket;
 	unsigned int min_level;
+	u16 service_upgrade[2];
 	int ret;
 
 	_enter("");
@@ -75,6 +77,19 @@ int afs_open_socket(struct afs_net *net)
 	if (ret < 0)
 		goto error_2;
 
+	srx.srx_service = YFS_CM_SERVICE;
+	ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx));
+	if (ret < 0)
+		goto error_2;
+
+	service_upgrade[0] = CM_SERVICE;
+	service_upgrade[1] = YFS_CM_SERVICE;
+	ret = kernel_setsockopt(socket, SOL_RXRPC, RXRPC_UPGRADEABLE_SERVICE,
+				(void *)service_upgrade, sizeof(service_upgrade));
+	if (ret < 0)
+		goto error_2;
+
+
 	rxrpc_kernel_new_call_notification(socket, afs_rx_new_call,
 					   afs_rx_discard_new_call);
 
-- 
GitLab


From 36bb5f490a542f230beb982475b56d79d72033de Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sat, 20 Oct 2018 00:57:58 +0100
Subject: [PATCH 0159/1890] afs: Fix FS.FetchStatus delivery from updating
 wrong vnode

The FS.FetchStatus reply delivery function was updating inode of the
directory in which a lookup had been done with the status of the looked up
file.  This corrupts some of the directory state.

Fixes: 5cf9dd55a0ec ("afs: Prospectively look up extra files when doing a single lookup")
Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/fsclient.c | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 5e3027f213909..f758750e81d87 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -2026,7 +2026,7 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call)
 	struct afs_file_status *status = call->reply[1];
 	struct afs_callback *callback = call->reply[2];
 	struct afs_volsync *volsync = call->reply[3];
-	struct afs_vnode *vnode = call->reply[0];
+	struct afs_fid *fid = call->reply[0];
 	const __be32 *bp;
 	int ret;
 
@@ -2034,21 +2034,15 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call)
 	if (ret < 0)
 		return ret;
 
-	_enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode);
+	_enter("{%llx:%llu}", fid->vid, fid->vnode);
 
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
-	ret = afs_decode_status(call, &bp, status, vnode,
+	ret = afs_decode_status(call, &bp, status, NULL,
 				&call->expected_version, NULL);
 	if (ret < 0)
 		return ret;
-	callback[call->count].version	= ntohl(bp[0]);
-	callback[call->count].expiry	= ntohl(bp[1]);
-	callback[call->count].type	= ntohl(bp[2]);
-	if (vnode)
-		xdr_decode_AFSCallBack(call, vnode, &bp);
-	else
-		bp += 3;
+	xdr_decode_AFSCallBack_raw(&bp, callback);
 	if (volsync)
 		xdr_decode_AFSVolSync(&bp, volsync);
 
@@ -2089,7 +2083,7 @@ int afs_fs_fetch_status(struct afs_fs_cursor *fc,
 	}
 
 	call->key = fc->key;
-	call->reply[0] = NULL; /* vnode for fid[0] */
+	call->reply[0] = fid;
 	call->reply[1] = status;
 	call->reply[2] = callback;
 	call->reply[3] = volsync;
-- 
GitLab


From 12d8e95a911eb3921a1fecf1e4d913654f6f4fb5 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sat, 20 Oct 2018 00:57:58 +0100
Subject: [PATCH 0160/1890] afs: Calc callback expiry in op reply delivery

Calculate the callback expiration time at the point of operation reply
delivery, using the reply time queried from AF_RXRPC on that call as a
base.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/afs.h      |  2 +-
 fs/afs/fsclient.c | 22 +++++++++++++++++-----
 fs/afs/inode.c    |  4 ++--
 fs/afs/internal.h |  2 ++
 fs/afs/rxrpc.c    |  6 ++++++
 5 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/fs/afs/afs.h b/fs/afs/afs.h
index fb9bcb8758eaa..417cd23529c54 100644
--- a/fs/afs/afs.h
+++ b/fs/afs/afs.h
@@ -68,8 +68,8 @@ typedef enum {
 } afs_callback_type_t;
 
 struct afs_callback {
+	time64_t		expires_at;	/* Time at which expires */
 	unsigned		version;	/* Callback version */
-	unsigned		expiry;		/* Time at which expires */
 	afs_callback_type_t	type;		/* Type of callback */
 };
 
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index f758750e81d87..6105cdb171632 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -287,13 +287,19 @@ static void xdr_decode_AFSCallBack(struct afs_call *call,
 	*_bp = bp;
 }
 
-static void xdr_decode_AFSCallBack_raw(const __be32 **_bp,
+static ktime_t xdr_decode_expiry(struct afs_call *call, u32 expiry)
+{
+	return ktime_add_ns(call->reply_time, expiry * NSEC_PER_SEC);
+}
+
+static void xdr_decode_AFSCallBack_raw(struct afs_call *call,
+				       const __be32 **_bp,
 				       struct afs_callback *cb)
 {
 	const __be32 *bp = *_bp;
 
 	cb->version	= ntohl(*bp++);
-	cb->expiry	= ntohl(*bp++);
+	cb->expires_at	= xdr_decode_expiry(call, ntohl(*bp++));
 	cb->type	= ntohl(*bp++);
 	*_bp = bp;
 }
@@ -440,6 +446,7 @@ int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsy
 	call->reply[0] = vnode;
 	call->reply[1] = volsync;
 	call->expected_version = new_inode ? 1 : vnode->status.data_version;
+	call->want_reply_time = true;
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -627,6 +634,7 @@ static int afs_fs_fetch_data64(struct afs_fs_cursor *fc, struct afs_read *req)
 	call->reply[1] = NULL; /* volsync */
 	call->reply[2] = req;
 	call->expected_version = vnode->status.data_version;
+	call->want_reply_time = true;
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -672,6 +680,7 @@ int afs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req)
 	call->reply[1] = NULL; /* volsync */
 	call->reply[2] = req;
 	call->expected_version = vnode->status.data_version;
+	call->want_reply_time = true;
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -714,7 +723,7 @@ static int afs_deliver_fs_create_vnode(struct afs_call *call)
 				&call->expected_version, NULL);
 	if (ret < 0)
 		return ret;
-	xdr_decode_AFSCallBack_raw(&bp, call->reply[3]);
+	xdr_decode_AFSCallBack_raw(call, &bp, call->reply[3]);
 	/* xdr_decode_AFSVolSync(&bp, call->reply[X]); */
 
 	_leave(" = 0 [done]");
@@ -773,6 +782,7 @@ int afs_fs_create(struct afs_fs_cursor *fc,
 	call->reply[2] = newstatus;
 	call->reply[3] = newcb;
 	call->expected_version = current_data_version + 1;
+	call->want_reply_time = true;
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -2042,7 +2052,7 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call)
 				&call->expected_version, NULL);
 	if (ret < 0)
 		return ret;
-	xdr_decode_AFSCallBack_raw(&bp, callback);
+	xdr_decode_AFSCallBack_raw(call, &bp, callback);
 	if (volsync)
 		xdr_decode_AFSVolSync(&bp, volsync);
 
@@ -2088,6 +2098,7 @@ int afs_fs_fetch_status(struct afs_fs_cursor *fc,
 	call->reply[2] = callback;
 	call->reply[3] = volsync;
 	call->expected_version = 1; /* vnode->status.data_version */
+	call->want_reply_time = true;
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -2188,7 +2199,7 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
 		bp = call->buffer;
 		callbacks = call->reply[2];
 		callbacks[call->count].version	= ntohl(bp[0]);
-		callbacks[call->count].expiry	= ntohl(bp[1]);
+		callbacks[call->count].expires_at = xdr_decode_expiry(call, ntohl(bp[1]));
 		callbacks[call->count].type	= ntohl(bp[2]);
 		statuses = call->reply[1];
 		if (call->count == 0 && vnode && statuses[0].abort_code == 0)
@@ -2261,6 +2272,7 @@ int afs_fs_inline_bulk_status(struct afs_fs_cursor *fc,
 	call->reply[2] = callbacks;
 	call->reply[3] = volsync;
 	call->count2 = nr_fids;
+	call->want_reply_time = true;
 
 	/* marshall the parameters */
 	bp = call->request;
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 4ba47efe96682..4c6d8e1112c2b 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -317,11 +317,11 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
 			 * didn't give us a callback) */
 			vnode->cb_version = 0;
 			vnode->cb_type = 0;
-			vnode->cb_expires_at = 0;
+			vnode->cb_expires_at = ktime_get();
 		} else {
 			vnode->cb_version = cb->version;
 			vnode->cb_type = cb->type;
-			vnode->cb_expires_at = cb->expiry;
+			vnode->cb_expires_at = cb->expires_at;
 			vnode->cb_interest = afs_get_cb_interest(cbi);
 			set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
 		}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 28d08aac515da..78065af051530 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -131,6 +131,7 @@ struct afs_call {
 	bool			async;		/* T if asynchronous */
 	bool			ret_reply0;	/* T if should return reply[0] on success */
 	bool			upgrade;	/* T to request service upgrade */
+	bool			want_reply_time;	/* T if want reply_time */
 	u16			service_id;	/* Actual service ID (after upgrade) */
 	unsigned int		debug_id;	/* Trace ID */
 	u32			operation_ID;	/* operation ID for an incoming call */
@@ -144,6 +145,7 @@ struct afs_call {
 	};
 	afs_dataversion_t	expected_version; /* Updated version expected from store */
 	afs_dataversion_t	expected_version_2; /* 2nd updated version expected from store */
+	ktime_t			reply_time;	/* Time of first reply packet */
 };
 
 struct afs_call_type {
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index ce98e133caa6d..444ba0d511ef7 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -499,6 +499,12 @@ static void afs_deliver_to_call(struct afs_call *call)
 			return;
 		}
 
+		if (call->want_reply_time &&
+		    rxrpc_kernel_get_reply_time(call->net->socket,
+						call->rxcall,
+						&call->reply_time))
+			call->want_reply_time = false;
+
 		ret = call->type->deliver(call);
 		state = READ_ONCE(call->state);
 		switch (ret) {
-- 
GitLab


From f58db83fd3325a305cf615f4ffb0e6f60745ed8a Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sat, 20 Oct 2018 00:57:58 +0100
Subject: [PATCH 0161/1890] afs: Get the target vnode in afs_rmdir() and get a
 callback on it

Get the target vnode in afs_rmdir() and validate it before we attempt the
deletion, The vnode pointer will be passed through to the delivery function
in a later patch so that the delivery function can mark it deleted.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/dir.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 8936731c59ff8..f2dd48d4363fa 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -1174,7 +1174,7 @@ static void afs_dir_remove_subdir(struct dentry *dentry)
 static int afs_rmdir(struct inode *dir, struct dentry *dentry)
 {
 	struct afs_fs_cursor fc;
-	struct afs_vnode *dvnode = AFS_FS_I(dir);
+	struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode = NULL;
 	struct key *key;
 	u64 data_version = dvnode->status.data_version;
 	int ret;
@@ -1188,6 +1188,14 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
 		goto error;
 	}
 
+	/* Try to make sure we have a callback promise on the victim. */
+	if (d_really_is_positive(dentry)) {
+		vnode = AFS_FS_I(d_inode(dentry));
+		ret = afs_validate(vnode, key);
+		if (ret < 0)
+			goto error_key;
+	}
+
 	ret = -ERESTARTSYS;
 	if (afs_begin_vnode_operation(&fc, dvnode, key)) {
 		while (afs_select_fileserver(&fc)) {
@@ -1206,6 +1214,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
 		}
 	}
 
+error_key:
 	key_put(key);
 error:
 	return ret;
-- 
GitLab


From d4936803a92b7d088086b1d7b8ecb5739d52c03b Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sat, 20 Oct 2018 00:57:58 +0100
Subject: [PATCH 0162/1890] afs: Expand data structure fields to support YFS

Expand fields in various data structures to support the expanded
information that YFS is capable of returning.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/afs.h      | 35 ++++++++++++++++++-----------------
 fs/afs/fsclient.c |  9 +++++----
 2 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/fs/afs/afs.h b/fs/afs/afs.h
index 417cd23529c54..d12ffb457e474 100644
--- a/fs/afs/afs.h
+++ b/fs/afs/afs.h
@@ -130,19 +130,18 @@ typedef u32 afs_access_t;
 struct afs_file_status {
 	u64			size;		/* file size */
 	afs_dataversion_t	data_version;	/* current data version */
-	time_t			mtime_client;	/* last time client changed data */
-	time_t			mtime_server;	/* last time server changed data */
-	unsigned		abort_code;	/* Abort if bulk-fetching this failed */
-
-	afs_file_type_t		type;		/* file type */
-	unsigned		nlink;		/* link count */
-	u32			author;		/* author ID */
-	u32			owner;		/* owner ID */
-	u32			group;		/* group ID */
+	struct timespec64	mtime_client;	/* Last time client changed data */
+	struct timespec64	mtime_server;	/* Last time server changed data */
+	s64			author;		/* author ID */
+	s64			owner;		/* owner ID */
+	s64			group;		/* group ID */
 	afs_access_t		caller_access;	/* access rights for authenticated caller */
 	afs_access_t		anon_access;	/* access rights for unauthenticated caller */
 	umode_t			mode;		/* UNIX mode */
+	afs_file_type_t		type;		/* file type */
+	u32			nlink;		/* link count */
 	s32			lock_count;	/* file lock count (0=UNLK -1=WRLCK +ve=#RDLCK */
+	u32			abort_code;	/* Abort if bulk-fetching this failed */
 };
 
 /*
@@ -159,25 +158,27 @@ struct afs_file_status {
  * AFS volume synchronisation information
  */
 struct afs_volsync {
-	time_t			creation;	/* volume creation time */
+	time64_t		creation;	/* volume creation time */
 };
 
 /*
  * AFS volume status record
  */
 struct afs_volume_status {
-	u32			vid;		/* volume ID */
-	u32			parent_id;	/* parent volume ID */
+	afs_volid_t		vid;		/* volume ID */
+	afs_volid_t		parent_id;	/* parent volume ID */
 	u8			online;		/* true if volume currently online and available */
 	u8			in_service;	/* true if volume currently in service */
 	u8			blessed;	/* same as in_service */
 	u8			needs_salvage;	/* true if consistency checking required */
 	u32			type;		/* volume type (afs_voltype_t) */
-	u32			min_quota;	/* minimum space set aside (blocks) */
-	u32			max_quota;	/* maximum space this volume may occupy (blocks) */
-	u32			blocks_in_use;	/* space this volume currently occupies (blocks) */
-	u32			part_blocks_avail; /* space available in volume's partition */
-	u32			part_max_blocks; /* size of volume's partition */
+	u64			min_quota;	/* minimum space set aside (blocks) */
+	u64			max_quota;	/* maximum space this volume may occupy (blocks) */
+	u64			blocks_in_use;	/* space this volume currently occupies (blocks) */
+	u64			part_blocks_avail; /* space available in volume's partition */
+	u64			part_max_blocks; /* size of volume's partition */
+	s64			vol_copy_date;
+	s64			vol_backup_date;
 };
 
 #define AFS_BLOCK_SIZE	1024
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 6105cdb171632..2da65309e0dee 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -69,8 +69,7 @@ void afs_update_inode_from_status(struct afs_vnode *vnode,
 	struct timespec64 t;
 	umode_t mode;
 
-	t.tv_sec = status->mtime_client;
-	t.tv_nsec = 0;
+	t = status->mtime_client;
 	vnode->vfs_inode.i_ctime = t;
 	vnode->vfs_inode.i_mtime = t;
 	vnode->vfs_inode.i_atime = t;
@@ -194,8 +193,10 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call,
 	EXTRACT_M(mode);
 	EXTRACT_M(group);
 
-	status->mtime_client = ntohl(xdr->mtime_client);
-	status->mtime_server = ntohl(xdr->mtime_server);
+	status->mtime_client.tv_sec = ntohl(xdr->mtime_client);
+	status->mtime_client.tv_nsec = 0;
+	status->mtime_server.tv_sec = ntohl(xdr->mtime_server);
+	status->mtime_server.tv_nsec = 0;
 	status->lock_count   = ntohl(xdr->lock_count);
 
 	size  = (u64)ntohl(xdr->size_lo);
-- 
GitLab


From 30062bd13e3659a309d249a06d5f4ebb4a5c5251 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sat, 20 Oct 2018 00:57:58 +0100
Subject: [PATCH 0163/1890] afs: Implement YFS support in the fs client

Implement support for talking to YFS-variant fileservers in the cache
manager and the filesystem client.  These implement upgraded services on
the same port as their AFS services.

YFS fileservers provide expanded capabilities over AFS.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/Makefile            |    3 +-
 fs/afs/callback.c          |    9 +-
 fs/afs/dir.c               |   21 +-
 fs/afs/fsclient.c          |  104 +-
 fs/afs/internal.h          |   35 +-
 fs/afs/protocol_yfs.h      |  106 ++
 fs/afs/server.c            |    8 +
 fs/afs/yfsclient.c         | 2184 ++++++++++++++++++++++++++++++++++++
 include/trace/events/afs.h |   58 +-
 9 files changed, 2500 insertions(+), 28 deletions(-)
 create mode 100644 fs/afs/yfsclient.c

diff --git a/fs/afs/Makefile b/fs/afs/Makefile
index 03e9f7afea1b5..cc942b790cffd 100644
--- a/fs/afs/Makefile
+++ b/fs/afs/Makefile
@@ -33,7 +33,8 @@ kafs-y := \
 	vl_list.o \
 	volume.o \
 	write.o \
-	xattr.o
+	xattr.o \
+	yfsclient.o
 
 kafs-$(CONFIG_PROC_FS) += proc.o
 obj-$(CONFIG_AFS_FS)  := kafs.o
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index df9bfee698ad3..1c7955f5cdaf2 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -210,12 +210,10 @@ void afs_init_callback_state(struct afs_server *server)
 /*
  * actually break a callback
  */
-void afs_break_callback(struct afs_vnode *vnode)
+void __afs_break_callback(struct afs_vnode *vnode)
 {
 	_enter("");
 
-	write_seqlock(&vnode->cb_lock);
-
 	clear_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
 	if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
 		vnode->cb_break++;
@@ -230,7 +228,12 @@ void afs_break_callback(struct afs_vnode *vnode)
 			afs_lock_may_be_available(vnode);
 		spin_unlock(&vnode->lock);
 	}
+}
 
+void afs_break_callback(struct afs_vnode *vnode)
+{
+	write_seqlock(&vnode->cb_lock);
+	__afs_break_callback(vnode);
 	write_sequnlock(&vnode->cb_lock);
 }
 
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index f2dd48d4363fa..43dea3b00c29b 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -1200,7 +1200,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
 	if (afs_begin_vnode_operation(&fc, dvnode, key)) {
 		while (afs_select_fileserver(&fc)) {
 			fc.cb_break = afs_calc_vnode_cb_break(dvnode);
-			afs_fs_remove(&fc, dentry->d_name.name, true,
+			afs_fs_remove(&fc, vnode, dentry->d_name.name, true,
 				      data_version);
 		}
 
@@ -1245,7 +1245,9 @@ static int afs_dir_remove_link(struct dentry *dentry, struct key *key,
 	if (d_really_is_positive(dentry)) {
 		struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
 
-		if (dir_valid) {
+		if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
+			/* Already done */
+		} else if (dir_valid) {
 			drop_nlink(&vnode->vfs_inode);
 			if (vnode->vfs_inode.i_nlink == 0) {
 				set_bit(AFS_VNODE_DELETED, &vnode->flags);
@@ -1274,7 +1276,7 @@ static int afs_dir_remove_link(struct dentry *dentry, struct key *key,
 static int afs_unlink(struct inode *dir, struct dentry *dentry)
 {
 	struct afs_fs_cursor fc;
-	struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode;
+	struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode = NULL;
 	struct key *key;
 	unsigned long d_version = (unsigned long)dentry->d_fsdata;
 	u64 data_version = dvnode->status.data_version;
@@ -1304,7 +1306,18 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
 	if (afs_begin_vnode_operation(&fc, dvnode, key)) {
 		while (afs_select_fileserver(&fc)) {
 			fc.cb_break = afs_calc_vnode_cb_break(dvnode);
-			afs_fs_remove(&fc, dentry->d_name.name, false,
+
+			if (test_bit(AFS_SERVER_FL_IS_YFS, &fc.cbi->server->flags) &&
+			    !test_bit(AFS_SERVER_FL_NO_RM2, &fc.cbi->server->flags)) {
+				yfs_fs_remove_file2(&fc, vnode, dentry->d_name.name,
+						    data_version);
+				if (fc.ac.error != -ECONNABORTED ||
+				    fc.ac.abort_code != RXGEN_OPCODE)
+					continue;
+				set_bit(AFS_SERVER_FL_NO_RM2, &fc.cbi->server->flags);
+			}
+
+			afs_fs_remove(&fc, vnode, dentry->d_name.name, false,
 				      data_version);
 		}
 
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 2da65309e0dee..3975969719de6 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -17,6 +17,7 @@
 #include "internal.h"
 #include "afs_fs.h"
 #include "xdr_fs.h"
+#include "protocol_yfs.h"
 
 static const struct afs_fid afs_zero_fid;
 
@@ -312,14 +313,18 @@ static void xdr_decode_AFSVolSync(const __be32 **_bp,
 				  struct afs_volsync *volsync)
 {
 	const __be32 *bp = *_bp;
+	u32 creation;
 
-	volsync->creation = ntohl(*bp++);
+	creation = ntohl(*bp++);
 	bp++; /* spare2 */
 	bp++; /* spare3 */
 	bp++; /* spare4 */
 	bp++; /* spare5 */
 	bp++; /* spare6 */
 	*_bp = bp;
+
+	if (volsync)
+		volsync->creation = creation;
 }
 
 /*
@@ -380,6 +385,8 @@ static void xdr_decode_AFSFetchVolumeStatus(const __be32 **_bp,
 	vs->blocks_in_use	= ntohl(*bp++);
 	vs->part_blocks_avail	= ntohl(*bp++);
 	vs->part_max_blocks	= ntohl(*bp++);
+	vs->vol_copy_date	= 0;
+	vs->vol_backup_date	= 0;
 	*_bp = bp;
 }
 
@@ -405,8 +412,7 @@ static int afs_deliver_fs_fetch_status_vnode(struct afs_call *call)
 	if (ret < 0)
 		return ret;
 	xdr_decode_AFSCallBack(call, vnode, &bp);
-	if (call->reply[1])
-		xdr_decode_AFSVolSync(&bp, call->reply[1]);
+	xdr_decode_AFSVolSync(&bp, call->reply[1]);
 
 	_leave(" = 0 [done]");
 	return 0;
@@ -433,6 +439,9 @@ int afs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsy
 	struct afs_net *net = afs_v2net(vnode);
 	__be32 *bp;
 
+	if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+		return yfs_fs_fetch_file_status(fc, volsync, new_inode);
+
 	_enter(",%x,{%llx:%llu},,",
 	       key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
 
@@ -567,8 +576,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
 		if (ret < 0)
 			return ret;
 		xdr_decode_AFSCallBack(call, vnode, &bp);
-		if (call->reply[1])
-			xdr_decode_AFSVolSync(&bp, call->reply[1]);
+		xdr_decode_AFSVolSync(&bp, call->reply[1]);
 
 		call->unmarshall++;
 
@@ -665,6 +673,9 @@ int afs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req)
 	struct afs_net *net = afs_v2net(vnode);
 	__be32 *bp;
 
+	if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+		return yfs_fs_fetch_data(fc, req);
+
 	if (upper_32_bits(req->pos) ||
 	    upper_32_bits(req->len) ||
 	    upper_32_bits(req->pos + req->len))
@@ -765,6 +776,15 @@ int afs_fs_create(struct afs_fs_cursor *fc,
 	size_t namesz, reqsz, padsz;
 	__be32 *bp;
 
+	if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)){
+		if (S_ISDIR(mode))
+			return yfs_fs_make_dir(fc, name, mode, current_data_version,
+					       newfid, newstatus, newcb);
+		else
+			return yfs_fs_create_file(fc, name, mode, current_data_version,
+						  newfid, newstatus, newcb);
+	}
+
 	_enter("");
 
 	namesz = strlen(name);
@@ -857,15 +877,18 @@ static const struct afs_call_type afs_RXFSRemoveDir = {
 /*
  * remove a file or directory
  */
-int afs_fs_remove(struct afs_fs_cursor *fc, const char *name, bool isdir,
-		  u64 current_data_version)
+int afs_fs_remove(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
+		  const char *name, bool isdir, u64 current_data_version)
 {
-	struct afs_vnode *vnode = fc->vnode;
+	struct afs_vnode *dvnode = fc->vnode;
 	struct afs_call *call;
-	struct afs_net *net = afs_v2net(vnode);
+	struct afs_net *net = afs_v2net(dvnode);
 	size_t namesz, reqsz, padsz;
 	__be32 *bp;
 
+	if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+		return yfs_fs_remove(fc, vnode, name, isdir, current_data_version);
+
 	_enter("");
 
 	namesz = strlen(name);
@@ -879,15 +902,16 @@ int afs_fs_remove(struct afs_fs_cursor *fc, const char *name, bool isdir,
 		return -ENOMEM;
 
 	call->key = fc->key;
-	call->reply[0] = vnode;
+	call->reply[0] = dvnode;
+	call->reply[1] = vnode;
 	call->expected_version = current_data_version + 1;
 
 	/* marshall the parameters */
 	bp = call->request;
 	*bp++ = htonl(isdir ? FSREMOVEDIR : FSREMOVEFILE);
-	*bp++ = htonl(vnode->fid.vid);
-	*bp++ = htonl(vnode->fid.vnode);
-	*bp++ = htonl(vnode->fid.unique);
+	*bp++ = htonl(dvnode->fid.vid);
+	*bp++ = htonl(dvnode->fid.vnode);
+	*bp++ = htonl(dvnode->fid.unique);
 	*bp++ = htonl(namesz);
 	memcpy(bp, name, namesz);
 	bp = (void *) bp + namesz;
@@ -897,7 +921,7 @@ int afs_fs_remove(struct afs_fs_cursor *fc, const char *name, bool isdir,
 	}
 
 	afs_use_fs_server(call, fc->cbi);
-	trace_afs_make_fs_call(call, &vnode->fid);
+	trace_afs_make_fs_call(call, &dvnode->fid);
 	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
 }
 
@@ -953,6 +977,9 @@ int afs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
 	size_t namesz, reqsz, padsz;
 	__be32 *bp;
 
+	if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+		return yfs_fs_link(fc, vnode, name, current_data_version);
+
 	_enter("");
 
 	namesz = strlen(name);
@@ -1047,6 +1074,10 @@ int afs_fs_symlink(struct afs_fs_cursor *fc,
 	size_t namesz, reqsz, padsz, c_namesz, c_padsz;
 	__be32 *bp;
 
+	if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+		return yfs_fs_symlink(fc, name, contents, current_data_version,
+				      newfid, newstatus);
+
 	_enter("");
 
 	namesz = strlen(name);
@@ -1159,6 +1190,12 @@ int afs_fs_rename(struct afs_fs_cursor *fc,
 	size_t reqsz, o_namesz, o_padsz, n_namesz, n_padsz;
 	__be32 *bp;
 
+	if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+		return yfs_fs_rename(fc, orig_name,
+				     new_dvnode, new_name,
+				     current_orig_data_version,
+				     current_new_data_version);
+
 	_enter("");
 
 	o_namesz = strlen(orig_name);
@@ -1329,6 +1366,9 @@ int afs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping,
 	loff_t size, pos, i_size;
 	__be32 *bp;
 
+	if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+		return yfs_fs_store_data(fc, mapping, first, last, offset, to);
+
 	_enter(",%x,{%llx:%llu},,",
 	       key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
 
@@ -1544,6 +1584,9 @@ int afs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr)
 	struct afs_net *net = afs_v2net(vnode);
 	__be32 *bp;
 
+	if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+		return yfs_fs_setattr(fc, attr);
+
 	if (attr->ia_valid & ATTR_SIZE)
 		return afs_fs_setattr_size(fc, attr);
 
@@ -1728,6 +1771,9 @@ int afs_fs_get_volume_status(struct afs_fs_cursor *fc,
 	__be32 *bp;
 	void *tmpbuf;
 
+	if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+		return yfs_fs_get_volume_status(fc, vs);
+
 	_enter("");
 
 	tmpbuf = kmalloc(AFSOPAQUEMAX, GFP_KERNEL);
@@ -1817,6 +1863,9 @@ int afs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type)
 	struct afs_net *net = afs_v2net(vnode);
 	__be32 *bp;
 
+	if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+		return yfs_fs_set_lock(fc, type);
+
 	_enter("");
 
 	call = afs_alloc_flat_call(net, &afs_RXFSSetLock, 5 * 4, 6 * 4);
@@ -1849,6 +1898,9 @@ int afs_fs_extend_lock(struct afs_fs_cursor *fc)
 	struct afs_net *net = afs_v2net(vnode);
 	__be32 *bp;
 
+	if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+		return yfs_fs_extend_lock(fc);
+
 	_enter("");
 
 	call = afs_alloc_flat_call(net, &afs_RXFSExtendLock, 4 * 4, 6 * 4);
@@ -1880,6 +1932,9 @@ int afs_fs_release_lock(struct afs_fs_cursor *fc)
 	struct afs_net *net = afs_v2net(vnode);
 	__be32 *bp;
 
+	if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+		return yfs_fs_release_lock(fc);
+
 	_enter("");
 
 	call = afs_alloc_flat_call(net, &afs_RXFSReleaseLock, 4 * 4, 6 * 4);
@@ -1951,6 +2006,7 @@ int afs_fs_give_up_all_callbacks(struct afs_net *net,
  */
 static int afs_deliver_fs_get_capabilities(struct afs_call *call)
 {
+	struct afs_server *server = call->reply[0];
 	u32 count;
 	int ret;
 
@@ -1986,6 +2042,11 @@ static int afs_deliver_fs_get_capabilities(struct afs_call *call)
 		break;
 	}
 
+	if (call->service_id == YFS_FS_SERVICE)
+		set_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
+	else
+		clear_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
+
 	_leave(" = 0 [done]");
 	return 0;
 }
@@ -2019,6 +2080,8 @@ int afs_fs_get_capabilities(struct afs_net *net,
 		return -ENOMEM;
 
 	call->key = key;
+	call->reply[0] = server;
+	call->upgrade = true;
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -2054,8 +2117,7 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call)
 	if (ret < 0)
 		return ret;
 	xdr_decode_AFSCallBack_raw(call, &bp, callback);
-	if (volsync)
-		xdr_decode_AFSVolSync(&bp, volsync);
+	xdr_decode_AFSVolSync(&bp, volsync);
 
 	_leave(" = 0 [done]");
 	return 0;
@@ -2084,6 +2146,9 @@ int afs_fs_fetch_status(struct afs_fs_cursor *fc,
 	struct afs_call *call;
 	__be32 *bp;
 
+	if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+		return yfs_fs_fetch_status(fc, net, fid, status, callback, volsync);
+
 	_enter(",%x,{%llx:%llu},,",
 	       key_serial(fc->key), fid->vid, fid->vnode);
 
@@ -2218,8 +2283,7 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
 			return ret;
 
 		bp = call->buffer;
-		if (call->reply[3])
-			xdr_decode_AFSVolSync(&bp, call->reply[3]);
+		xdr_decode_AFSVolSync(&bp, call->reply[3]);
 
 		call->unmarshall++;
 
@@ -2256,6 +2320,10 @@ int afs_fs_inline_bulk_status(struct afs_fs_cursor *fc,
 	__be32 *bp;
 	int i;
 
+	if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags))
+		return yfs_fs_inline_bulk_status(fc, net, fids, statuses, callbacks,
+						 nr_fids, volsync);
+
 	_enter(",%x,{%llx:%llu},%u",
 	       key_serial(fc->key), fids[0].vid, fids[1].vnode, nr_fids);
 
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 78065af051530..ce79bd514331c 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -459,6 +459,8 @@ struct afs_server {
 #define AFS_SERVER_FL_PROBING	6		/* Fileserver is being probed */
 #define AFS_SERVER_FL_NO_IBULK	7		/* Fileserver doesn't support FS.InlineBulkStatus */
 #define AFS_SERVER_FL_MAY_HAVE_CB 8		/* May have callbacks on this fileserver */
+#define AFS_SERVER_FL_IS_YFS	9		/* Server is YFS not AFS */
+#define AFS_SERVER_FL_NO_RM2	10		/* Fileserver doesn't support YFS.RemoveFile2 */
 	atomic_t		usage;
 	u32			addr_version;	/* Address list version */
 
@@ -751,6 +753,7 @@ extern struct fscache_cookie_def afs_vnode_cache_index_def;
  * callback.c
  */
 extern void afs_init_callback_state(struct afs_server *);
+extern void __afs_break_callback(struct afs_vnode *);
 extern void afs_break_callback(struct afs_vnode *);
 extern void afs_break_callbacks(struct afs_server *, size_t, struct afs_callback_break*);
 
@@ -864,7 +867,7 @@ extern int afs_fs_give_up_callbacks(struct afs_net *, struct afs_server *);
 extern int afs_fs_fetch_data(struct afs_fs_cursor *, struct afs_read *);
 extern int afs_fs_create(struct afs_fs_cursor *, const char *, umode_t, u64,
 			 struct afs_fid *, struct afs_file_status *, struct afs_callback *);
-extern int afs_fs_remove(struct afs_fs_cursor *, const char *, bool, u64);
+extern int afs_fs_remove(struct afs_fs_cursor *, struct afs_vnode *, const char *, bool, u64);
 extern int afs_fs_link(struct afs_fs_cursor *, struct afs_vnode *, const char *, u64);
 extern int afs_fs_symlink(struct afs_fs_cursor *, const char *, const char *, u64,
 			  struct afs_fid *, struct afs_file_status *);
@@ -1228,6 +1231,36 @@ extern int afs_launder_page(struct page *);
 extern const struct xattr_handler *afs_xattr_handlers[];
 extern ssize_t afs_listxattr(struct dentry *, char *, size_t);
 
+/*
+ * yfsclient.c
+ */
+extern int yfs_fs_fetch_file_status(struct afs_fs_cursor *, struct afs_volsync *, bool);
+extern int yfs_fs_fetch_data(struct afs_fs_cursor *, struct afs_read *);
+extern int yfs_fs_create_file(struct afs_fs_cursor *, const char *, umode_t, u64,
+			      struct afs_fid *, struct afs_file_status *, struct afs_callback *);
+extern int yfs_fs_make_dir(struct afs_fs_cursor *, const char *, umode_t, u64,
+			 struct afs_fid *, struct afs_file_status *, struct afs_callback *);
+extern int yfs_fs_remove_file2(struct afs_fs_cursor *, struct afs_vnode *, const char *, u64);
+extern int yfs_fs_remove(struct afs_fs_cursor *, struct afs_vnode *, const char *, bool, u64);
+extern int yfs_fs_link(struct afs_fs_cursor *, struct afs_vnode *, const char *, u64);
+extern int yfs_fs_symlink(struct afs_fs_cursor *, const char *, const char *, u64,
+			  struct afs_fid *, struct afs_file_status *);
+extern int yfs_fs_rename(struct afs_fs_cursor *, const char *,
+			 struct afs_vnode *, const char *, u64, u64);
+extern int yfs_fs_store_data(struct afs_fs_cursor *, struct address_space *,
+			     pgoff_t, pgoff_t, unsigned, unsigned);
+extern int yfs_fs_setattr(struct afs_fs_cursor *, struct iattr *);
+extern int yfs_fs_get_volume_status(struct afs_fs_cursor *, struct afs_volume_status *);
+extern int yfs_fs_set_lock(struct afs_fs_cursor *, afs_lock_type_t);
+extern int yfs_fs_extend_lock(struct afs_fs_cursor *);
+extern int yfs_fs_release_lock(struct afs_fs_cursor *);
+extern int yfs_fs_fetch_status(struct afs_fs_cursor *, struct afs_net *,
+			       struct afs_fid *, struct afs_file_status *,
+			       struct afs_callback *, struct afs_volsync *);
+extern int yfs_fs_inline_bulk_status(struct afs_fs_cursor *, struct afs_net *,
+				     struct afs_fid *, struct afs_file_status *,
+				     struct afs_callback *, unsigned int,
+				     struct afs_volsync *);
 
 /*
  * Miscellaneous inline functions.
diff --git a/fs/afs/protocol_yfs.h b/fs/afs/protocol_yfs.h
index b7b211dd98576..07bc10f076aac 100644
--- a/fs/afs/protocol_yfs.h
+++ b/fs/afs/protocol_yfs.h
@@ -30,6 +30,36 @@ enum YFS_CM_Operations {
 	YFSCBCallBack		= 64204,
 };
 
+enum YFS_FS_Operations {
+	YFSFETCHACL		= 64131, /* YFS Fetch file ACL */
+	YFSFETCHSTATUS		= 64132, /* YFS Fetch file status */
+	YFSSTOREACL		= 64134, /* YFS Store file ACL */
+	YFSSTORESTATUS		= 64135, /* YFS Store file status */
+	YFSREMOVEFILE		= 64136, /* YFS Remove a file */
+	YFSCREATEFILE		= 64137, /* YFS Create a file */
+	YFSRENAME		= 64138, /* YFS Rename or move a file or directory */
+	YFSSYMLINK		= 64139, /* YFS Create a symbolic link */
+	YFSLINK			= 64140, /* YFS Create a hard link */
+	YFSMAKEDIR		= 64141, /* YFS Create a directory */
+	YFSREMOVEDIR		= 64142, /* YFS Remove a directory */
+	YFSGETVOLUMESTATUS	= 64149, /* YFS Get volume status information */
+	YFSSETVOLUMESTATUS	= 64150, /* YFS Set volume status information */
+	YFSSETLOCK		= 64156, /* YFS Request a file lock */
+	YFSEXTENDLOCK		= 64157, /* YFS Extend a file lock */
+	YFSRELEASELOCK		= 64158, /* YFS Release a file lock */
+	YFSLOOKUP		= 64161, /* YFS lookup file in directory */
+	YFSFLUSHCPS		= 64165,
+	YFSFETCHOPAQUEACL	= 64168,
+	YFSWHOAMI		= 64170,
+	YFSREMOVEACL		= 64171,
+	YFSREMOVEFILE2		= 64173,
+	YFSSTOREOPAQUEACL2	= 64174,
+	YFSINLINEBULKSTATUS	= 64536, /* YFS Fetch multiple file statuses with errors */
+	YFSFETCHDATA64		= 64537, /* YFS Fetch file data */
+	YFSSTOREDATA64		= 64538, /* YFS Store file data */
+	YFSUPDATESYMLINK	= 64540,
+};
+
 struct yfs_xdr_u64 {
 	__be32			msw;
 	__be32			lsw;
@@ -55,3 +85,79 @@ struct yfs_xdr_YFSFid {
 	struct yfs_xdr_u64	volume;
 	struct yfs_xdr_vnode	vnode;
 } __packed;
+
+
+struct yfs_xdr_YFSFetchStatus {
+	__be32			type;
+	__be32			nlink;
+	struct yfs_xdr_u64	size;
+	struct yfs_xdr_u64	data_version;
+	struct yfs_xdr_u64	author;
+	struct yfs_xdr_u64	owner;
+	struct yfs_xdr_u64	group;
+	__be32			mode;
+	__be32			caller_access;
+	__be32			anon_access;
+	struct yfs_xdr_vnode	parent;
+	__be32			data_access_protocol;
+	struct yfs_xdr_u64	mtime_client;
+	struct yfs_xdr_u64	mtime_server;
+	__be32			lock_count;
+	__be32			abort_code;
+} __packed;
+
+struct yfs_xdr_YFSCallBack {
+	__be32			version;
+	struct yfs_xdr_u64	expiration_time;
+	__be32			type;
+} __packed;
+
+struct yfs_xdr_YFSStoreStatus {
+	__be32			mask;
+	__be32			mode;
+	struct yfs_xdr_u64	mtime_client;
+	struct yfs_xdr_u64	owner;
+	struct yfs_xdr_u64	group;
+} __packed;
+
+struct yfs_xdr_RPCFlags {
+	__be32			rpc_flags;
+} __packed;
+
+struct yfs_xdr_YFSVolSync {
+	struct yfs_xdr_u64	vol_creation_date;
+	struct yfs_xdr_u64	vol_update_date;
+	struct yfs_xdr_u64	max_quota;
+	struct yfs_xdr_u64	blocks_in_use;
+	struct yfs_xdr_u64	blocks_avail;
+} __packed;
+
+enum yfs_volume_type {
+	yfs_volume_type_ro = 0,
+	yfs_volume_type_rw = 1,
+};
+
+#define yfs_FVSOnline		0x1
+#define yfs_FVSInservice	0x2
+#define yfs_FVSBlessed		0x4
+#define yfs_FVSNeedsSalvage	0x8
+
+struct yfs_xdr_YFSFetchVolumeStatus {
+	struct yfs_xdr_u64	vid;
+	struct yfs_xdr_u64	parent_id;
+	__be32			flags;
+	__be32			type;
+	struct yfs_xdr_u64	max_quota;
+	struct yfs_xdr_u64	blocks_in_use;
+	struct yfs_xdr_u64	part_blocks_avail;
+	struct yfs_xdr_u64	part_max_blocks;
+	struct yfs_xdr_u64	vol_copy_date;
+	struct yfs_xdr_u64	vol_backup_date;
+} __packed;
+
+struct yfs_xdr_YFSStoreVolumeStatus {
+	__be32			mask;
+	struct yfs_xdr_u64	min_quota;
+	struct yfs_xdr_u64	max_quota;
+	struct yfs_xdr_u64	file_quota;
+} __packed;
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 1a087eb8f2d72..aa35cfae54400 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -13,6 +13,7 @@
 #include <linux/slab.h>
 #include "afs_fs.h"
 #include "internal.h"
+#include "protocol_yfs.h"
 
 static unsigned afs_server_gc_delay = 10;	/* Server record timeout in seconds */
 static unsigned afs_server_update_delay = 30;	/* Time till VLDB recheck in secs */
@@ -513,6 +514,8 @@ void afs_purge_servers(struct afs_net *net)
  */
 static bool afs_do_probe_fileserver(struct afs_fs_cursor *fc)
 {
+	int i;
+
 	_enter("");
 
 	fc->ac.addr = NULL;
@@ -526,6 +529,11 @@ static bool afs_do_probe_fileserver(struct afs_fs_cursor *fc)
 					&fc->ac, fc->key);
 		switch (fc->ac.error) {
 		case 0:
+			if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) {
+				for (i = 0; i < fc->ac.alist->nr_addrs; i++)
+					fc->ac.alist->addrs[i].srx_service =
+						YFS_FS_SERVICE;
+			}
 			afs_end_cursor(&fc->ac);
 			set_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags);
 			return true;
diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c
new file mode 100644
index 0000000000000..d5e3f00950406
--- /dev/null
+++ b/fs/afs/yfsclient.c
@@ -0,0 +1,2184 @@
+/* YFS File Server client stubs
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/circ_buf.h>
+#include <linux/iversion.h>
+#include "internal.h"
+#include "afs_fs.h"
+#include "xdr_fs.h"
+#include "protocol_yfs.h"
+
+static const struct afs_fid afs_zero_fid;
+
+static inline void afs_use_fs_server(struct afs_call *call, struct afs_cb_interest *cbi)
+{
+	call->cbi = afs_get_cb_interest(cbi);
+}
+
+#define xdr_size(x) (sizeof(*x) / sizeof(__be32))
+
+static void xdr_decode_YFSFid(const __be32 **_bp, struct afs_fid *fid)
+{
+	const struct yfs_xdr_YFSFid *x = (const void *)*_bp;
+
+	fid->vid	= xdr_to_u64(x->volume);
+	fid->vnode	= xdr_to_u64(x->vnode.lo);
+	fid->vnode_hi	= ntohl(x->vnode.hi);
+	fid->unique	= ntohl(x->vnode.unique);
+	*_bp += xdr_size(x);
+}
+
+static __be32 *xdr_encode_u32(__be32 *bp, u32 n)
+{
+	*bp++ = htonl(n);
+	return bp;
+}
+
+static __be32 *xdr_encode_u64(__be32 *bp, u64 n)
+{
+	struct yfs_xdr_u64 *x = (void *)bp;
+
+	*x = u64_to_xdr(n);
+	return bp + xdr_size(x);
+}
+
+static __be32 *xdr_encode_YFSFid(__be32 *bp, struct afs_fid *fid)
+{
+	struct yfs_xdr_YFSFid *x = (void *)bp;
+
+	x->volume	= u64_to_xdr(fid->vid);
+	x->vnode.lo	= u64_to_xdr(fid->vnode);
+	x->vnode.hi	= htonl(fid->vnode_hi);
+	x->vnode.unique	= htonl(fid->unique);
+	return bp + xdr_size(x);
+}
+
+static size_t xdr_strlen(unsigned int len)
+{
+	return sizeof(__be32) + round_up(len, sizeof(__be32));
+}
+
+static __be32 *xdr_encode_string(__be32 *bp, const char *p, unsigned int len)
+{
+	bp = xdr_encode_u32(bp, len);
+	bp = memcpy(bp, p, len);
+	if (len & 3) {
+		unsigned int pad = 4 - (len & 3);
+
+		memset((u8 *)bp + len, 0, pad);
+		len += pad;
+	}
+
+	return bp + len / sizeof(__be32);
+}
+
+static s64 linux_to_yfs_time(const struct timespec64 *t)
+{
+	/* Convert to 100ns intervals. */
+	return (u64)t->tv_sec * 10000000 + t->tv_nsec/100;
+}
+
+static __be32 *xdr_encode_YFSStoreStatus_mode(__be32 *bp, mode_t mode)
+{
+	struct yfs_xdr_YFSStoreStatus *x = (void *)bp;
+
+	x->mask		= htonl(AFS_SET_MODE);
+	x->mode		= htonl(mode & S_IALLUGO);
+	x->mtime_client	= u64_to_xdr(0);
+	x->owner	= u64_to_xdr(0);
+	x->group	= u64_to_xdr(0);
+	return bp + xdr_size(x);
+}
+
+static __be32 *xdr_encode_YFSStoreStatus_mtime(__be32 *bp, const struct timespec64 *t)
+{
+	struct yfs_xdr_YFSStoreStatus *x = (void *)bp;
+	s64 mtime = linux_to_yfs_time(t);
+
+	x->mask		= htonl(AFS_SET_MTIME);
+	x->mode		= htonl(0);
+	x->mtime_client	= u64_to_xdr(mtime);
+	x->owner	= u64_to_xdr(0);
+	x->group	= u64_to_xdr(0);
+	return bp + xdr_size(x);
+}
+
+/*
+ * Convert a signed 100ns-resolution 64-bit time into a timespec.
+ */
+static struct timespec64 yfs_time_to_linux(s64 t)
+{
+	struct timespec64 ts;
+	u64 abs_t;
+
+	/*
+	 * Unfortunately can not use normal 64 bit division on 32 bit arch, but
+	 * the alternative, do_div, does not work with negative numbers so have
+	 * to special case them
+	 */
+	if (t < 0) {
+		abs_t = -t;
+		ts.tv_nsec = (time64_t)(do_div(abs_t, 10000000) * 100);
+		ts.tv_nsec = -ts.tv_nsec;
+		ts.tv_sec = -abs_t;
+	} else {
+		abs_t = t;
+		ts.tv_nsec = (time64_t)do_div(abs_t, 10000000) * 100;
+		ts.tv_sec = abs_t;
+	}
+
+	return ts;
+}
+
+static struct timespec64 xdr_to_time(const struct yfs_xdr_u64 xdr)
+{
+	s64 t = xdr_to_u64(xdr);
+
+	return yfs_time_to_linux(t);
+}
+
+static void yfs_check_req(struct afs_call *call, __be32 *bp)
+{
+	size_t len = (void *)bp - call->request;
+
+	if (len > call->request_size)
+		pr_err("kAFS: %s: Request buffer overflow (%zu>%u)\n",
+		       call->type->name, len, call->request_size);
+	else if (len < call->request_size)
+		pr_warning("kAFS: %s: Request buffer underflow (%zu<%u)\n",
+			   call->type->name, len, call->request_size);
+}
+
+/*
+ * Dump a bad file status record.
+ */
+static void xdr_dump_bad(const __be32 *bp)
+{
+	__be32 x[4];
+	int i;
+
+	pr_notice("YFS XDR: Bad status record\n");
+	for (i = 0; i < 5 * 4 * 4; i += 16) {
+		memcpy(x, bp, 16);
+		bp += 4;
+		pr_notice("%03x: %08x %08x %08x %08x\n",
+			  i, ntohl(x[0]), ntohl(x[1]), ntohl(x[2]), ntohl(x[3]));
+	}
+
+	memcpy(x, bp, 4);
+	pr_notice("0x50: %08x\n", ntohl(x[0]));
+}
+
+/*
+ * Decode a YFSFetchStatus block
+ */
+static int xdr_decode_YFSFetchStatus(struct afs_call *call,
+				     const __be32 **_bp,
+				     struct afs_file_status *status,
+				     struct afs_vnode *vnode,
+				     const afs_dataversion_t *expected_version,
+				     struct afs_read *read_req)
+{
+	const struct yfs_xdr_YFSFetchStatus *xdr = (const void *)*_bp;
+	u32 type;
+	u8 flags = 0;
+
+	status->abort_code = ntohl(xdr->abort_code);
+	if (status->abort_code != 0) {
+		if (vnode && status->abort_code == VNOVNODE) {
+			set_bit(AFS_VNODE_DELETED, &vnode->flags);
+			status->nlink = 0;
+			__afs_break_callback(vnode);
+		}
+		return 0;
+	}
+
+	type = ntohl(xdr->type);
+	switch (type) {
+	case AFS_FTYPE_FILE:
+	case AFS_FTYPE_DIR:
+	case AFS_FTYPE_SYMLINK:
+		if (type != status->type &&
+		    vnode &&
+		    !test_bit(AFS_VNODE_UNSET, &vnode->flags)) {
+			pr_warning("Vnode %llx:%llx:%x changed type %u to %u\n",
+				   vnode->fid.vid,
+				   vnode->fid.vnode,
+				   vnode->fid.unique,
+				   status->type, type);
+			goto bad;
+		}
+		status->type = type;
+		break;
+	default:
+		goto bad;
+	}
+
+#define EXTRACT_M4(FIELD)					\
+	do {							\
+		u32 x = ntohl(xdr->FIELD);			\
+		if (status->FIELD != x) {			\
+			flags |= AFS_VNODE_META_CHANGED;	\
+			status->FIELD = x;			\
+		}						\
+	} while (0)
+
+#define EXTRACT_M8(FIELD)					\
+	do {							\
+		u64 x = xdr_to_u64(xdr->FIELD);			\
+		if (status->FIELD != x) {			\
+			flags |= AFS_VNODE_META_CHANGED;	\
+			status->FIELD = x;			\
+		}						\
+	} while (0)
+
+#define EXTRACT_D8(FIELD)					\
+	do {							\
+		u64 x = xdr_to_u64(xdr->FIELD);			\
+		if (status->FIELD != x) {			\
+			flags |= AFS_VNODE_DATA_CHANGED;	\
+			status->FIELD = x;			\
+		}						\
+	} while (0)
+
+	EXTRACT_M4(nlink);
+	EXTRACT_D8(size);
+	EXTRACT_D8(data_version);
+	EXTRACT_M8(author);
+	EXTRACT_M8(owner);
+	EXTRACT_M8(group);
+	EXTRACT_M4(mode);
+	EXTRACT_M4(caller_access); /* call ticket dependent */
+	EXTRACT_M4(anon_access);
+
+	status->mtime_client = xdr_to_time(xdr->mtime_client);
+	status->mtime_server = xdr_to_time(xdr->mtime_server);
+	status->lock_count   = ntohl(xdr->lock_count);
+
+	if (read_req) {
+		read_req->data_version = status->data_version;
+		read_req->file_size = status->size;
+	}
+
+	*_bp += xdr_size(xdr);
+
+	if (vnode) {
+		if (test_bit(AFS_VNODE_UNSET, &vnode->flags))
+			flags |= AFS_VNODE_NOT_YET_SET;
+		afs_update_inode_from_status(vnode, status, expected_version,
+					     flags);
+	}
+
+	return 0;
+
+bad:
+	xdr_dump_bad(*_bp);
+	return afs_protocol_error(call, -EBADMSG, afs_eproto_bad_status);
+}
+
+/*
+ * Decode the file status.  We need to lock the target vnode if we're going to
+ * update its status so that stat() sees the attributes update atomically.
+ */
+static int yfs_decode_status(struct afs_call *call,
+			     const __be32 **_bp,
+			     struct afs_file_status *status,
+			     struct afs_vnode *vnode,
+			     const afs_dataversion_t *expected_version,
+			     struct afs_read *read_req)
+{
+	int ret;
+
+	if (!vnode)
+		return xdr_decode_YFSFetchStatus(call, _bp, status, vnode,
+						 expected_version, read_req);
+
+	write_seqlock(&vnode->cb_lock);
+	ret = xdr_decode_YFSFetchStatus(call, _bp, status, vnode,
+					expected_version, read_req);
+	write_sequnlock(&vnode->cb_lock);
+	return ret;
+}
+
+/*
+ * Decode a YFSCallBack block
+ */
+static void xdr_decode_YFSCallBack(struct afs_call *call,
+				   struct afs_vnode *vnode,
+				   const __be32 **_bp)
+{
+	struct yfs_xdr_YFSCallBack *xdr = (void *)*_bp;
+	struct afs_cb_interest *old, *cbi = call->cbi;
+	u64 cb_expiry;
+
+	write_seqlock(&vnode->cb_lock);
+
+	if (call->cb_break == afs_cb_break_sum(vnode, cbi)) {
+		cb_expiry = xdr_to_u64(xdr->expiration_time);
+		do_div(cb_expiry, 10 * 1000 * 1000);
+		vnode->cb_version	= ntohl(xdr->version);
+		vnode->cb_type		= ntohl(xdr->type);
+		vnode->cb_expires_at	= cb_expiry + ktime_get_real_seconds();
+		old = vnode->cb_interest;
+		if (old != call->cbi) {
+			vnode->cb_interest = cbi;
+			cbi = old;
+		}
+		set_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+	}
+
+	write_sequnlock(&vnode->cb_lock);
+	call->cbi = cbi;
+	*_bp += xdr_size(xdr);
+}
+
+static void xdr_decode_YFSCallBack_raw(const __be32 **_bp,
+				       struct afs_callback *cb)
+{
+	struct yfs_xdr_YFSCallBack *x = (void *)*_bp;
+	u64 cb_expiry;
+
+	cb_expiry = xdr_to_u64(x->expiration_time);
+	do_div(cb_expiry, 10 * 1000 * 1000);
+	cb->version	= ntohl(x->version);
+	cb->type	= ntohl(x->type);
+	cb->expires_at	= cb_expiry + ktime_get_real_seconds();
+
+	*_bp += xdr_size(x);
+}
+
+/*
+ * Decode a YFSVolSync block
+ */
+static void xdr_decode_YFSVolSync(const __be32 **_bp,
+				  struct afs_volsync *volsync)
+{
+	struct yfs_xdr_YFSVolSync *x = (void *)*_bp;
+	u64 creation;
+
+	if (volsync) {
+		creation = xdr_to_u64(x->vol_creation_date);
+		do_div(creation, 10 * 1000 * 1000);
+		volsync->creation = creation;
+	}
+
+	*_bp += xdr_size(x);
+}
+
+/*
+ * Encode the requested attributes into a YFSStoreStatus block
+ */
+static __be32 *xdr_encode_YFS_StoreStatus(__be32 *bp, struct iattr *attr)
+{
+	struct yfs_xdr_YFSStoreStatus *x = (void *)bp;
+	s64 mtime = 0, owner = 0, group = 0;
+	u32 mask = 0, mode = 0;
+
+	mask = 0;
+	if (attr->ia_valid & ATTR_MTIME) {
+		mask |= AFS_SET_MTIME;
+		mtime = linux_to_yfs_time(&attr->ia_mtime);
+	}
+
+	if (attr->ia_valid & ATTR_UID) {
+		mask |= AFS_SET_OWNER;
+		owner = from_kuid(&init_user_ns, attr->ia_uid);
+	}
+
+	if (attr->ia_valid & ATTR_GID) {
+		mask |= AFS_SET_GROUP;
+		group = from_kgid(&init_user_ns, attr->ia_gid);
+	}
+
+	if (attr->ia_valid & ATTR_MODE) {
+		mask |= AFS_SET_MODE;
+		mode = attr->ia_mode & S_IALLUGO;
+	}
+
+	x->mask		= htonl(mask);
+	x->mode		= htonl(mode);
+	x->mtime_client	= u64_to_xdr(mtime);
+	x->owner	= u64_to_xdr(owner);
+	x->group	= u64_to_xdr(group);
+	return bp + xdr_size(x);
+}
+
+/*
+ * Decode a YFSFetchVolumeStatus block.
+ */
+static void xdr_decode_YFSFetchVolumeStatus(const __be32 **_bp,
+					    struct afs_volume_status *vs)
+{
+	const struct yfs_xdr_YFSFetchVolumeStatus *x = (const void *)*_bp;
+	u32 flags;
+
+	vs->vid			= xdr_to_u64(x->vid);
+	vs->parent_id		= xdr_to_u64(x->parent_id);
+	flags			= ntohl(x->flags);
+	vs->online		= flags & yfs_FVSOnline;
+	vs->in_service		= flags & yfs_FVSInservice;
+	vs->blessed		= flags & yfs_FVSBlessed;
+	vs->needs_salvage	= flags & yfs_FVSNeedsSalvage;
+	vs->type		= ntohl(x->type);
+	vs->min_quota		= 0;
+	vs->max_quota		= xdr_to_u64(x->max_quota);
+	vs->blocks_in_use	= xdr_to_u64(x->blocks_in_use);
+	vs->part_blocks_avail	= xdr_to_u64(x->part_blocks_avail);
+	vs->part_max_blocks	= xdr_to_u64(x->part_max_blocks);
+	vs->vol_copy_date	= xdr_to_u64(x->vol_copy_date);
+	vs->vol_backup_date	= xdr_to_u64(x->vol_backup_date);
+	*_bp += sizeof(*x) / sizeof(__be32);
+}
+
+/*
+ * deliver reply data to an FS.FetchStatus
+ */
+static int yfs_deliver_fs_fetch_status_vnode(struct afs_call *call)
+{
+	struct afs_vnode *vnode = call->reply[0];
+	const __be32 *bp;
+	int ret;
+
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
+
+	_enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode);
+
+	/* unmarshall the reply once we've received all of it */
+	bp = call->buffer;
+	ret = yfs_decode_status(call, &bp, &vnode->status, vnode,
+				&call->expected_version, NULL);
+	if (ret < 0)
+		return ret;
+	xdr_decode_YFSCallBack(call, vnode, &bp);
+	xdr_decode_YFSVolSync(&bp, call->reply[1]);
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * YFS.FetchStatus operation type
+ */
+static const struct afs_call_type yfs_RXYFSFetchStatus_vnode = {
+	.name		= "YFS.FetchStatus(vnode)",
+	.op		= yfs_FS_FetchStatus,
+	.deliver	= yfs_deliver_fs_fetch_status_vnode,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Fetch the status information for a file.
+ */
+int yfs_fs_fetch_file_status(struct afs_fs_cursor *fc, struct afs_volsync *volsync,
+			     bool new_inode)
+{
+	struct afs_vnode *vnode = fc->vnode;
+	struct afs_call *call;
+	struct afs_net *net = afs_v2net(vnode);
+	__be32 *bp;
+
+	_enter(",%x,{%llx:%llu},,",
+	       key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
+
+	call = afs_alloc_flat_call(net, &yfs_RXYFSFetchStatus_vnode,
+				   sizeof(__be32) * 2 +
+				   sizeof(struct yfs_xdr_YFSFid),
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSCallBack) +
+				   sizeof(struct yfs_xdr_YFSVolSync));
+	if (!call) {
+		fc->ac.error = -ENOMEM;
+		return -ENOMEM;
+	}
+
+	call->key = fc->key;
+	call->reply[0] = vnode;
+	call->reply[1] = volsync;
+	call->expected_version = new_inode ? 1 : vnode->status.data_version;
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSFETCHSTATUS);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &vnode->fid);
+	yfs_check_req(call, bp);
+
+	call->cb_break = fc->cb_break;
+	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &vnode->fid);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Deliver reply data to an YFS.FetchData64.
+ */
+static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
+{
+	struct afs_vnode *vnode = call->reply[0];
+	struct afs_read *req = call->reply[2];
+	const __be32 *bp;
+	unsigned int size;
+	int ret;
+
+	_enter("{%u,%zu/%llu}",
+	       call->unmarshall, iov_iter_count(&call->iter), req->actual_len);
+
+	switch (call->unmarshall) {
+	case 0:
+		req->actual_len = 0;
+		req->index = 0;
+		req->offset = req->pos & (PAGE_SIZE - 1);
+		afs_extract_to_tmp64(call);
+		call->unmarshall++;
+
+		/* extract the returned data length */
+	case 1:
+		_debug("extract data length");
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		req->actual_len = be64_to_cpu(call->tmp64);
+		_debug("DATA length: %llu", req->actual_len);
+		req->remain = min(req->len, req->actual_len);
+		if (req->remain == 0)
+			goto no_more_data;
+
+		call->unmarshall++;
+
+	begin_page:
+		ASSERTCMP(req->index, <, req->nr_pages);
+		if (req->remain > PAGE_SIZE - req->offset)
+			size = PAGE_SIZE - req->offset;
+		else
+			size = req->remain;
+		call->bvec[0].bv_len = size;
+		call->bvec[0].bv_offset = req->offset;
+		call->bvec[0].bv_page = req->pages[req->index];
+		iov_iter_bvec(&call->iter, READ, call->bvec, 1, size);
+		ASSERTCMP(size, <=, PAGE_SIZE);
+
+		/* extract the returned data */
+	case 2:
+		_debug("extract data %zu/%llu",
+		       iov_iter_count(&call->iter), req->remain);
+
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+		req->remain -= call->bvec[0].bv_len;
+		req->offset += call->bvec[0].bv_len;
+		ASSERTCMP(req->offset, <=, PAGE_SIZE);
+		if (req->offset == PAGE_SIZE) {
+			req->offset = 0;
+			if (req->page_done)
+				req->page_done(call, req);
+			req->index++;
+			if (req->remain > 0)
+				goto begin_page;
+		}
+
+		ASSERTCMP(req->remain, ==, 0);
+		if (req->actual_len <= req->len)
+			goto no_more_data;
+
+		/* Discard any excess data the server gave us */
+		iov_iter_discard(&call->iter, READ, req->actual_len - req->len);
+		call->unmarshall = 3;
+	case 3:
+		_debug("extract discard %zu/%llu",
+		       iov_iter_count(&call->iter), req->actual_len - req->len);
+
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+	no_more_data:
+		call->unmarshall = 4;
+		afs_extract_to_buf(call,
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSCallBack) +
+				   sizeof(struct yfs_xdr_YFSVolSync));
+
+		/* extract the metadata */
+	case 4:
+		ret = afs_extract_data(call, false);
+		if (ret < 0)
+			return ret;
+
+		bp = call->buffer;
+		ret = yfs_decode_status(call, &bp, &vnode->status, vnode,
+					&vnode->status.data_version, req);
+		if (ret < 0)
+			return ret;
+		xdr_decode_YFSCallBack(call, vnode, &bp);
+		xdr_decode_YFSVolSync(&bp, call->reply[1]);
+
+		call->unmarshall++;
+
+	case 5:
+		break;
+	}
+
+	for (; req->index < req->nr_pages; req->index++) {
+		if (req->offset < PAGE_SIZE)
+			zero_user_segment(req->pages[req->index],
+					  req->offset, PAGE_SIZE);
+		if (req->page_done)
+			req->page_done(call, req);
+		req->offset = 0;
+	}
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+static void yfs_fetch_data_destructor(struct afs_call *call)
+{
+	struct afs_read *req = call->reply[2];
+
+	afs_put_read(req);
+	afs_flat_call_destructor(call);
+}
+
+/*
+ * YFS.FetchData64 operation type
+ */
+static const struct afs_call_type yfs_RXYFSFetchData64 = {
+	.name		= "YFS.FetchData64",
+	.op		= yfs_FS_FetchData64,
+	.deliver	= yfs_deliver_fs_fetch_data64,
+	.destructor	= yfs_fetch_data_destructor,
+};
+
+/*
+ * Fetch data from a file.
+ */
+int yfs_fs_fetch_data(struct afs_fs_cursor *fc, struct afs_read *req)
+{
+	struct afs_vnode *vnode = fc->vnode;
+	struct afs_call *call;
+	struct afs_net *net = afs_v2net(vnode);
+	__be32 *bp;
+
+	_enter(",%x,{%llx:%llu},%llx,%llx",
+	       key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode,
+	       req->pos, req->len);
+
+	call = afs_alloc_flat_call(net, &yfs_RXYFSFetchData64,
+				   sizeof(__be32) * 2 +
+				   sizeof(struct yfs_xdr_YFSFid) +
+				   sizeof(struct yfs_xdr_u64) * 2,
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSCallBack) +
+				   sizeof(struct yfs_xdr_YFSVolSync));
+	if (!call)
+		return -ENOMEM;
+
+	call->key = fc->key;
+	call->reply[0] = vnode;
+	call->reply[1] = NULL; /* volsync */
+	call->reply[2] = req;
+	call->expected_version = vnode->status.data_version;
+	call->want_reply_time = true;
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSFETCHDATA64);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &vnode->fid);
+	bp = xdr_encode_u64(bp, req->pos);
+	bp = xdr_encode_u64(bp, req->len);
+	yfs_check_req(call, bp);
+
+	refcount_inc(&req->usage);
+	call->cb_break = fc->cb_break;
+	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &vnode->fid);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Deliver reply data for YFS.CreateFile or YFS.MakeDir.
+ */
+static int yfs_deliver_fs_create_vnode(struct afs_call *call)
+{
+	struct afs_vnode *vnode = call->reply[0];
+	const __be32 *bp;
+	int ret;
+
+	_enter("{%u}", call->unmarshall);
+
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
+
+	/* unmarshall the reply once we've received all of it */
+	bp = call->buffer;
+	xdr_decode_YFSFid(&bp, call->reply[1]);
+	ret = yfs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL);
+	if (ret < 0)
+		return ret;
+	ret = yfs_decode_status(call, &bp, &vnode->status, vnode,
+				&call->expected_version, NULL);
+	if (ret < 0)
+		return ret;
+	xdr_decode_YFSCallBack_raw(&bp, call->reply[3]);
+	xdr_decode_YFSVolSync(&bp, NULL);
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * FS.CreateFile and FS.MakeDir operation type
+ */
+static const struct afs_call_type afs_RXFSCreateFile = {
+	.name		= "YFS.CreateFile",
+	.op		= yfs_FS_CreateFile,
+	.deliver	= yfs_deliver_fs_create_vnode,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Create a file.
+ */
+int yfs_fs_create_file(struct afs_fs_cursor *fc,
+		       const char *name,
+		       umode_t mode,
+		       u64 current_data_version,
+		       struct afs_fid *newfid,
+		       struct afs_file_status *newstatus,
+		       struct afs_callback *newcb)
+{
+	struct afs_vnode *vnode = fc->vnode;
+	struct afs_call *call;
+	struct afs_net *net = afs_v2net(vnode);
+	size_t namesz, reqsz, rplsz;
+	__be32 *bp;
+
+	_enter("");
+
+	namesz = strlen(name);
+	reqsz = (sizeof(__be32) +
+		 sizeof(__be32) +
+		 sizeof(struct yfs_xdr_YFSFid) +
+		 xdr_strlen(namesz) +
+		 sizeof(struct yfs_xdr_YFSStoreStatus) +
+		 sizeof(__be32));
+	rplsz = (sizeof(struct yfs_xdr_YFSFid) +
+		 sizeof(struct yfs_xdr_YFSFetchStatus) +
+		 sizeof(struct yfs_xdr_YFSFetchStatus) +
+		 sizeof(struct yfs_xdr_YFSCallBack) +
+		 sizeof(struct yfs_xdr_YFSVolSync));
+
+	call = afs_alloc_flat_call(net, &afs_RXFSCreateFile, reqsz, rplsz);
+	if (!call)
+		return -ENOMEM;
+
+	call->key = fc->key;
+	call->reply[0] = vnode;
+	call->reply[1] = newfid;
+	call->reply[2] = newstatus;
+	call->reply[3] = newcb;
+	call->expected_version = current_data_version + 1;
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSCREATEFILE);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &vnode->fid);
+	bp = xdr_encode_string(bp, name, namesz);
+	bp = xdr_encode_YFSStoreStatus_mode(bp, mode);
+	bp = xdr_encode_u32(bp, 0); /* ViceLockType */
+	yfs_check_req(call, bp);
+
+	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &vnode->fid);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+static const struct afs_call_type yfs_RXFSMakeDir = {
+	.name		= "YFS.MakeDir",
+	.op		= yfs_FS_MakeDir,
+	.deliver	= yfs_deliver_fs_create_vnode,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Make a directory.
+ */
+int yfs_fs_make_dir(struct afs_fs_cursor *fc,
+		    const char *name,
+		    umode_t mode,
+		    u64 current_data_version,
+		    struct afs_fid *newfid,
+		    struct afs_file_status *newstatus,
+		    struct afs_callback *newcb)
+{
+	struct afs_vnode *vnode = fc->vnode;
+	struct afs_call *call;
+	struct afs_net *net = afs_v2net(vnode);
+	size_t namesz, reqsz, rplsz;
+	__be32 *bp;
+
+	_enter("");
+
+	namesz = strlen(name);
+	reqsz = (sizeof(__be32) +
+		 sizeof(struct yfs_xdr_RPCFlags) +
+		 sizeof(struct yfs_xdr_YFSFid) +
+		 xdr_strlen(namesz) +
+		 sizeof(struct yfs_xdr_YFSStoreStatus));
+	rplsz = (sizeof(struct yfs_xdr_YFSFid) +
+		 sizeof(struct yfs_xdr_YFSFetchStatus) +
+		 sizeof(struct yfs_xdr_YFSFetchStatus) +
+		 sizeof(struct yfs_xdr_YFSCallBack) +
+		 sizeof(struct yfs_xdr_YFSVolSync));
+
+	call = afs_alloc_flat_call(net, &yfs_RXFSMakeDir, reqsz, rplsz);
+	if (!call)
+		return -ENOMEM;
+
+	call->key = fc->key;
+	call->reply[0] = vnode;
+	call->reply[1] = newfid;
+	call->reply[2] = newstatus;
+	call->reply[3] = newcb;
+	call->expected_version = current_data_version + 1;
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSMAKEDIR);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &vnode->fid);
+	bp = xdr_encode_string(bp, name, namesz);
+	bp = xdr_encode_YFSStoreStatus_mode(bp, mode);
+	yfs_check_req(call, bp);
+
+	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &vnode->fid);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Deliver reply data to a YFS.RemoveFile2 operation.
+ */
+static int yfs_deliver_fs_remove_file2(struct afs_call *call)
+{
+	struct afs_vnode *dvnode = call->reply[0];
+	struct afs_vnode *vnode = call->reply[1];
+	struct afs_fid fid;
+	const __be32 *bp;
+	int ret;
+
+	_enter("{%u}", call->unmarshall);
+
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
+
+	/* unmarshall the reply once we've received all of it */
+	bp = call->buffer;
+	ret = yfs_decode_status(call, &bp, &dvnode->status, dvnode,
+				&call->expected_version, NULL);
+	if (ret < 0)
+		return ret;
+
+	xdr_decode_YFSFid(&bp, &fid);
+	ret = yfs_decode_status(call, &bp, &vnode->status, vnode, NULL, NULL);
+	if (ret < 0)
+		return ret;
+	/* Was deleted if vnode->status.abort_code == VNOVNODE. */
+
+	xdr_decode_YFSVolSync(&bp, NULL);
+	return 0;
+}
+
+/*
+ * YFS.RemoveFile2 operation type.
+ */
+static const struct afs_call_type yfs_RXYFSRemoveFile2 = {
+	.name		= "YFS.RemoveFile2",
+	.op		= yfs_FS_RemoveFile2,
+	.deliver	= yfs_deliver_fs_remove_file2,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Remove a file and retrieve new file status.
+ */
+int yfs_fs_remove_file2(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
+			const char *name, u64 current_data_version)
+{
+	struct afs_vnode *dvnode = fc->vnode;
+	struct afs_call *call;
+	struct afs_net *net = afs_v2net(dvnode);
+	size_t namesz;
+	__be32 *bp;
+
+	_enter("");
+
+	namesz = strlen(name);
+
+	call = afs_alloc_flat_call(net, &yfs_RXYFSRemoveFile2,
+				   sizeof(__be32) +
+				   sizeof(struct yfs_xdr_RPCFlags) +
+				   sizeof(struct yfs_xdr_YFSFid) +
+				   xdr_strlen(namesz),
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSFid) +
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSVolSync));
+	if (!call)
+		return -ENOMEM;
+
+	call->key = fc->key;
+	call->reply[0] = dvnode;
+	call->reply[1] = vnode;
+	call->expected_version = current_data_version + 1;
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSREMOVEFILE2);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &dvnode->fid);
+	bp = xdr_encode_string(bp, name, namesz);
+	yfs_check_req(call, bp);
+
+	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &dvnode->fid);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Deliver reply data to a YFS.RemoveFile or YFS.RemoveDir operation.
+ */
+static int yfs_deliver_fs_remove(struct afs_call *call)
+{
+	struct afs_vnode *dvnode = call->reply[0];
+	const __be32 *bp;
+	int ret;
+
+	_enter("{%u}", call->unmarshall);
+
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
+
+	/* unmarshall the reply once we've received all of it */
+	bp = call->buffer;
+	ret = yfs_decode_status(call, &bp, &dvnode->status, dvnode,
+				&call->expected_version, NULL);
+	if (ret < 0)
+		return ret;
+
+	xdr_decode_YFSVolSync(&bp, NULL);
+	return 0;
+}
+
+/*
+ * FS.RemoveDir and FS.RemoveFile operation types.
+ */
+static const struct afs_call_type yfs_RXYFSRemoveFile = {
+	.name		= "YFS.RemoveFile",
+	.op		= yfs_FS_RemoveFile,
+	.deliver	= yfs_deliver_fs_remove,
+	.destructor	= afs_flat_call_destructor,
+};
+
+static const struct afs_call_type yfs_RXYFSRemoveDir = {
+	.name		= "YFS.RemoveDir",
+	.op		= yfs_FS_RemoveDir,
+	.deliver	= yfs_deliver_fs_remove,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * remove a file or directory
+ */
+int yfs_fs_remove(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
+		  const char *name, bool isdir, u64 current_data_version)
+{
+	struct afs_vnode *dvnode = fc->vnode;
+	struct afs_call *call;
+	struct afs_net *net = afs_v2net(dvnode);
+	size_t namesz;
+	__be32 *bp;
+
+	_enter("");
+
+	namesz = strlen(name);
+	call = afs_alloc_flat_call(
+		net, isdir ? &yfs_RXYFSRemoveDir : &yfs_RXYFSRemoveFile,
+		sizeof(__be32) +
+		sizeof(struct yfs_xdr_RPCFlags) +
+		sizeof(struct yfs_xdr_YFSFid) +
+		xdr_strlen(namesz),
+		sizeof(struct yfs_xdr_YFSFetchStatus) +
+		sizeof(struct yfs_xdr_YFSVolSync));
+	if (!call)
+		return -ENOMEM;
+
+	call->key = fc->key;
+	call->reply[0] = dvnode;
+	call->reply[1] = vnode;
+	call->expected_version = current_data_version + 1;
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, isdir ? YFSREMOVEDIR : YFSREMOVEFILE);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &dvnode->fid);
+	bp = xdr_encode_string(bp, name, namesz);
+	yfs_check_req(call, bp);
+
+	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &dvnode->fid);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Deliver reply data to a YFS.Link operation.
+ */
+static int yfs_deliver_fs_link(struct afs_call *call)
+{
+	struct afs_vnode *dvnode = call->reply[0], *vnode = call->reply[1];
+	const __be32 *bp;
+	int ret;
+
+	_enter("{%u}", call->unmarshall);
+
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
+
+	/* unmarshall the reply once we've received all of it */
+	bp = call->buffer;
+	ret = yfs_decode_status(call, &bp, &vnode->status, vnode, NULL, NULL);
+	if (ret < 0)
+		return ret;
+	ret = yfs_decode_status(call, &bp, &dvnode->status, dvnode,
+				&call->expected_version, NULL);
+	if (ret < 0)
+		return ret;
+	xdr_decode_YFSVolSync(&bp, NULL);
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * YFS.Link operation type.
+ */
+static const struct afs_call_type yfs_RXYFSLink = {
+	.name		= "YFS.Link",
+	.op		= yfs_FS_Link,
+	.deliver	= yfs_deliver_fs_link,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Make a hard link.
+ */
+int yfs_fs_link(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
+		const char *name, u64 current_data_version)
+{
+	struct afs_vnode *dvnode = fc->vnode;
+	struct afs_call *call;
+	struct afs_net *net = afs_v2net(vnode);
+	size_t namesz;
+	__be32 *bp;
+
+	_enter("");
+
+	namesz = strlen(name);
+	call = afs_alloc_flat_call(net, &yfs_RXYFSLink,
+				   sizeof(__be32) +
+				   sizeof(struct yfs_xdr_RPCFlags) +
+				   sizeof(struct yfs_xdr_YFSFid) +
+				   xdr_strlen(namesz) +
+				   sizeof(struct yfs_xdr_YFSFid),
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSVolSync));
+	if (!call)
+		return -ENOMEM;
+
+	call->key = fc->key;
+	call->reply[0] = dvnode;
+	call->reply[1] = vnode;
+	call->expected_version = current_data_version + 1;
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSLINK);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &dvnode->fid);
+	bp = xdr_encode_string(bp, name, namesz);
+	bp = xdr_encode_YFSFid(bp, &vnode->fid);
+	yfs_check_req(call, bp);
+
+	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &vnode->fid);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Deliver reply data to a YFS.Symlink operation.
+ */
+static int yfs_deliver_fs_symlink(struct afs_call *call)
+{
+	struct afs_vnode *vnode = call->reply[0];
+	const __be32 *bp;
+	int ret;
+
+	_enter("{%u}", call->unmarshall);
+
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
+
+	/* unmarshall the reply once we've received all of it */
+	bp = call->buffer;
+	xdr_decode_YFSFid(&bp, call->reply[1]);
+	ret = yfs_decode_status(call, &bp, call->reply[2], NULL, NULL, NULL);
+	if (ret < 0)
+		return ret;
+	ret = yfs_decode_status(call, &bp, &vnode->status, vnode,
+				&call->expected_version, NULL);
+	if (ret < 0)
+		return ret;
+	xdr_decode_YFSVolSync(&bp, NULL);
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * YFS.Symlink operation type
+ */
+static const struct afs_call_type yfs_RXYFSSymlink = {
+	.name		= "YFS.Symlink",
+	.op		= yfs_FS_Symlink,
+	.deliver	= yfs_deliver_fs_symlink,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Create a symbolic link.
+ */
+int yfs_fs_symlink(struct afs_fs_cursor *fc,
+		   const char *name,
+		   const char *contents,
+		   u64 current_data_version,
+		   struct afs_fid *newfid,
+		   struct afs_file_status *newstatus)
+{
+	struct afs_vnode *dvnode = fc->vnode;
+	struct afs_call *call;
+	struct afs_net *net = afs_v2net(dvnode);
+	size_t namesz, contents_sz;
+	__be32 *bp;
+
+	_enter("");
+
+	namesz = strlen(name);
+	contents_sz = strlen(contents);
+	call = afs_alloc_flat_call(net, &yfs_RXYFSSymlink,
+				   sizeof(__be32) +
+				   sizeof(struct yfs_xdr_RPCFlags) +
+				   sizeof(struct yfs_xdr_YFSFid) +
+				   xdr_strlen(namesz) +
+				   xdr_strlen(contents_sz) +
+				   sizeof(struct yfs_xdr_YFSStoreStatus),
+				   sizeof(struct yfs_xdr_YFSFid) +
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSVolSync));
+	if (!call)
+		return -ENOMEM;
+
+	call->key = fc->key;
+	call->reply[0] = dvnode;
+	call->reply[1] = newfid;
+	call->reply[2] = newstatus;
+	call->expected_version = current_data_version + 1;
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSSYMLINK);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &dvnode->fid);
+	bp = xdr_encode_string(bp, name, namesz);
+	bp = xdr_encode_string(bp, contents, contents_sz);
+	bp = xdr_encode_YFSStoreStatus_mode(bp, S_IRWXUGO);
+	yfs_check_req(call, bp);
+
+	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &dvnode->fid);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Deliver reply data to a YFS.Rename operation.
+ */
+static int yfs_deliver_fs_rename(struct afs_call *call)
+{
+	struct afs_vnode *orig_dvnode = call->reply[0];
+	struct afs_vnode *new_dvnode = call->reply[1];
+	const __be32 *bp;
+	int ret;
+
+	_enter("{%u}", call->unmarshall);
+
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
+
+	/* unmarshall the reply once we've received all of it */
+	bp = call->buffer;
+	ret = yfs_decode_status(call, &bp, &orig_dvnode->status, orig_dvnode,
+				&call->expected_version, NULL);
+	if (ret < 0)
+		return ret;
+	if (new_dvnode != orig_dvnode) {
+		ret = yfs_decode_status(call, &bp, &new_dvnode->status, new_dvnode,
+					&call->expected_version_2, NULL);
+		if (ret < 0)
+			return ret;
+	}
+
+	xdr_decode_YFSVolSync(&bp, NULL);
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * YFS.Rename operation type
+ */
+static const struct afs_call_type yfs_RXYFSRename = {
+	.name		= "FS.Rename",
+	.op		= yfs_FS_Rename,
+	.deliver	= yfs_deliver_fs_rename,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Rename a file or directory.
+ */
+int yfs_fs_rename(struct afs_fs_cursor *fc,
+		  const char *orig_name,
+		  struct afs_vnode *new_dvnode,
+		  const char *new_name,
+		  u64 current_orig_data_version,
+		  u64 current_new_data_version)
+{
+	struct afs_vnode *orig_dvnode = fc->vnode;
+	struct afs_call *call;
+	struct afs_net *net = afs_v2net(orig_dvnode);
+	size_t o_namesz, n_namesz;
+	__be32 *bp;
+
+	_enter("");
+
+	o_namesz = strlen(orig_name);
+	n_namesz = strlen(new_name);
+	call = afs_alloc_flat_call(net, &yfs_RXYFSRename,
+				   sizeof(__be32) +
+				   sizeof(struct yfs_xdr_RPCFlags) +
+				   sizeof(struct yfs_xdr_YFSFid) +
+				   xdr_strlen(o_namesz) +
+				   sizeof(struct yfs_xdr_YFSFid) +
+				   xdr_strlen(n_namesz),
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSVolSync));
+	if (!call)
+		return -ENOMEM;
+
+	call->key = fc->key;
+	call->reply[0] = orig_dvnode;
+	call->reply[1] = new_dvnode;
+	call->expected_version = current_orig_data_version + 1;
+	call->expected_version_2 = current_new_data_version + 1;
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSRENAME);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &orig_dvnode->fid);
+	bp = xdr_encode_string(bp, orig_name, o_namesz);
+	bp = xdr_encode_YFSFid(bp, &new_dvnode->fid);
+	bp = xdr_encode_string(bp, new_name, n_namesz);
+	yfs_check_req(call, bp);
+
+	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &orig_dvnode->fid);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Deliver reply data to a YFS.StoreData64 operation.
+ */
+static int yfs_deliver_fs_store_data(struct afs_call *call)
+{
+	struct afs_vnode *vnode = call->reply[0];
+	const __be32 *bp;
+	int ret;
+
+	_enter("");
+
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
+
+	/* unmarshall the reply once we've received all of it */
+	bp = call->buffer;
+	ret = yfs_decode_status(call, &bp, &vnode->status, vnode,
+				&call->expected_version, NULL);
+	if (ret < 0)
+		return ret;
+	xdr_decode_YFSVolSync(&bp, NULL);
+
+	afs_pages_written_back(vnode, call);
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * YFS.StoreData64 operation type.
+ */
+static const struct afs_call_type yfs_RXYFSStoreData64 = {
+	.name		= "YFS.StoreData64",
+	.op		= yfs_FS_StoreData64,
+	.deliver	= yfs_deliver_fs_store_data,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Store a set of pages to a large file.
+ */
+int yfs_fs_store_data(struct afs_fs_cursor *fc, struct address_space *mapping,
+		      pgoff_t first, pgoff_t last,
+		      unsigned offset, unsigned to)
+{
+	struct afs_vnode *vnode = fc->vnode;
+	struct afs_call *call;
+	struct afs_net *net = afs_v2net(vnode);
+	loff_t size, pos, i_size;
+	__be32 *bp;
+
+	_enter(",%x,{%llx:%llu},,",
+	       key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
+
+	size = (loff_t)to - (loff_t)offset;
+	if (first != last)
+		size += (loff_t)(last - first) << PAGE_SHIFT;
+	pos = (loff_t)first << PAGE_SHIFT;
+	pos += offset;
+
+	i_size = i_size_read(&vnode->vfs_inode);
+	if (pos + size > i_size)
+		i_size = size + pos;
+
+	_debug("size %llx, at %llx, i_size %llx",
+	       (unsigned long long)size, (unsigned long long)pos,
+	       (unsigned long long)i_size);
+
+	call = afs_alloc_flat_call(net, &yfs_RXYFSStoreData64,
+				   sizeof(__be32) +
+				   sizeof(__be32) +
+				   sizeof(struct yfs_xdr_YFSFid) +
+				   sizeof(struct yfs_xdr_YFSStoreStatus) +
+				   sizeof(struct yfs_xdr_u64) * 3,
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSVolSync));
+	if (!call)
+		return -ENOMEM;
+
+	call->key = fc->key;
+	call->mapping = mapping;
+	call->reply[0] = vnode;
+	call->first = first;
+	call->last = last;
+	call->first_offset = offset;
+	call->last_to = to;
+	call->send_pages = true;
+	call->expected_version = vnode->status.data_version + 1;
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSSTOREDATA64);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &vnode->fid);
+	bp = xdr_encode_YFSStoreStatus_mtime(bp, &vnode->vfs_inode.i_mtime);
+	bp = xdr_encode_u64(bp, pos);
+	bp = xdr_encode_u64(bp, size);
+	bp = xdr_encode_u64(bp, i_size);
+	yfs_check_req(call, bp);
+
+	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &vnode->fid);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * deliver reply data to an FS.StoreStatus
+ */
+static int yfs_deliver_fs_store_status(struct afs_call *call)
+{
+	struct afs_vnode *vnode = call->reply[0];
+	const __be32 *bp;
+	int ret;
+
+	_enter("");
+
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
+
+	/* unmarshall the reply once we've received all of it */
+	bp = call->buffer;
+	ret = yfs_decode_status(call, &bp, &vnode->status, vnode,
+				&call->expected_version, NULL);
+	if (ret < 0)
+		return ret;
+	xdr_decode_YFSVolSync(&bp, NULL);
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * YFS.StoreStatus operation type
+ */
+static const struct afs_call_type yfs_RXYFSStoreStatus = {
+	.name		= "YFS.StoreStatus",
+	.op		= yfs_FS_StoreStatus,
+	.deliver	= yfs_deliver_fs_store_status,
+	.destructor	= afs_flat_call_destructor,
+};
+
+static const struct afs_call_type yfs_RXYFSStoreData64_as_Status = {
+	.name		= "YFS.StoreData64",
+	.op		= yfs_FS_StoreData64,
+	.deliver	= yfs_deliver_fs_store_status,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Set the attributes on a file, using YFS.StoreData64 rather than
+ * YFS.StoreStatus so as to alter the file size also.
+ */
+static int yfs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr)
+{
+	struct afs_vnode *vnode = fc->vnode;
+	struct afs_call *call;
+	struct afs_net *net = afs_v2net(vnode);
+	__be32 *bp;
+
+	_enter(",%x,{%llx:%llu},,",
+	       key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
+
+	call = afs_alloc_flat_call(net, &yfs_RXYFSStoreData64_as_Status,
+				   sizeof(__be32) * 2 +
+				   sizeof(struct yfs_xdr_YFSFid) +
+				   sizeof(struct yfs_xdr_YFSStoreStatus) +
+				   sizeof(struct yfs_xdr_u64) * 3,
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSVolSync));
+	if (!call)
+		return -ENOMEM;
+
+	call->key = fc->key;
+	call->reply[0] = vnode;
+	call->expected_version = vnode->status.data_version + 1;
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSSTOREDATA64);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &vnode->fid);
+	bp = xdr_encode_YFS_StoreStatus(bp, attr);
+	bp = xdr_encode_u64(bp, 0);		/* position of start of write */
+	bp = xdr_encode_u64(bp, 0);		/* size of write */
+	bp = xdr_encode_u64(bp, attr->ia_size);	/* new file length */
+	yfs_check_req(call, bp);
+
+	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &vnode->fid);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Set the attributes on a file, using YFS.StoreData64 if there's a change in
+ * file size, and YFS.StoreStatus otherwise.
+ */
+int yfs_fs_setattr(struct afs_fs_cursor *fc, struct iattr *attr)
+{
+	struct afs_vnode *vnode = fc->vnode;
+	struct afs_call *call;
+	struct afs_net *net = afs_v2net(vnode);
+	__be32 *bp;
+
+	if (attr->ia_valid & ATTR_SIZE)
+		return yfs_fs_setattr_size(fc, attr);
+
+	_enter(",%x,{%llx:%llu},,",
+	       key_serial(fc->key), vnode->fid.vid, vnode->fid.vnode);
+
+	call = afs_alloc_flat_call(net, &yfs_RXYFSStoreStatus,
+				   sizeof(__be32) * 2 +
+				   sizeof(struct yfs_xdr_YFSFid) +
+				   sizeof(struct yfs_xdr_YFSStoreStatus),
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSVolSync));
+	if (!call)
+		return -ENOMEM;
+
+	call->key = fc->key;
+	call->reply[0] = vnode;
+	call->expected_version = vnode->status.data_version;
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSSTORESTATUS);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &vnode->fid);
+	bp = xdr_encode_YFS_StoreStatus(bp, attr);
+	yfs_check_req(call, bp);
+
+	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &vnode->fid);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Deliver reply data to a YFS.GetVolumeStatus operation.
+ */
+static int yfs_deliver_fs_get_volume_status(struct afs_call *call)
+{
+	const __be32 *bp;
+	char *p;
+	u32 size;
+	int ret;
+
+	_enter("{%u}", call->unmarshall);
+
+	switch (call->unmarshall) {
+	case 0:
+		call->unmarshall++;
+		afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSFetchVolumeStatus));
+
+		/* extract the returned status record */
+	case 1:
+		_debug("extract status");
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		bp = call->buffer;
+		xdr_decode_YFSFetchVolumeStatus(&bp, call->reply[1]);
+		call->unmarshall++;
+		afs_extract_to_tmp(call);
+
+		/* extract the volume name length */
+	case 2:
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		call->count = ntohl(call->tmp);
+		_debug("volname length: %u", call->count);
+		if (call->count >= AFSNAMEMAX)
+			return afs_protocol_error(call, -EBADMSG,
+						  afs_eproto_volname_len);
+		size = (call->count + 3) & ~3; /* It's padded */
+		afs_extract_begin(call, call->reply[2], size);
+		call->unmarshall++;
+
+		/* extract the volume name */
+	case 3:
+		_debug("extract volname");
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		p = call->reply[2];
+		p[call->count] = 0;
+		_debug("volname '%s'", p);
+		afs_extract_to_tmp(call);
+		call->unmarshall++;
+
+		/* extract the offline message length */
+	case 4:
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		call->count = ntohl(call->tmp);
+		_debug("offline msg length: %u", call->count);
+		if (call->count >= AFSNAMEMAX)
+			return afs_protocol_error(call, -EBADMSG,
+						  afs_eproto_offline_msg_len);
+		size = (call->count + 3) & ~3; /* It's padded */
+		afs_extract_begin(call, call->reply[2], size);
+		call->unmarshall++;
+
+		/* extract the offline message */
+	case 5:
+		_debug("extract offline");
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		p = call->reply[2];
+		p[call->count] = 0;
+		_debug("offline '%s'", p);
+
+		afs_extract_to_tmp(call);
+		call->unmarshall++;
+
+		/* extract the message of the day length */
+	case 6:
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		call->count = ntohl(call->tmp);
+		_debug("motd length: %u", call->count);
+		if (call->count >= AFSNAMEMAX)
+			return afs_protocol_error(call, -EBADMSG,
+						  afs_eproto_motd_len);
+		size = (call->count + 3) & ~3; /* It's padded */
+		afs_extract_begin(call, call->reply[2], size);
+		call->unmarshall++;
+
+		/* extract the message of the day */
+	case 7:
+		_debug("extract motd");
+		ret = afs_extract_data(call, false);
+		if (ret < 0)
+			return ret;
+
+		p = call->reply[2];
+		p[call->count] = 0;
+		_debug("motd '%s'", p);
+
+		call->unmarshall++;
+
+	case 8:
+		break;
+	}
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * Destroy a YFS.GetVolumeStatus call.
+ */
+static void yfs_get_volume_status_call_destructor(struct afs_call *call)
+{
+	kfree(call->reply[2]);
+	call->reply[2] = NULL;
+	afs_flat_call_destructor(call);
+}
+
+/*
+ * YFS.GetVolumeStatus operation type
+ */
+static const struct afs_call_type yfs_RXYFSGetVolumeStatus = {
+	.name		= "YFS.GetVolumeStatus",
+	.op		= yfs_FS_GetVolumeStatus,
+	.deliver	= yfs_deliver_fs_get_volume_status,
+	.destructor	= yfs_get_volume_status_call_destructor,
+};
+
+/*
+ * fetch the status of a volume
+ */
+int yfs_fs_get_volume_status(struct afs_fs_cursor *fc,
+			     struct afs_volume_status *vs)
+{
+	struct afs_vnode *vnode = fc->vnode;
+	struct afs_call *call;
+	struct afs_net *net = afs_v2net(vnode);
+	__be32 *bp;
+	void *tmpbuf;
+
+	_enter("");
+
+	tmpbuf = kmalloc(AFSOPAQUEMAX, GFP_KERNEL);
+	if (!tmpbuf)
+		return -ENOMEM;
+
+	call = afs_alloc_flat_call(net, &yfs_RXYFSGetVolumeStatus,
+				   sizeof(__be32) * 2 +
+				   sizeof(struct yfs_xdr_u64),
+				   sizeof(struct yfs_xdr_YFSFetchVolumeStatus) +
+				   sizeof(__be32));
+	if (!call) {
+		kfree(tmpbuf);
+		return -ENOMEM;
+	}
+
+	call->key = fc->key;
+	call->reply[0] = vnode;
+	call->reply[1] = vs;
+	call->reply[2] = tmpbuf;
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSGETVOLUMESTATUS);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_u64(bp, vnode->fid.vid);
+	yfs_check_req(call, bp);
+
+	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &vnode->fid);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Deliver reply data to an YFS.SetLock, YFS.ExtendLock or YFS.ReleaseLock
+ */
+static int yfs_deliver_fs_xxxx_lock(struct afs_call *call)
+{
+	struct afs_vnode *vnode = call->reply[0];
+	const __be32 *bp;
+	int ret;
+
+	_enter("{%u}", call->unmarshall);
+
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
+
+	/* unmarshall the reply once we've received all of it */
+	bp = call->buffer;
+	ret = yfs_decode_status(call, &bp, &vnode->status, vnode,
+				&call->expected_version, NULL);
+	if (ret < 0)
+		return ret;
+	xdr_decode_YFSVolSync(&bp, NULL);
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * YFS.SetLock operation type
+ */
+static const struct afs_call_type yfs_RXYFSSetLock = {
+	.name		= "YFS.SetLock",
+	.op		= yfs_FS_SetLock,
+	.deliver	= yfs_deliver_fs_xxxx_lock,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * YFS.ExtendLock operation type
+ */
+static const struct afs_call_type yfs_RXYFSExtendLock = {
+	.name		= "YFS.ExtendLock",
+	.op		= yfs_FS_ExtendLock,
+	.deliver	= yfs_deliver_fs_xxxx_lock,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * YFS.ReleaseLock operation type
+ */
+static const struct afs_call_type yfs_RXYFSReleaseLock = {
+	.name		= "YFS.ReleaseLock",
+	.op		= yfs_FS_ReleaseLock,
+	.deliver	= yfs_deliver_fs_xxxx_lock,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Set a lock on a file
+ */
+int yfs_fs_set_lock(struct afs_fs_cursor *fc, afs_lock_type_t type)
+{
+	struct afs_vnode *vnode = fc->vnode;
+	struct afs_call *call;
+	struct afs_net *net = afs_v2net(vnode);
+	__be32 *bp;
+
+	_enter("");
+
+	call = afs_alloc_flat_call(net, &yfs_RXYFSSetLock,
+				   sizeof(__be32) * 2 +
+				   sizeof(struct yfs_xdr_YFSFid) +
+				   sizeof(__be32),
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSVolSync));
+	if (!call)
+		return -ENOMEM;
+
+	call->key = fc->key;
+	call->reply[0] = vnode;
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSSETLOCK);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &vnode->fid);
+	bp = xdr_encode_u32(bp, type);
+	yfs_check_req(call, bp);
+
+	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &vnode->fid);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * extend a lock on a file
+ */
+int yfs_fs_extend_lock(struct afs_fs_cursor *fc)
+{
+	struct afs_vnode *vnode = fc->vnode;
+	struct afs_call *call;
+	struct afs_net *net = afs_v2net(vnode);
+	__be32 *bp;
+
+	_enter("");
+
+	call = afs_alloc_flat_call(net, &yfs_RXYFSExtendLock,
+				   sizeof(__be32) * 2 +
+				   sizeof(struct yfs_xdr_YFSFid),
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSVolSync));
+	if (!call)
+		return -ENOMEM;
+
+	call->key = fc->key;
+	call->reply[0] = vnode;
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSEXTENDLOCK);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &vnode->fid);
+	yfs_check_req(call, bp);
+
+	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &vnode->fid);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * release a lock on a file
+ */
+int yfs_fs_release_lock(struct afs_fs_cursor *fc)
+{
+	struct afs_vnode *vnode = fc->vnode;
+	struct afs_call *call;
+	struct afs_net *net = afs_v2net(vnode);
+	__be32 *bp;
+
+	_enter("");
+
+	call = afs_alloc_flat_call(net, &yfs_RXYFSReleaseLock,
+				   sizeof(__be32) * 2 +
+				   sizeof(struct yfs_xdr_YFSFid),
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSVolSync));
+	if (!call)
+		return -ENOMEM;
+
+	call->key = fc->key;
+	call->reply[0] = vnode;
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSRELEASELOCK);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, &vnode->fid);
+	yfs_check_req(call, bp);
+
+	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &vnode->fid);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Deliver reply data to an FS.FetchStatus with no vnode.
+ */
+static int yfs_deliver_fs_fetch_status(struct afs_call *call)
+{
+	struct afs_file_status *status = call->reply[1];
+	struct afs_callback *callback = call->reply[2];
+	struct afs_volsync *volsync = call->reply[3];
+	struct afs_vnode *vnode = call->reply[0];
+	const __be32 *bp;
+	int ret;
+
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
+
+	_enter("{%llx:%llu}", vnode->fid.vid, vnode->fid.vnode);
+
+	/* unmarshall the reply once we've received all of it */
+	bp = call->buffer;
+	ret = yfs_decode_status(call, &bp, status, vnode,
+				&call->expected_version, NULL);
+	if (ret < 0)
+		return ret;
+	xdr_decode_YFSCallBack_raw(&bp, callback);
+	xdr_decode_YFSVolSync(&bp, volsync);
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * YFS.FetchStatus operation type
+ */
+static const struct afs_call_type yfs_RXYFSFetchStatus = {
+	.name		= "YFS.FetchStatus",
+	.op		= yfs_FS_FetchStatus,
+	.deliver	= yfs_deliver_fs_fetch_status,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Fetch the status information for a fid without needing a vnode handle.
+ */
+int yfs_fs_fetch_status(struct afs_fs_cursor *fc,
+			struct afs_net *net,
+			struct afs_fid *fid,
+			struct afs_file_status *status,
+			struct afs_callback *callback,
+			struct afs_volsync *volsync)
+{
+	struct afs_call *call;
+	__be32 *bp;
+
+	_enter(",%x,{%llx:%llu},,",
+	       key_serial(fc->key), fid->vid, fid->vnode);
+
+	call = afs_alloc_flat_call(net, &yfs_RXYFSFetchStatus,
+				   sizeof(__be32) * 2 +
+				   sizeof(struct yfs_xdr_YFSFid),
+				   sizeof(struct yfs_xdr_YFSFetchStatus) +
+				   sizeof(struct yfs_xdr_YFSCallBack) +
+				   sizeof(struct yfs_xdr_YFSVolSync));
+	if (!call) {
+		fc->ac.error = -ENOMEM;
+		return -ENOMEM;
+	}
+
+	call->key = fc->key;
+	call->reply[0] = NULL; /* vnode for fid[0] */
+	call->reply[1] = status;
+	call->reply[2] = callback;
+	call->reply[3] = volsync;
+	call->expected_version = 1; /* vnode->status.data_version */
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSFETCHSTATUS);
+	bp = xdr_encode_u32(bp, 0); /* RPC flags */
+	bp = xdr_encode_YFSFid(bp, fid);
+	yfs_check_req(call, bp);
+
+	call->cb_break = fc->cb_break;
+	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, fid);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
+
+/*
+ * Deliver reply data to an YFS.InlineBulkStatus call
+ */
+static int yfs_deliver_fs_inline_bulk_status(struct afs_call *call)
+{
+	struct afs_file_status *statuses;
+	struct afs_callback *callbacks;
+	struct afs_vnode *vnode = call->reply[0];
+	const __be32 *bp;
+	u32 tmp;
+	int ret;
+
+	_enter("{%u}", call->unmarshall);
+
+	switch (call->unmarshall) {
+	case 0:
+		afs_extract_to_tmp(call);
+		call->unmarshall++;
+
+		/* Extract the file status count and array in two steps */
+	case 1:
+		_debug("extract status count");
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		tmp = ntohl(call->tmp);
+		_debug("status count: %u/%u", tmp, call->count2);
+		if (tmp != call->count2)
+			return afs_protocol_error(call, -EBADMSG,
+						  afs_eproto_ibulkst_count);
+
+		call->count = 0;
+		call->unmarshall++;
+	more_counts:
+		afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSFetchStatus));
+
+	case 2:
+		_debug("extract status array %u", call->count);
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		bp = call->buffer;
+		statuses = call->reply[1];
+		ret = yfs_decode_status(call, &bp, &statuses[call->count],
+					call->count == 0 ? vnode : NULL,
+					NULL, NULL);
+		if (ret < 0)
+			return ret;
+
+		call->count++;
+		if (call->count < call->count2)
+			goto more_counts;
+
+		call->count = 0;
+		call->unmarshall++;
+		afs_extract_to_tmp(call);
+
+		/* Extract the callback count and array in two steps */
+	case 3:
+		_debug("extract CB count");
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		tmp = ntohl(call->tmp);
+		_debug("CB count: %u", tmp);
+		if (tmp != call->count2)
+			return afs_protocol_error(call, -EBADMSG,
+						  afs_eproto_ibulkst_cb_count);
+		call->count = 0;
+		call->unmarshall++;
+	more_cbs:
+		afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSCallBack));
+
+	case 4:
+		_debug("extract CB array");
+		ret = afs_extract_data(call, true);
+		if (ret < 0)
+			return ret;
+
+		_debug("unmarshall CB array");
+		bp = call->buffer;
+		callbacks = call->reply[2];
+		xdr_decode_YFSCallBack_raw(&bp, &callbacks[call->count]);
+		statuses = call->reply[1];
+		if (call->count == 0 && vnode && statuses[0].abort_code == 0) {
+			bp = call->buffer;
+			xdr_decode_YFSCallBack(call, vnode, &bp);
+		}
+		call->count++;
+		if (call->count < call->count2)
+			goto more_cbs;
+
+		afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSVolSync));
+		call->unmarshall++;
+
+	case 5:
+		ret = afs_extract_data(call, false);
+		if (ret < 0)
+			return ret;
+
+		bp = call->buffer;
+		xdr_decode_YFSVolSync(&bp, call->reply[3]);
+
+		call->unmarshall++;
+
+	case 6:
+		break;
+	}
+
+	_leave(" = 0 [done]");
+	return 0;
+}
+
+/*
+ * FS.InlineBulkStatus operation type
+ */
+static const struct afs_call_type yfs_RXYFSInlineBulkStatus = {
+	.name		= "YFS.InlineBulkStatus",
+	.op		= yfs_FS_InlineBulkStatus,
+	.deliver	= yfs_deliver_fs_inline_bulk_status,
+	.destructor	= afs_flat_call_destructor,
+};
+
+/*
+ * Fetch the status information for up to 1024 files
+ */
+int yfs_fs_inline_bulk_status(struct afs_fs_cursor *fc,
+			      struct afs_net *net,
+			      struct afs_fid *fids,
+			      struct afs_file_status *statuses,
+			      struct afs_callback *callbacks,
+			      unsigned int nr_fids,
+			      struct afs_volsync *volsync)
+{
+	struct afs_call *call;
+	__be32 *bp;
+	int i;
+
+	_enter(",%x,{%llx:%llu},%u",
+	       key_serial(fc->key), fids[0].vid, fids[1].vnode, nr_fids);
+
+	call = afs_alloc_flat_call(net, &yfs_RXYFSInlineBulkStatus,
+				   sizeof(__be32) +
+				   sizeof(__be32) +
+				   sizeof(__be32) +
+				   sizeof(struct yfs_xdr_YFSFid) * nr_fids,
+				   sizeof(struct yfs_xdr_YFSFetchStatus));
+	if (!call) {
+		fc->ac.error = -ENOMEM;
+		return -ENOMEM;
+	}
+
+	call->key = fc->key;
+	call->reply[0] = NULL; /* vnode for fid[0] */
+	call->reply[1] = statuses;
+	call->reply[2] = callbacks;
+	call->reply[3] = volsync;
+	call->count2 = nr_fids;
+
+	/* marshall the parameters */
+	bp = call->request;
+	bp = xdr_encode_u32(bp, YFSINLINEBULKSTATUS);
+	bp = xdr_encode_u32(bp, 0); /* RPCFlags */
+	bp = xdr_encode_u32(bp, nr_fids);
+	for (i = 0; i < nr_fids; i++)
+		bp = xdr_encode_YFSFid(bp, &fids[i]);
+	yfs_check_req(call, bp);
+
+	call->cb_break = fc->cb_break;
+	afs_use_fs_server(call, fc->cbi);
+	trace_afs_make_fs_call(call, &fids[0]);
+	return afs_make_call(&fc->ac, call, GFP_NOFS, false);
+}
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index 8acd56e20a371..ed155042236bb 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -54,6 +54,35 @@ enum afs_fs_operation {
 	afs_FS_StoreData64		= 65538, /* AFS Store file data */
 	afs_FS_GiveUpAllCallBacks	= 65539, /* AFS Give up all our callbacks on a server */
 	afs_FS_GetCapabilities		= 65540, /* AFS Get FS server capabilities */
+
+	yfs_FS_FetchData		= 130,	 /* YFS Fetch file data */
+	yfs_FS_FetchACL			= 64131, /* YFS Fetch file ACL */
+	yfs_FS_FetchStatus		= 64132, /* YFS Fetch file status */
+	yfs_FS_StoreACL			= 64134, /* YFS Store file ACL */
+	yfs_FS_StoreStatus		= 64135, /* YFS Store file status */
+	yfs_FS_RemoveFile		= 64136, /* YFS Remove a file */
+	yfs_FS_CreateFile		= 64137, /* YFS Create a file */
+	yfs_FS_Rename			= 64138, /* YFS Rename or move a file or directory */
+	yfs_FS_Symlink			= 64139, /* YFS Create a symbolic link */
+	yfs_FS_Link			= 64140, /* YFS Create a hard link */
+	yfs_FS_MakeDir			= 64141, /* YFS Create a directory */
+	yfs_FS_RemoveDir		= 64142, /* YFS Remove a directory */
+	yfs_FS_GetVolumeStatus		= 64149, /* YFS Get volume status information */
+	yfs_FS_SetVolumeStatus		= 64150, /* YFS Set volume status information */
+	yfs_FS_SetLock			= 64156, /* YFS Request a file lock */
+	yfs_FS_ExtendLock		= 64157, /* YFS Extend a file lock */
+	yfs_FS_ReleaseLock		= 64158, /* YFS Release a file lock */
+	yfs_FS_Lookup			= 64161, /* YFS lookup file in directory */
+	yfs_FS_FlushCPS			= 64165,
+	yfs_FS_FetchOpaqueACL		= 64168,
+	yfs_FS_WhoAmI			= 64170,
+	yfs_FS_RemoveACL		= 64171,
+	yfs_FS_RemoveFile2		= 64173,
+	yfs_FS_StoreOpaqueACL2		= 64174,
+	yfs_FS_InlineBulkStatus		= 64536, /* YFS Fetch multiple file statuses with errors */
+	yfs_FS_FetchData64		= 64537, /* YFS Fetch file data */
+	yfs_FS_StoreData64		= 64538, /* YFS Store file data */
+	yfs_FS_UpdateSymlink		= 64540,
 };
 
 enum afs_vl_operation {
@@ -156,7 +185,34 @@ enum afs_file_error {
 	EM(afs_FS_FetchData64,			"FS.FetchData64") \
 	EM(afs_FS_StoreData64,			"FS.StoreData64") \
 	EM(afs_FS_GiveUpAllCallBacks,		"FS.GiveUpAllCallBacks") \
-	E_(afs_FS_GetCapabilities,		"FS.GetCapabilities")
+	EM(afs_FS_GetCapabilities,		"FS.GetCapabilities") \
+	EM(yfs_FS_FetchACL,			"YFS.FetchACL") \
+	EM(yfs_FS_FetchStatus,			"YFS.FetchStatus") \
+	EM(yfs_FS_StoreACL,			"YFS.StoreACL") \
+	EM(yfs_FS_StoreStatus,			"YFS.StoreStatus") \
+	EM(yfs_FS_RemoveFile,			"YFS.RemoveFile") \
+	EM(yfs_FS_CreateFile,			"YFS.CreateFile") \
+	EM(yfs_FS_Rename,			"YFS.Rename") \
+	EM(yfs_FS_Symlink,			"YFS.Symlink") \
+	EM(yfs_FS_Link,				"YFS.Link") \
+	EM(yfs_FS_MakeDir,			"YFS.MakeDir") \
+	EM(yfs_FS_RemoveDir,			"YFS.RemoveDir") \
+	EM(yfs_FS_GetVolumeStatus,		"YFS.GetVolumeStatus") \
+	EM(yfs_FS_SetVolumeStatus,		"YFS.SetVolumeStatus") \
+	EM(yfs_FS_SetLock,			"YFS.SetLock") \
+	EM(yfs_FS_ExtendLock,			"YFS.ExtendLock") \
+	EM(yfs_FS_ReleaseLock,			"YFS.ReleaseLock") \
+	EM(yfs_FS_Lookup,			"YFS.Lookup") \
+	EM(yfs_FS_FlushCPS,			"YFS.FlushCPS") \
+	EM(yfs_FS_FetchOpaqueACL,		"YFS.FetchOpaqueACL") \
+	EM(yfs_FS_WhoAmI,			"YFS.WhoAmI") \
+	EM(yfs_FS_RemoveACL,			"YFS.RemoveACL") \
+	EM(yfs_FS_RemoveFile2,			"YFS.RemoveFile2") \
+	EM(yfs_FS_StoreOpaqueACL2,		"YFS.StoreOpaqueACL2") \
+	EM(yfs_FS_InlineBulkStatus,		"YFS.InlineBulkStatus") \
+	EM(yfs_FS_FetchData64,			"YFS.FetchData64") \
+	EM(yfs_FS_StoreData64,			"YFS.StoreData64") \
+	E_(yfs_FS_UpdateSymlink,		"YFS.UpdateSymlink")
 
 #define afs_vl_operations \
 	EM(afs_VL_GetEntryByNameU,		"VL.GetEntryByNameU") \
-- 
GitLab


From 744bcd713a4eabb248246f7deccfad30c579b7f5 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sat, 20 Oct 2018 00:57:58 +0100
Subject: [PATCH 0164/1890] afs: Allow dumping of server cursor on operation
 failure

Provide an option to allow the file or volume location server cursor to be
dumped if the rotation routine falls off the end without managing to
contact a server.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/Kconfig     | 12 ++++++++++
 fs/afs/addr_list.c |  2 ++
 fs/afs/internal.h  |  3 +++
 fs/afs/rotate.c    | 57 ++++++++++++++++++++++++++++++++++++++++++++++
 fs/afs/vl_rotate.c | 53 ++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 127 insertions(+)

diff --git a/fs/afs/Kconfig b/fs/afs/Kconfig
index ebba3b18e5da6..701aaa9b18994 100644
--- a/fs/afs/Kconfig
+++ b/fs/afs/Kconfig
@@ -27,3 +27,15 @@ config AFS_FSCACHE
 	help
 	  Say Y here if you want AFS data to be cached locally on disk through
 	  the generic filesystem cache manager
+
+config AFS_DEBUG_CURSOR
+	bool "AFS server cursor debugging"
+	depends on AFS_FS
+	help
+	  Say Y here to cause the contents of a server cursor to be dumped to
+	  the dmesg log if the server rotation algorithm fails to successfully
+	  contact a server.
+
+	  See <file:Documentation/filesystems/afs.txt> for more information.
+
+	  If unsure, say N.
diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c
index 3f60b40125873..bc5ce31a4ae45 100644
--- a/fs/afs/addr_list.c
+++ b/fs/afs/addr_list.c
@@ -358,6 +358,8 @@ bool afs_iterate_addresses(struct afs_addr_cursor *ac)
 	if (!ac->alist)
 		return false;
 
+	ac->nr_iterations++;
+
 	if (ac->begun) {
 		ac->index++;
 		if (ac->index == ac->alist->nr_addrs)
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index ce79bd514331c..ac9da1e4050ea 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -660,6 +660,7 @@ struct afs_addr_cursor {
 	short			error;
 	bool			begun;		/* T if we've begun iteration */
 	bool			responded;	/* T if the current address responded */
+	unsigned short		nr_iterations;	/* Number of address iterations */
 };
 
 /*
@@ -677,6 +678,7 @@ struct afs_vl_cursor {
 #define AFS_VL_CURSOR_STOP	0x0001		/* Set to cease iteration */
 #define AFS_VL_CURSOR_RETRY	0x0002		/* Set to do a retry */
 #define AFS_VL_CURSOR_RETRIED	0x0004		/* Set if started a retry */
+	unsigned short		nr_iterations;	/* Number of server iterations */
 };
 
 /*
@@ -700,6 +702,7 @@ struct afs_fs_cursor {
 #define AFS_FS_CURSOR_VNOVOL	0x0008		/* Set if seen VNOVOL */
 #define AFS_FS_CURSOR_CUR_ONLY	0x0010		/* Set if current server only (file lock held) */
 #define AFS_FS_CURSOR_NO_VSLEEP	0x0020		/* Set to prevent sleep on VBUSY, VOFFLINE, ... */
+	unsigned short		nr_iterations;	/* Number of server iterations */
 };
 
 /*
diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c
index 41405dde01133..7c4487781637d 100644
--- a/fs/afs/rotate.c
+++ b/fs/afs/rotate.c
@@ -156,6 +156,8 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
 		return false;
 	}
 
+	fc->nr_iterations++;
+
 	/* Evaluate the result of the previous operation, if there was one. */
 	switch (error) {
 	case SHRT_MAX:
@@ -519,6 +521,56 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
 	return false;
 }
 
+/*
+ * Dump cursor state in the case of the error being EDESTADDRREQ.
+ */
+static void afs_dump_edestaddrreq(const struct afs_fs_cursor *fc)
+{
+	static int count;
+	int i;
+
+	if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3)
+		return;
+	count++;
+
+	rcu_read_lock();
+
+	pr_notice("EDESTADDR occurred\n");
+	pr_notice("FC: cbb=%x cbb2=%x fl=%hx err=%hd\n",
+		  fc->cb_break, fc->cb_break_2, fc->flags, fc->error);
+	pr_notice("FC: st=%u ix=%u ni=%u\n",
+		  fc->start, fc->index, fc->nr_iterations);
+
+	if (fc->server_list) {
+		const struct afs_server_list *sl = fc->server_list;
+		pr_notice("FC: SL nr=%u ix=%u vnov=%hx\n",
+			  sl->nr_servers, sl->index, sl->vnovol_mask);
+		for (i = 0; i < sl->nr_servers; i++) {
+			const struct afs_server *s = sl->servers[i].server;
+			pr_notice("FC: server fl=%lx av=%u %pU\n",
+				  s->flags, s->addr_version, &s->uuid);
+			if (s->addresses) {
+				const struct afs_addr_list *a =
+					rcu_dereference(s->addresses);
+				pr_notice("FC:  - av=%u nr=%u/%u/%u ax=%u\n",
+					  a->version,
+					  a->nr_ipv4, a->nr_addrs, a->max_addrs,
+					  a->index);
+				pr_notice("FC:  - pr=%lx yf=%lx\n",
+					  a->probed, a->yfs);
+				if (a == fc->ac.alist)
+					pr_notice("FC:  - current\n");
+			}
+		}
+	}
+
+	pr_notice("AC: as=%u ax=%u ac=%d er=%d b=%u r=%u ni=%u\n",
+		  fc->ac.start, fc->ac.index, fc->ac.abort_code, fc->ac.error,
+		  fc->ac.begun, fc->ac.responded, fc->ac.nr_iterations);
+
+	rcu_read_unlock();
+}
+
 /*
  * Tidy up a filesystem cursor and unlock the vnode.
  */
@@ -526,6 +578,11 @@ int afs_end_vnode_operation(struct afs_fs_cursor *fc)
 {
 	struct afs_net *net = afs_v2net(fc->vnode);
 
+	if (fc->error == -EDESTADDRREQ ||
+	    fc->error == -ENETUNREACH ||
+	    fc->error == -EHOSTUNREACH)
+		afs_dump_edestaddrreq(fc);
+
 	mutex_unlock(&fc->vnode->io_lock);
 
 	afs_end_cursor(&fc->ac);
diff --git a/fs/afs/vl_rotate.c b/fs/afs/vl_rotate.c
index 44a936ad9c7aa..5b99ea7be194a 100644
--- a/fs/afs/vl_rotate.c
+++ b/fs/afs/vl_rotate.c
@@ -83,6 +83,8 @@ bool afs_select_vlserver(struct afs_vl_cursor *vc)
 		return false;
 	}
 
+	vc->nr_iterations++;
+
 	/* Evaluate the result of the previous operation, if there was one. */
 	switch (error) {
 	case SHRT_MAX:
@@ -234,6 +236,52 @@ bool afs_select_vlserver(struct afs_vl_cursor *vc)
 	return false;
 }
 
+/*
+ * Dump cursor state in the case of the error being EDESTADDRREQ.
+ */
+static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc)
+{
+	static int count;
+	int i;
+
+	if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3)
+		return;
+	count++;
+
+	rcu_read_lock();
+	pr_notice("EDESTADDR occurred\n");
+	pr_notice("VC: st=%u ix=%u ni=%hu fl=%hx err=%hd\n",
+		  vc->start, vc->index, vc->nr_iterations, vc->flags, vc->error);
+
+	if (vc->server_list) {
+		const struct afs_vlserver_list *sl = vc->server_list;
+		pr_notice("VC: SL nr=%u ix=%u\n",
+			  sl->nr_servers, sl->index);
+		for (i = 0; i < sl->nr_servers; i++) {
+			const struct afs_vlserver *s = sl->servers[i].server;
+			pr_notice("VC: server fl=%lx %s+%hu\n",
+				  s->flags, s->name, s->port);
+			if (s->addresses) {
+				const struct afs_addr_list *a =
+					rcu_dereference(s->addresses);
+				pr_notice("VC:  - av=%u nr=%u/%u/%u ax=%u\n",
+					  a->version,
+					  a->nr_ipv4, a->nr_addrs, a->max_addrs,
+					  a->index);
+				pr_notice("VC:  - pr=%lx yf=%lx\n",
+					  a->probed, a->yfs);
+				if (a == vc->ac.alist)
+					pr_notice("VC:  - current\n");
+			}
+		}
+	}
+
+	pr_notice("AC: as=%u ax=%u ac=%d er=%d b=%u r=%u ni=%hu\n",
+		  vc->ac.start, vc->ac.index, vc->ac.abort_code, vc->ac.error,
+		  vc->ac.begun, vc->ac.responded, vc->ac.nr_iterations);
+	rcu_read_unlock();
+}
+
 /*
  * Tidy up a volume location server cursor and unlock the vnode.
  */
@@ -241,6 +289,11 @@ int afs_end_vlserver_operation(struct afs_vl_cursor *vc)
 {
 	struct afs_net *net = vc->cell->net;
 
+	if (vc->error == -EDESTADDRREQ ||
+	    vc->error == -ENETUNREACH ||
+	    vc->error == -EHOSTUNREACH)
+		afs_vl_dump_edestaddrreq(vc);
+
 	afs_end_cursor(&vc->ac);
 	afs_put_vlserverlist(net, vc->server_list);
 
-- 
GitLab


From 2feeaf8433c8e68de3d0a06a0ffe7742bcd13c1a Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sat, 20 Oct 2018 00:57:59 +0100
Subject: [PATCH 0165/1890] afs: Eliminate the address pointer from the address
 list cursor

Eliminate the address pointer from the address list cursor as it's
redundant (ac->addrs[ac->index] can be used to find the same address) and
address lists must be replaced rather than being rearranged, so is of
limited value.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/addr_list.c | 2 --
 fs/afs/internal.h  | 1 -
 fs/afs/rxrpc.c     | 2 +-
 fs/afs/server.c    | 2 --
 fs/afs/vl_rotate.c | 2 +-
 fs/afs/volume.c    | 6 +++---
 6 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c
index bc5ce31a4ae45..1536d1d21c335 100644
--- a/fs/afs/addr_list.c
+++ b/fs/afs/addr_list.c
@@ -371,7 +371,6 @@ bool afs_iterate_addresses(struct afs_addr_cursor *ac)
 
 	ac->begun = true;
 	ac->responded = false;
-	ac->addr = &ac->alist->addrs[ac->index];
 	return true;
 }
 
@@ -389,7 +388,6 @@ int afs_end_cursor(struct afs_addr_cursor *ac)
 		afs_put_addrlist(alist);
 	}
 
-	ac->addr = NULL;
 	ac->alist = NULL;
 	ac->begun = false;
 	return ac->error;
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index ac9da1e4050ea..e5b596bd8acf6 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -653,7 +653,6 @@ struct afs_interface {
  */
 struct afs_addr_cursor {
 	struct afs_addr_list	*alist;		/* Current address list (pins ref) */
-	struct sockaddr_rxrpc	*addr;
 	u32			abort_code;
 	unsigned short		start;		/* Starting point in alist->addrs[] */
 	unsigned short		index;		/* Wrapping offset from start to current addr */
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 444ba0d511ef7..42e1ea7372e96 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -359,7 +359,7 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg)
 long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
 		   gfp_t gfp, bool async)
 {
-	struct sockaddr_rxrpc *srx = ac->addr;
+	struct sockaddr_rxrpc *srx = &ac->alist->addrs[ac->index];
 	struct rxrpc_call *rxcall;
 	struct msghdr msg;
 	struct kvec iov[1];
diff --git a/fs/afs/server.c b/fs/afs/server.c
index aa35cfae54400..7c1be8b4dc9a2 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -367,7 +367,6 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
 		.alist	= alist,
 		.start	= alist->index,
 		.index	= 0,
-		.addr	= &alist->addrs[alist->index],
 		.error	= 0,
 	};
 	_enter("%p", server);
@@ -518,7 +517,6 @@ static bool afs_do_probe_fileserver(struct afs_fs_cursor *fc)
 
 	_enter("");
 
-	fc->ac.addr = NULL;
 	fc->ac.start = READ_ONCE(fc->ac.alist->index);
 	fc->ac.index = fc->ac.start;
 	fc->ac.error = 0;
diff --git a/fs/afs/vl_rotate.c b/fs/afs/vl_rotate.c
index 5b99ea7be194a..ead6dedbb5612 100644
--- a/fs/afs/vl_rotate.c
+++ b/fs/afs/vl_rotate.c
@@ -209,7 +209,7 @@ bool afs_select_vlserver(struct afs_vl_cursor *vc)
 	if (!afs_iterate_addresses(&vc->ac))
 		goto next_server;
 
-	_leave(" = t %pISpc", &vc->ac.addr->transport);
+	_leave(" = t %pISpc", &vc->ac.alist->addrs[vc->ac.index].transport);
 	return true;
 
 next_server:
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index f0020e35bf6fe..7527c081726e0 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -88,16 +88,16 @@ static struct afs_vldb_entry *afs_vl_lookup_vldb(struct afs_cell *cell,
 			case VL_SERVICE:
 				clear_bit(vc.ac.index, &vc.ac.alist->yfs);
 				set_bit(vc.ac.index, &vc.ac.alist->probed);
-				vc.ac.addr->srx_service = ret;
+				vc.ac.alist->addrs[vc.ac.index].srx_service = ret;
 				break;
 			case YFS_VL_SERVICE:
 				set_bit(vc.ac.index, &vc.ac.alist->yfs);
 				set_bit(vc.ac.index, &vc.ac.alist->probed);
-				vc.ac.addr->srx_service = ret;
+				vc.ac.alist->addrs[vc.ac.index].srx_service = ret;
 				break;
 			}
 		}
-		
+
 		vldb = afs_vl_get_entry_by_name_u(&vc, volname, volnamesz);
 	}
 
-- 
GitLab


From 18ac61853cc4e44eb30e125fc8344a3b25c7b6fe Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sat, 20 Oct 2018 00:57:59 +0100
Subject: [PATCH 0166/1890] afs: Fix callback handling

In some circumstances, the callback interest pointer is NULL, so in such a
case we can't dereference it when checking to see if the callback is
broken.  This causes an oops in some circumstances.

Fix this by replacing the function that worked out the aggregate break
counter with one that actually does the comparison, and then make that
return true (ie. broken) if there is no callback interest as yet (ie. the
pointer is NULL).

Fixes: 68251f0a6818 ("afs: Fix whole-volume callback handling")
Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/fsclient.c  | 2 +-
 fs/afs/internal.h  | 9 ++++++---
 fs/afs/security.c  | 7 ++++---
 fs/afs/yfsclient.c | 2 +-
 4 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 3975969719de6..7c75a1813321c 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -269,7 +269,7 @@ static void xdr_decode_AFSCallBack(struct afs_call *call,
 
 	write_seqlock(&vnode->cb_lock);
 
-	if (call->cb_break == afs_cb_break_sum(vnode, cbi)) {
+	if (!afs_cb_is_broken(call->cb_break, vnode, cbi)) {
 		vnode->cb_version	= ntohl(*bp++);
 		cb_expiry		= ntohl(*bp++);
 		vnode->cb_type		= ntohl(*bp++);
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index e5b596bd8acf6..b60d15212975a 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -776,10 +776,13 @@ static inline unsigned int afs_calc_vnode_cb_break(struct afs_vnode *vnode)
 	return vnode->cb_break + vnode->cb_s_break + vnode->cb_v_break;
 }
 
-static inline unsigned int afs_cb_break_sum(struct afs_vnode *vnode,
-					    struct afs_cb_interest *cbi)
+static inline bool afs_cb_is_broken(unsigned int cb_break,
+				    const struct afs_vnode *vnode,
+				    const struct afs_cb_interest *cbi)
 {
-	return vnode->cb_break + cbi->server->cb_s_break + vnode->volume->cb_v_break;
+	return !cbi || cb_break != (vnode->cb_break +
+				    cbi->server->cb_s_break +
+				    vnode->volume->cb_v_break);
 }
 
 /*
diff --git a/fs/afs/security.c b/fs/afs/security.c
index d1ae53fd37399..5f58a9a17e694 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -147,7 +147,8 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
 					break;
 				}
 
-				if (cb_break != afs_cb_break_sum(vnode, vnode->cb_interest)) {
+				if (afs_cb_is_broken(cb_break, vnode,
+						     vnode->cb_interest)) {
 					changed = true;
 					break;
 				}
@@ -177,7 +178,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
 		}
 	}
 
-	if (cb_break != afs_cb_break_sum(vnode, vnode->cb_interest))
+	if (afs_cb_is_broken(cb_break, vnode, vnode->cb_interest))
 		goto someone_else_changed_it;
 
 	/* We need a ref on any permits list we want to copy as we'll have to
@@ -256,7 +257,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key,
 
 	spin_lock(&vnode->lock);
 	zap = rcu_access_pointer(vnode->permit_cache);
-	if (cb_break == afs_cb_break_sum(vnode, vnode->cb_interest) &&
+	if (!afs_cb_is_broken(cb_break, vnode, vnode->cb_interest) &&
 	    zap == permits)
 		rcu_assign_pointer(vnode->permit_cache, replacement);
 	else
diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c
index d5e3f00950406..12658c1363ae4 100644
--- a/fs/afs/yfsclient.c
+++ b/fs/afs/yfsclient.c
@@ -324,7 +324,7 @@ static void xdr_decode_YFSCallBack(struct afs_call *call,
 
 	write_seqlock(&vnode->cb_lock);
 
-	if (call->cb_break == afs_cb_break_sum(vnode, cbi)) {
+	if (!afs_cb_is_broken(call->cb_break, vnode, cbi)) {
 		cb_expiry = xdr_to_u64(xdr->expiration_time);
 		do_div(cb_expiry, 10 * 1000 * 1000);
 		vnode->cb_version	= ntohl(xdr->version);
-- 
GitLab


From 3bf0fb6f33dd545693da5e65f5b1b9b9f0bfc35e Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sat, 20 Oct 2018 00:57:59 +0100
Subject: [PATCH 0167/1890] afs: Probe multiple fileservers simultaneously

Send probes to all the unprobed fileservers in a fileserver list on all
addresses simultaneously in an attempt to find out the fastest route whilst
not getting stuck for 20s on any server or address that we don't get a
reply from.

This alleviates the problem whereby attempting to access a new server can
take a long time because the rotation algorithm ends up rotating through
all servers and addresses until it finds one that responds.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/Makefile            |   4 +-
 fs/afs/addr_list.c         |  40 ++++--
 fs/afs/cmservice.c         | 129 +++++++++++++-----
 fs/afs/fs_probe.c          | 270 ++++++++++++++++++++++++++++++++++++
 fs/afs/fsclient.c          |  27 ++--
 fs/afs/internal.h          |  98 ++++++++++---
 fs/afs/proc.c              |   6 +-
 fs/afs/rotate.c            | 174 +++++++++++++++--------
 fs/afs/rxrpc.c             |  44 +++---
 fs/afs/server.c            | 109 +--------------
 fs/afs/server_list.c       |   6 +-
 fs/afs/vl_list.c           |   6 +-
 fs/afs/vl_probe.c          | 273 +++++++++++++++++++++++++++++++++++++
 fs/afs/vl_rotate.c         | 159 +++++++++++++--------
 fs/afs/vlclient.c          |  35 +++--
 fs/afs/volume.c            |  16 ---
 include/trace/events/afs.h |   4 +-
 17 files changed, 1050 insertions(+), 350 deletions(-)
 create mode 100644 fs/afs/fs_probe.c
 create mode 100644 fs/afs/vl_probe.c

diff --git a/fs/afs/Makefile b/fs/afs/Makefile
index cc942b790cffd..0738e2bf51936 100644
--- a/fs/afs/Makefile
+++ b/fs/afs/Makefile
@@ -17,6 +17,7 @@ kafs-y := \
 	file.o \
 	flock.o \
 	fsclient.o \
+	fs_probe.o \
 	inode.o \
 	main.o \
 	misc.o \
@@ -29,8 +30,9 @@ kafs-y := \
 	super.o \
 	netdevices.o \
 	vlclient.o \
-	vl_rotate.o \
 	vl_list.o \
+	vl_probe.o \
+	vl_rotate.o \
 	volume.o \
 	write.o \
 	xattr.o \
diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c
index 1536d1d21c335..967db336d11ae 100644
--- a/fs/afs/addr_list.c
+++ b/fs/afs/addr_list.c
@@ -303,6 +303,8 @@ void afs_merge_fs_addr4(struct afs_addr_list *alist, __be32 xdr, u16 port)
 			sizeof(alist->addrs[0]) * (alist->nr_addrs - i));
 
 	srx = &alist->addrs[i];
+	srx->srx_family = AF_RXRPC;
+	srx->transport_type = SOCK_DGRAM;
 	srx->transport_len = sizeof(srx->transport.sin);
 	srx->transport.sin.sin_family = AF_INET;
 	srx->transport.sin.sin_port = htons(port);
@@ -341,6 +343,8 @@ void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port)
 			sizeof(alist->addrs[0]) * (alist->nr_addrs - i));
 
 	srx = &alist->addrs[i];
+	srx->srx_family = AF_RXRPC;
+	srx->transport_type = SOCK_DGRAM;
 	srx->transport_len = sizeof(srx->transport.sin6);
 	srx->transport.sin6.sin6_family = AF_INET6;
 	srx->transport.sin6.sin6_port = htons(port);
@@ -353,23 +357,32 @@ void afs_merge_fs_addr6(struct afs_addr_list *alist, __be32 *xdr, u16 port)
  */
 bool afs_iterate_addresses(struct afs_addr_cursor *ac)
 {
-	_enter("%hu+%hd", ac->start, (short)ac->index);
+	unsigned long set, failed;
+	int index;
 
 	if (!ac->alist)
 		return false;
 
+	set = ac->alist->responded;
+	failed = ac->alist->failed;
+	_enter("%lx-%lx-%lx,%d", set, failed, ac->tried, ac->index);
+
 	ac->nr_iterations++;
 
-	if (ac->begun) {
-		ac->index++;
-		if (ac->index == ac->alist->nr_addrs)
-			ac->index = 0;
+	set &= ~(failed | ac->tried);
 
-		if (ac->index == ac->start)
-			return false;
-	}
+	if (!set)
+		return false;
+
+	index = READ_ONCE(ac->alist->preferred);
+	if (test_bit(index, &set))
+		goto selected;
+
+	index = __ffs(set);
 
-	ac->begun = true;
+selected:
+	ac->index = index;
+	set_bit(index, &ac->tried);
 	ac->responded = false;
 	return true;
 }
@@ -383,12 +396,13 @@ int afs_end_cursor(struct afs_addr_cursor *ac)
 
 	alist = ac->alist;
 	if (alist) {
-		if (ac->responded && ac->index != ac->start)
-			WRITE_ONCE(alist->index, ac->index);
+		if (ac->responded &&
+		    ac->index != alist->preferred &&
+		    test_bit(ac->alist->preferred, &ac->tried))
+			WRITE_ONCE(alist->preferred, ac->index);
 		afs_put_addrlist(alist);
+		ac->alist = NULL;
 	}
 
-	ac->alist = NULL;
-	ac->begun = false;
 	return ac->error;
 }
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 8cf8d10daa6c1..8ee5972893ed5 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -122,6 +122,8 @@ bool afs_cm_incoming_call(struct afs_call *call)
 {
 	_enter("{%u, CB.OP %u}", call->service_id, call->operation_ID);
 
+	call->epoch = rxrpc_kernel_get_epoch(call->net->socket, call->rxcall);
+
 	switch (call->operation_ID) {
 	case CBCallBack:
 		call->type = &afs_SRXCBCallBack;
@@ -151,6 +153,91 @@ bool afs_cm_incoming_call(struct afs_call *call)
 	}
 }
 
+/*
+ * Record a probe to the cache manager from a server.
+ */
+static int afs_record_cm_probe(struct afs_call *call, struct afs_server *server)
+{
+	_enter("");
+
+	if (test_bit(AFS_SERVER_FL_HAVE_EPOCH, &server->flags) &&
+	    !test_bit(AFS_SERVER_FL_PROBING, &server->flags)) {
+		if (server->cm_epoch == call->epoch)
+			return 0;
+
+		if (!server->probe.said_rebooted) {
+			pr_notice("kAFS: FS rebooted %pU\n", &server->uuid);
+			server->probe.said_rebooted = true;
+		}
+	}
+
+	spin_lock(&server->probe_lock);
+
+	if (!test_bit(AFS_SERVER_FL_HAVE_EPOCH, &server->flags)) {
+		server->cm_epoch = call->epoch;
+		server->probe.cm_epoch = call->epoch;
+		goto out;
+	}
+
+	if (server->probe.cm_probed &&
+	    call->epoch != server->probe.cm_epoch &&
+	    !server->probe.said_inconsistent) {
+		pr_notice("kAFS: FS endpoints inconsistent %pU\n",
+			  &server->uuid);
+		server->probe.said_inconsistent = true;
+	}
+
+	if (!server->probe.cm_probed || call->epoch == server->cm_epoch)
+		server->probe.cm_epoch = server->cm_epoch;
+
+out:
+	server->probe.cm_probed = true;
+	spin_unlock(&server->probe_lock);
+	return 0;
+}
+
+/*
+ * Find the server record by peer address and record a probe to the cache
+ * manager from a server.
+ */
+static int afs_find_cm_server_by_peer(struct afs_call *call)
+{
+	struct sockaddr_rxrpc srx;
+	struct afs_server *server;
+
+	rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
+
+	server = afs_find_server(call->net, &srx);
+	if (!server) {
+		trace_afs_cm_no_server(call, &srx);
+		return 0;
+	}
+
+	call->cm_server = server;
+	return afs_record_cm_probe(call, server);
+}
+
+/*
+ * Find the server record by server UUID and record a probe to the cache
+ * manager from a server.
+ */
+static int afs_find_cm_server_by_uuid(struct afs_call *call,
+				      struct afs_uuid *uuid)
+{
+	struct afs_server *server;
+
+	rcu_read_lock();
+	server = afs_find_server_by_uuid(call->net, call->request);
+	rcu_read_unlock();
+	if (!server) {
+		trace_afs_cm_no_server_u(call, call->request);
+		return 0;
+	}
+
+	call->cm_server = server;
+	return afs_record_cm_probe(call, server);
+}
+
 /*
  * Clean up a cache manager call.
  */
@@ -187,7 +274,6 @@ static void SRXAFSCB_CallBack(struct work_struct *work)
 static int afs_deliver_cb_callback(struct afs_call *call)
 {
 	struct afs_callback_break *cb;
-	struct sockaddr_rxrpc srx;
 	__be32 *bp;
 	int ret, loop;
 
@@ -276,12 +362,7 @@ static int afs_deliver_cb_callback(struct afs_call *call)
 
 	/* we'll need the file server record as that tells us which set of
 	 * vnodes to operate upon */
-	rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
-	call->cm_server = afs_find_server(call->net, &srx);
-	if (!call->cm_server)
-		trace_afs_cm_no_server(call, &srx);
-
-	return afs_queue_call_work(call);
+	return afs_find_cm_server_by_peer(call);
 }
 
 /*
@@ -305,13 +386,10 @@ static void SRXAFSCB_InitCallBackState(struct work_struct *work)
  */
 static int afs_deliver_cb_init_call_back_state(struct afs_call *call)
 {
-	struct sockaddr_rxrpc srx;
 	int ret;
 
 	_enter("");
 
-	rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
-
 	afs_extract_discard(call, 0);
 	ret = afs_extract_data(call, false);
 	if (ret < 0)
@@ -319,11 +397,7 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call)
 
 	/* we'll need the file server record as that tells us which set of
 	 * vnodes to operate upon */
-	call->cm_server = afs_find_server(call->net, &srx);
-	if (!call->cm_server)
-		trace_afs_cm_no_server(call, &srx);
-
-	return afs_queue_call_work(call);
+	return afs_find_cm_server_by_peer(call);
 }
 
 /*
@@ -384,13 +458,7 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
 
 	/* we'll need the file server record as that tells us which set of
 	 * vnodes to operate upon */
-	rcu_read_lock();
-	call->cm_server = afs_find_server_by_uuid(call->net, call->request);
-	rcu_read_unlock();
-	if (!call->cm_server)
-		trace_afs_cm_no_server_u(call, call->request);
-
-	return afs_queue_call_work(call);
+	return afs_find_cm_server_by_uuid(call, call->request);
 }
 
 /*
@@ -422,8 +490,7 @@ static int afs_deliver_cb_probe(struct afs_call *call)
 
 	if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
 		return afs_io_error(call, afs_io_error_cm_reply);
-
-	return afs_queue_call_work(call);
+	return afs_find_cm_server_by_peer(call);
 }
 
 /*
@@ -503,8 +570,7 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call)
 
 	if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
 		return afs_io_error(call, afs_io_error_cm_reply);
-
-	return afs_queue_call_work(call);
+	return afs_find_cm_server_by_uuid(call, call->request);
 }
 
 /*
@@ -586,8 +652,7 @@ static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call)
 
 	if (!afs_check_call_state(call, AFS_CALL_SV_REPLYING))
 		return afs_io_error(call, afs_io_error_cm_reply);
-
-	return afs_queue_call_work(call);
+	return afs_find_cm_server_by_peer(call);
 }
 
 /*
@@ -596,7 +661,6 @@ static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call)
 static int afs_deliver_yfs_cb_callback(struct afs_call *call)
 {
 	struct afs_callback_break *cb;
-	struct sockaddr_rxrpc srx;
 	struct yfs_xdr_YFSFid *bp;
 	size_t size;
 	int ret, loop;
@@ -664,10 +728,5 @@ static int afs_deliver_yfs_cb_callback(struct afs_call *call)
 	/* We'll need the file server record as that tells us which set of
 	 * vnodes to operate upon.
 	 */
-	rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx);
-	call->cm_server = afs_find_server(call->net, &srx);
-	if (!call->cm_server)
-		trace_afs_cm_no_server(call, &srx);
-
-	return afs_queue_call_work(call);
+	return afs_find_cm_server_by_peer(call);
 }
diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c
new file mode 100644
index 0000000000000..d049cb4597425
--- /dev/null
+++ b/fs/afs/fs_probe.c
@@ -0,0 +1,270 @@
+/* AFS fileserver probing
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include "afs_fs.h"
+#include "internal.h"
+#include "protocol_yfs.h"
+
+static bool afs_fs_probe_done(struct afs_server *server)
+{
+	if (!atomic_dec_and_test(&server->probe_outstanding))
+		return false;
+
+	wake_up_var(&server->probe_outstanding);
+	clear_bit_unlock(AFS_SERVER_FL_PROBING, &server->flags);
+	wake_up_bit(&server->flags, AFS_SERVER_FL_PROBING);
+	return true;
+}
+
+/*
+ * Process the result of probing a fileserver.  This is called after successful
+ * or failed delivery of an FS.GetCapabilities operation.
+ */
+void afs_fileserver_probe_result(struct afs_call *call)
+{
+	struct afs_addr_list *alist = call->alist;
+	struct afs_server *server = call->reply[0];
+	unsigned int server_index = (long)call->reply[1];
+	unsigned int index = call->addr_ix;
+	unsigned int rtt = UINT_MAX;
+	bool have_result = false;
+	u64 _rtt;
+	int ret = call->error;
+
+	_enter("%pU,%u", &server->uuid, index);
+
+	spin_lock(&server->probe_lock);
+
+	switch (ret) {
+	case 0:
+		server->probe.error = 0;
+		goto responded;
+	case -ECONNABORTED:
+		if (!server->probe.responded) {
+			server->probe.abort_code = call->abort_code;
+			server->probe.error = ret;
+		}
+		goto responded;
+	case -ENOMEM:
+	case -ENONET:
+		server->probe.local_failure = true;
+		afs_io_error(call, afs_io_error_fs_probe_fail);
+		goto out;
+	case -ECONNRESET: /* Responded, but call expired. */
+	case -ENETUNREACH:
+	case -EHOSTUNREACH:
+	case -ECONNREFUSED:
+	case -ETIMEDOUT:
+	case -ETIME:
+	default:
+		clear_bit(index, &alist->responded);
+		set_bit(index, &alist->failed);
+		if (!server->probe.responded &&
+		    (server->probe.error == 0 ||
+		     server->probe.error == -ETIMEDOUT ||
+		     server->probe.error == -ETIME))
+			server->probe.error = ret;
+		afs_io_error(call, afs_io_error_fs_probe_fail);
+		goto out;
+	}
+
+responded:
+	set_bit(index, &alist->responded);
+	clear_bit(index, &alist->failed);
+
+	if (call->service_id == YFS_FS_SERVICE) {
+		server->probe.is_yfs = true;
+		set_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
+		alist->addrs[index].srx_service = call->service_id;
+	} else {
+		server->probe.not_yfs = true;
+		if (!server->probe.is_yfs) {
+			clear_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
+			alist->addrs[index].srx_service = call->service_id;
+		}
+	}
+
+	/* Get the RTT and scale it to fit into a 32-bit value that represents
+	 * over a minute of time so that we can access it with one instruction
+	 * on a 32-bit system.
+	 */
+	_rtt = rxrpc_kernel_get_rtt(call->net->socket, call->rxcall);
+	_rtt /= 64;
+	rtt = (_rtt > UINT_MAX) ? UINT_MAX : _rtt;
+	if (rtt < server->probe.rtt) {
+		server->probe.rtt = rtt;
+		alist->preferred = index;
+		have_result = true;
+	}
+
+	smp_wmb(); /* Set rtt before responded. */
+	server->probe.responded = true;
+	set_bit(AFS_SERVER_FL_PROBED, &server->flags);
+out:
+	spin_unlock(&server->probe_lock);
+
+	_debug("probe [%u][%u] %pISpc rtt=%u ret=%d",
+	       server_index, index, &alist->addrs[index].transport,
+	       (unsigned int)rtt, ret);
+
+	have_result |= afs_fs_probe_done(server);
+	if (have_result) {
+		server->probe.have_result = true;
+		wake_up_var(&server->probe.have_result);
+		wake_up_all(&server->probe_wq);
+	}
+}
+
+/*
+ * Probe all of a fileserver's addresses to find out the best route and to
+ * query its capabilities.
+ */
+static int afs_do_probe_fileserver(struct afs_net *net,
+				   struct afs_server *server,
+				   struct key *key,
+				   unsigned int server_index)
+{
+	struct afs_addr_cursor ac = {
+		.index = 0,
+	};
+	int ret;
+
+	_enter("%pU", &server->uuid);
+
+	read_lock(&server->fs_lock);
+	ac.alist = rcu_dereference_protected(server->addresses,
+					     lockdep_is_held(&server->fs_lock));
+	read_unlock(&server->fs_lock);
+
+	atomic_set(&server->probe_outstanding, ac.alist->nr_addrs);
+	memset(&server->probe, 0, sizeof(server->probe));
+	server->probe.rtt = UINT_MAX;
+
+	for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++) {
+		ret = afs_fs_get_capabilities(net, server, &ac, key, server_index,
+					      true);
+		if (ret != -EINPROGRESS) {
+			afs_fs_probe_done(server);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Send off probes to all unprobed servers.
+ */
+int afs_probe_fileservers(struct afs_net *net, struct key *key,
+			  struct afs_server_list *list)
+{
+	struct afs_server *server;
+	int i, ret;
+
+	for (i = 0; i < list->nr_servers; i++) {
+		server = list->servers[i].server;
+		if (test_bit(AFS_SERVER_FL_PROBED, &server->flags))
+			continue;
+
+		if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &server->flags)) {
+			ret = afs_do_probe_fileserver(net, server, key, i);
+			if (ret)
+				return ret;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Wait for the first as-yet untried fileserver to respond.
+ */
+int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried)
+{
+	struct wait_queue_entry *waits;
+	struct afs_server *server;
+	unsigned int rtt = UINT_MAX;
+	bool have_responders = false;
+	int pref = -1, i;
+
+	_enter("%u,%lx", slist->nr_servers, untried);
+
+	/* Only wait for servers that have a probe outstanding. */
+	for (i = 0; i < slist->nr_servers; i++) {
+		if (test_bit(i, &untried)) {
+			server = slist->servers[i].server;
+			if (!test_bit(AFS_SERVER_FL_PROBING, &server->flags))
+				__clear_bit(i, &untried);
+			if (server->probe.responded)
+				have_responders = true;
+		}
+	}
+	if (have_responders || !untried)
+		return 0;
+
+	waits = kmalloc(array_size(slist->nr_servers, sizeof(*waits)), GFP_KERNEL);
+	if (!waits)
+		return -ENOMEM;
+
+	for (i = 0; i < slist->nr_servers; i++) {
+		if (test_bit(i, &untried)) {
+			server = slist->servers[i].server;
+			init_waitqueue_entry(&waits[i], current);
+			add_wait_queue(&server->probe_wq, &waits[i]);
+		}
+	}
+
+	for (;;) {
+		bool still_probing = false;
+
+		set_current_state(TASK_INTERRUPTIBLE);
+		for (i = 0; i < slist->nr_servers; i++) {
+			if (test_bit(i, &untried)) {
+				server = slist->servers[i].server;
+				if (server->probe.responded)
+					goto stop;
+				if (test_bit(AFS_SERVER_FL_PROBING, &server->flags))
+					still_probing = true;
+			}
+		}
+
+		if (!still_probing || unlikely(signal_pending(current)))
+			goto stop;
+		schedule();
+	}
+
+stop:
+	set_current_state(TASK_RUNNING);
+
+	for (i = 0; i < slist->nr_servers; i++) {
+		if (test_bit(i, &untried)) {
+			server = slist->servers[i].server;
+			if (server->probe.responded &&
+			    server->probe.rtt < rtt) {
+				pref = i;
+				rtt = server->probe.rtt;
+			}
+
+			remove_wait_queue(&server->probe_wq, &waits[i]);
+		}
+	}
+
+	kfree(waits);
+
+	if (pref == -1 && signal_pending(current))
+		return -ERESTARTSYS;
+
+	if (pref >= 0)
+		slist->preferred = pref;
+	return 0;
+}
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 7c75a1813321c..ca08c83168f5f 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -2006,7 +2006,6 @@ int afs_fs_give_up_all_callbacks(struct afs_net *net,
  */
 static int afs_deliver_fs_get_capabilities(struct afs_call *call)
 {
-	struct afs_server *server = call->reply[0];
 	u32 count;
 	int ret;
 
@@ -2042,15 +2041,18 @@ static int afs_deliver_fs_get_capabilities(struct afs_call *call)
 		break;
 	}
 
-	if (call->service_id == YFS_FS_SERVICE)
-		set_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
-	else
-		clear_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
-
 	_leave(" = 0 [done]");
 	return 0;
 }
 
+static void afs_destroy_fs_get_capabilities(struct afs_call *call)
+{
+	struct afs_server *server = call->reply[0];
+
+	afs_put_server(call->net, server);
+	afs_flat_call_destructor(call);
+}
+
 /*
  * FS.GetCapabilities operation type
  */
@@ -2058,7 +2060,8 @@ static const struct afs_call_type afs_RXFSGetCapabilities = {
 	.name		= "FS.GetCapabilities",
 	.op		= afs_FS_GetCapabilities,
 	.deliver	= afs_deliver_fs_get_capabilities,
-	.destructor	= afs_flat_call_destructor,
+	.done		= afs_fileserver_probe_result,
+	.destructor	= afs_destroy_fs_get_capabilities,
 };
 
 /*
@@ -2068,7 +2071,9 @@ static const struct afs_call_type afs_RXFSGetCapabilities = {
 int afs_fs_get_capabilities(struct afs_net *net,
 			    struct afs_server *server,
 			    struct afs_addr_cursor *ac,
-			    struct key *key)
+			    struct key *key,
+			    unsigned int server_index,
+			    bool async)
 {
 	struct afs_call *call;
 	__be32 *bp;
@@ -2080,8 +2085,10 @@ int afs_fs_get_capabilities(struct afs_net *net,
 		return -ENOMEM;
 
 	call->key = key;
-	call->reply[0] = server;
+	call->reply[0] = afs_get_server(server);
+	call->reply[1] = (void *)(long)server_index;
 	call->upgrade = true;
+	call->want_reply_time = true;
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -2089,7 +2096,7 @@ int afs_fs_get_capabilities(struct afs_net *net,
 
 	/* Can't take a ref on server */
 	trace_afs_make_fs_call(call, NULL);
-	return afs_make_call(ac, call, GFP_NOFS, false);
+	return afs_make_call(ac, call, GFP_NOFS, async);
 }
 
 /*
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index b60d15212975a..5da3b09b75186 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -76,12 +76,13 @@ struct afs_addr_list {
 	u32			version;	/* Version */
 	unsigned char		max_addrs;
 	unsigned char		nr_addrs;
-	unsigned char		index;		/* Address currently in use */
+	unsigned char		preferred;	/* Preferred address */
 	unsigned char		nr_ipv4;	/* Number of IPv4 addresses */
 	enum dns_record_source	source:8;
 	enum dns_lookup_status	status:8;
 	unsigned long		probed;		/* Mask of servers that have been probed */
-	unsigned long		yfs;		/* Mask of servers that are YFS */
+	unsigned long		failed;		/* Mask of addrs that failed locally/ICMP */
+	unsigned long		responded;	/* Mask of addrs that responded */
 	struct sockaddr_rxrpc	addrs[];
 #define AFS_MAX_ADDRESSES ((unsigned int)(sizeof(unsigned long) * 8))
 };
@@ -91,6 +92,7 @@ struct afs_addr_list {
  */
 struct afs_call {
 	const struct afs_call_type *type;	/* type of call */
+	struct afs_addr_list	*alist;		/* Address is alist[addr_ix] */
 	wait_queue_head_t	waitq;		/* processes awaiting completion */
 	struct work_struct	async_work;	/* async I/O processor */
 	struct work_struct	work;		/* actual work processor */
@@ -116,6 +118,7 @@ struct afs_call {
 	spinlock_t		state_lock;
 	int			error;		/* error code */
 	u32			abort_code;	/* Remote abort ID or 0 */
+	u32			epoch;
 	unsigned		request_size;	/* size of request data */
 	unsigned		reply_max;	/* maximum size of reply */
 	unsigned		first_offset;	/* offset into mapping[first] */
@@ -125,13 +128,14 @@ struct afs_call {
 		unsigned	count2;		/* count used in unmarshalling */
 	};
 	unsigned char		unmarshall;	/* unmarshalling phase */
+	unsigned char		addr_ix;	/* Address in ->alist */
 	bool			incoming;	/* T if incoming call */
 	bool			send_pages;	/* T if data from mapping should be sent */
 	bool			need_attention;	/* T if RxRPC poked us */
 	bool			async;		/* T if asynchronous */
 	bool			ret_reply0;	/* T if should return reply[0] on success */
 	bool			upgrade;	/* T to request service upgrade */
-	bool			want_reply_time;	/* T if want reply_time */
+	bool			want_reply_time; /* T if want reply_time */
 	u16			service_id;	/* Actual service ID (after upgrade) */
 	unsigned int		debug_id;	/* Trace ID */
 	u32			operation_ID;	/* operation ID for an incoming call */
@@ -162,6 +166,9 @@ struct afs_call_type {
 
 	/* Work function */
 	void (*work)(struct work_struct *work);
+
+	/* Call done function (gets called immediately on success or failure) */
+	void (*done)(struct afs_call *call);
 };
 
 /*
@@ -376,10 +383,27 @@ struct afs_vlserver {
 	unsigned long		flags;
 #define AFS_VLSERVER_FL_PROBED	0		/* The VL server has been probed */
 #define AFS_VLSERVER_FL_PROBING	1		/* VL server is being probed */
+#define AFS_VLSERVER_FL_IS_YFS	2		/* Server is YFS not AFS */
 	rwlock_t		lock;		/* Lock on addresses */
 	atomic_t		usage;
-	u16			name_len;	/* Length of name */
+
+	/* Probe state */
+	wait_queue_head_t	probe_wq;
+	atomic_t		probe_outstanding;
+	spinlock_t		probe_lock;
+	struct {
+		unsigned int	rtt;		/* RTT as ktime/64 */
+		u32		abort_code;
+		short		error;
+		bool		have_result;
+		bool		responded:1;
+		bool		is_yfs:1;
+		bool		not_yfs:1;
+		bool		local_failure:1;
+	} probe;
+
 	u16			port;
+	u16			name_len;	/* Length of name */
 	char			name[];		/* Server name, case-flattened */
 };
 
@@ -399,6 +423,7 @@ struct afs_vlserver_list {
 	atomic_t		usage;
 	u8			nr_servers;
 	u8			index;		/* Server currently in use */
+	u8			preferred;	/* Preferred server */
 	enum dns_record_source	source:8;
 	enum dns_lookup_status	status:8;
 	rwlock_t		lock;
@@ -461,8 +486,10 @@ struct afs_server {
 #define AFS_SERVER_FL_MAY_HAVE_CB 8		/* May have callbacks on this fileserver */
 #define AFS_SERVER_FL_IS_YFS	9		/* Server is YFS not AFS */
 #define AFS_SERVER_FL_NO_RM2	10		/* Fileserver doesn't support YFS.RemoveFile2 */
+#define AFS_SERVER_FL_HAVE_EPOCH 11		/* ->epoch is valid */
 	atomic_t		usage;
 	u32			addr_version;	/* Address list version */
+	u32			cm_epoch;	/* Server RxRPC epoch */
 
 	/* file service access */
 	rwlock_t		fs_lock;	/* access lock */
@@ -471,6 +498,26 @@ struct afs_server {
 	struct hlist_head	cb_volumes;	/* List of volume interests on this server */
 	unsigned		cb_s_break;	/* Break-everything counter. */
 	rwlock_t		cb_break_lock;	/* Volume finding lock */
+
+	/* Probe state */
+	wait_queue_head_t	probe_wq;
+	atomic_t		probe_outstanding;
+	spinlock_t		probe_lock;
+	struct {
+		unsigned int	rtt;		/* RTT as ktime/64 */
+		u32		abort_code;
+		u32		cm_epoch;
+		short		error;
+		bool		have_result;
+		bool		responded:1;
+		bool		is_yfs:1;
+		bool		not_yfs:1;
+		bool		local_failure:1;
+		bool		no_epoch:1;
+		bool		cm_probed:1;
+		bool		said_rebooted:1;
+		bool		said_inconsistent:1;
+	} probe;
 };
 
 /*
@@ -505,8 +552,8 @@ struct afs_server_entry {
 
 struct afs_server_list {
 	refcount_t		usage;
-	unsigned short		nr_servers;
-	unsigned short		index;		/* Server currently in use */
+	unsigned char		nr_servers;
+	unsigned char		preferred;	/* Preferred server */
 	unsigned short		vnovol_mask;	/* Servers to be skipped due to VNOVOL */
 	unsigned int		seq;		/* Set to ->servers_seq when installed */
 	rwlock_t		lock;
@@ -653,13 +700,12 @@ struct afs_interface {
  */
 struct afs_addr_cursor {
 	struct afs_addr_list	*alist;		/* Current address list (pins ref) */
-	u32			abort_code;
-	unsigned short		start;		/* Starting point in alist->addrs[] */
-	unsigned short		index;		/* Wrapping offset from start to current addr */
-	short			error;
-	bool			begun;		/* T if we've begun iteration */
+	unsigned long		tried;		/* Tried addresses */
+	signed char		index;		/* Current address */
 	bool			responded;	/* T if the current address responded */
 	unsigned short		nr_iterations;	/* Number of address iterations */
+	short			error;
+	u32			abort_code;
 };
 
 /*
@@ -669,9 +715,10 @@ struct afs_vl_cursor {
 	struct afs_addr_cursor	ac;
 	struct afs_cell		*cell;		/* The cell we're querying */
 	struct afs_vlserver_list *server_list;	/* Current server list (pins ref) */
+	struct afs_vlserver	*server;	/* Server on which this resides */
 	struct key		*key;		/* Key for the server */
-	unsigned char		start;		/* Initial index in server list */
-	unsigned char		index;		/* Number of servers tried beyond start */
+	unsigned long		untried;	/* Bitmask of untried servers */
+	short			index;		/* Current server */
 	short			error;
 	unsigned short		flags;
 #define AFS_VL_CURSOR_STOP	0x0001		/* Set to cease iteration */
@@ -689,10 +736,10 @@ struct afs_fs_cursor {
 	struct afs_server_list	*server_list;	/* Current server list (pins ref) */
 	struct afs_cb_interest	*cbi;		/* Server on which this resides (pins ref) */
 	struct key		*key;		/* Key for the server */
+	unsigned long		untried;	/* Bitmask of untried servers */
 	unsigned int		cb_break;	/* cb_break + cb_s_break before the call */
 	unsigned int		cb_break_2;	/* cb_break + cb_s_break (2nd vnode) */
-	unsigned char		start;		/* Initial index in server list */
-	unsigned char		index;		/* Number of servers tried beyond start */
+	short			index;		/* Current server */
 	short			error;
 	unsigned short		flags;
 #define AFS_FS_CURSOR_STOP	0x0001		/* Set to cease iteration */
@@ -888,7 +935,7 @@ extern int afs_fs_release_lock(struct afs_fs_cursor *);
 extern int afs_fs_give_up_all_callbacks(struct afs_net *, struct afs_server *,
 					struct afs_addr_cursor *, struct key *);
 extern int afs_fs_get_capabilities(struct afs_net *, struct afs_server *,
-				   struct afs_addr_cursor *, struct key *);
+				   struct afs_addr_cursor *, struct key *, unsigned int, bool);
 extern int afs_fs_inline_bulk_status(struct afs_fs_cursor *, struct afs_net *,
 				     struct afs_fid *, struct afs_file_status *,
 				     struct afs_callback *, unsigned int,
@@ -897,6 +944,13 @@ extern int afs_fs_fetch_status(struct afs_fs_cursor *, struct afs_net *,
 			       struct afs_fid *, struct afs_file_status *,
 			       struct afs_callback *, struct afs_volsync *);
 
+/*
+ * fs_probe.c
+ */
+extern void afs_fileserver_probe_result(struct afs_call *);
+extern int afs_probe_fileservers(struct afs_net *, struct key *, struct afs_server_list *);
+extern int afs_wait_for_fs_probes(struct afs_server_list *, unsigned long);
+
 /*
  * inode.c
  */
@@ -1013,7 +1067,6 @@ extern int __net_init afs_open_socket(struct afs_net *);
 extern void __net_exit afs_close_socket(struct afs_net *);
 extern void afs_charge_preallocation(struct work_struct *);
 extern void afs_put_call(struct afs_call *);
-extern int afs_queue_call_work(struct afs_call *);
 extern long afs_make_call(struct afs_addr_cursor *, struct afs_call *, gfp_t, bool);
 extern struct afs_call *afs_alloc_flat_call(struct afs_net *,
 					    const struct afs_call_type *,
@@ -1130,7 +1183,6 @@ extern void afs_put_server(struct afs_net *, struct afs_server *);
 extern void afs_manage_servers(struct work_struct *);
 extern void afs_servers_timer(struct timer_list *);
 extern void __net_exit afs_purge_servers(struct afs_net *);
-extern bool afs_probe_fileserver(struct afs_fs_cursor *);
 extern bool afs_check_server_record(struct afs_fs_cursor *, struct afs_server *);
 
 /*
@@ -1160,9 +1212,17 @@ extern void afs_fs_exit(void);
 extern struct afs_vldb_entry *afs_vl_get_entry_by_name_u(struct afs_vl_cursor *,
 							 const char *, int);
 extern struct afs_addr_list *afs_vl_get_addrs_u(struct afs_vl_cursor *, const uuid_t *);
-extern int afs_vl_get_capabilities(struct afs_net *, struct afs_addr_cursor *, struct key *);
+extern int afs_vl_get_capabilities(struct afs_net *, struct afs_addr_cursor *, struct key *,
+				   struct afs_vlserver *, unsigned int, bool);
 extern struct afs_addr_list *afs_yfsvl_get_endpoints(struct afs_vl_cursor *, const uuid_t *);
 
+/*
+ * vl_probe.c
+ */
+extern void afs_vlserver_probe_result(struct afs_call *);
+extern int afs_send_vl_probes(struct afs_net *, struct key *, struct afs_vlserver_list *);
+extern int afs_wait_for_vl_probes(struct afs_vlserver_list *, unsigned long);
+
 /*
  * vl_rotate.c
  */
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index d887f822f4eb5..be2ee3bbd0a95 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -312,7 +312,7 @@ static int afs_proc_cell_vlservers_show(struct seq_file *m, void *v)
 	if (alist) {
 		for (i = 0; i < alist->nr_addrs; i++)
 			seq_printf(m, " %c %pISpc\n",
-				   alist->index == i ? '>' : '-',
+				   alist->preferred == i ? '>' : '-',
 				   &alist->addrs[i].transport);
 	}
 	return 0;
@@ -391,11 +391,11 @@ static int afs_proc_servers_show(struct seq_file *m, void *v)
 		   &server->uuid,
 		   atomic_read(&server->usage),
 		   &alist->addrs[0].transport,
-		   alist->index == 0 ? "*" : "");
+		   alist->preferred == 0 ? "*" : "");
 	for (i = 1; i < alist->nr_addrs; i++)
 		seq_printf(m, "                                         %pISpc%s\n",
 			   &alist->addrs[i].transport,
-			   alist->index == i ? "*" : "");
+			   alist->preferred == i ? "*" : "");
 	return 0;
 }
 
diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c
index 7c4487781637d..00504254c1c24 100644
--- a/fs/afs/rotate.c
+++ b/fs/afs/rotate.c
@@ -18,14 +18,6 @@
 #include "internal.h"
 #include "afs_fs.h"
 
-/*
- * Initialise a filesystem server cursor for iterating over FS servers.
- */
-static void afs_init_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode)
-{
-	memset(fc, 0, sizeof(*fc));
-}
-
 /*
  * Begin an operation on the fileserver.
  *
@@ -35,7 +27,7 @@ static void afs_init_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode
 bool afs_begin_vnode_operation(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
 			       struct key *key)
 {
-	afs_init_fs_cursor(fc, vnode);
+	memset(fc, 0, sizeof(*fc));
 	fc->vnode = vnode;
 	fc->key = key;
 	fc->ac.error = SHRT_MAX;
@@ -66,12 +58,15 @@ static bool afs_start_fs_iteration(struct afs_fs_cursor *fc,
 	fc->server_list = afs_get_serverlist(vnode->volume->servers);
 	read_unlock(&vnode->volume->servers_lock);
 
+	fc->untried = (1UL << fc->server_list->nr_servers) - 1;
+	fc->index = READ_ONCE(fc->server_list->preferred);
+
 	cbi = vnode->cb_interest;
 	if (cbi) {
 		/* See if the vnode's preferred record is still available */
 		for (i = 0; i < fc->server_list->nr_servers; i++) {
 			if (fc->server_list->servers[i].cb_interest == cbi) {
-				fc->start = i;
+				fc->index = i;
 				goto found_interest;
 			}
 		}
@@ -95,12 +90,9 @@ static bool afs_start_fs_iteration(struct afs_fs_cursor *fc,
 
 		afs_put_cb_interest(afs_v2net(vnode), cbi);
 		cbi = NULL;
-	} else {
-		fc->start = READ_ONCE(fc->server_list->index);
 	}
 
 found_interest:
-	fc->index = fc->start;
 	return true;
 }
 
@@ -144,11 +136,12 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
 	struct afs_addr_list *alist;
 	struct afs_server *server;
 	struct afs_vnode *vnode = fc->vnode;
-	int error = fc->ac.error;
+	u32 rtt, abort_code;
+	int error = fc->ac.error, i;
 
-	_enter("%u/%u,%u/%u,%d,%d",
-	       fc->index, fc->start,
-	       fc->ac.index, fc->ac.start,
+	_enter("%lx[%d],%lx[%d],%d,%d",
+	       fc->untried, fc->index,
+	       fc->ac.tried, fc->ac.index,
 	       error, fc->ac.abort_code);
 
 	if (fc->flags & AFS_FS_CURSOR_STOP) {
@@ -345,8 +338,50 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
 	if (!afs_start_fs_iteration(fc, vnode))
 		goto failed;
 
-use_server:
-	_debug("use");
+	_debug("__ VOL %llx __", vnode->volume->vid);
+	error = afs_probe_fileservers(afs_v2net(vnode), fc->key, fc->server_list);
+	if (error < 0)
+		goto failed_set_error;
+
+pick_server:
+	_debug("pick [%lx]", fc->untried);
+
+	error = afs_wait_for_fs_probes(fc->server_list, fc->untried);
+	if (error < 0)
+		goto failed_set_error;
+
+	/* Pick the untried server with the lowest RTT.  If we have outstanding
+	 * callbacks, we stick with the server we're already using if we can.
+	 */
+	if (fc->cbi) {
+		_debug("cbi %u", fc->index);
+		if (test_bit(fc->index, &fc->untried))
+			goto selected_server;
+		afs_put_cb_interest(afs_v2net(vnode), fc->cbi);
+		fc->cbi = NULL;
+		_debug("nocbi");
+	}
+
+	fc->index = -1;
+	rtt = U32_MAX;
+	for (i = 0; i < fc->server_list->nr_servers; i++) {
+		struct afs_server *s = fc->server_list->servers[i].server;
+
+		if (!test_bit(i, &fc->untried) || !s->probe.responded)
+			continue;
+		if (s->probe.rtt < rtt) {
+			fc->index = i;
+			rtt = s->probe.rtt;
+		}
+	}
+
+	if (fc->index == -1)
+		goto no_more_servers;
+
+selected_server:
+	_debug("use %d", fc->index);
+	__clear_bit(fc->index, &fc->untried);
+
 	/* We're starting on a different fileserver from the list.  We need to
 	 * check it, create a callback intercept, find its address list and
 	 * probe its capabilities before we use it.
@@ -379,60 +414,81 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
 
 	memset(&fc->ac, 0, sizeof(fc->ac));
 
-	/* Probe the current fileserver if we haven't done so yet. */
-	if (!test_bit(AFS_SERVER_FL_PROBED, &server->flags)) {
-		fc->ac.alist = afs_get_addrlist(alist);
-
-		if (!afs_probe_fileserver(fc)) {
-			switch (fc->ac.error) {
-			case -ENOMEM:
-			case -ERESTARTSYS:
-			case -EINTR:
-				goto failed;
-			default:
-				goto next_server;
-			}
-		}
-	}
-
 	if (!fc->ac.alist)
 		fc->ac.alist = alist;
 	else
 		afs_put_addrlist(alist);
 
-	fc->ac.start = READ_ONCE(alist->index);
-	fc->ac.index = fc->ac.start;
+	fc->ac.index = -1;
 
 iterate_address:
 	ASSERT(fc->ac.alist);
-	_debug("iterate %d/%d", fc->ac.index, fc->ac.alist->nr_addrs);
 	/* Iterate over the current server's address list to try and find an
 	 * address on which it will respond to us.
 	 */
 	if (!afs_iterate_addresses(&fc->ac))
 		goto next_server;
 
+	_debug("address [%u] %u/%u", fc->index, fc->ac.index, fc->ac.alist->nr_addrs);
+
 	_leave(" = t");
 	return true;
 
 next_server:
 	_debug("next");
 	afs_end_cursor(&fc->ac);
-	afs_put_cb_interest(afs_v2net(vnode), fc->cbi);
-	fc->cbi = NULL;
-	fc->index++;
-	if (fc->index >= fc->server_list->nr_servers)
-		fc->index = 0;
-	if (fc->index != fc->start)
-		goto use_server;
+	goto pick_server;
 
+no_more_servers:
 	/* That's all the servers poked to no good effect.  Try again if some
 	 * of them were busy.
 	 */
 	if (fc->flags & AFS_FS_CURSOR_VBUSY)
 		goto restart_from_beginning;
 
-	goto failed;
+	abort_code = 0;
+	error = -EDESTADDRREQ;
+	for (i = 0; i < fc->server_list->nr_servers; i++) {
+		struct afs_server *s = fc->server_list->servers[i].server;
+		int probe_error = READ_ONCE(s->probe.error);
+
+		switch (probe_error) {
+		case 0:
+			continue;
+		default:
+			if (error == -ETIMEDOUT ||
+			    error == -ETIME)
+				continue;
+		case -ETIMEDOUT:
+		case -ETIME:
+			if (error == -ENOMEM ||
+			    error == -ENONET)
+				continue;
+		case -ENOMEM:
+		case -ENONET:
+			if (error == -ENETUNREACH)
+				continue;
+		case -ENETUNREACH:
+			if (error == -EHOSTUNREACH)
+				continue;
+		case -EHOSTUNREACH:
+			if (error == -ECONNREFUSED)
+				continue;
+		case -ECONNREFUSED:
+			if (error == -ECONNRESET)
+				continue;
+		case -ECONNRESET: /* Responded, but call expired. */
+			if (error == -ECONNABORTED)
+				continue;
+		case -ECONNABORTED:
+			abort_code = s->probe.abort_code;
+			error = probe_error;
+			continue;
+		}
+	}
+
+	if (error == -ECONNABORTED)
+		error = afs_abort_to_error(abort_code);
 
 failed_set_error:
 	fc->error = error;
@@ -480,8 +536,7 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
 
 		memset(&fc->ac, 0, sizeof(fc->ac));
 		fc->ac.alist = alist;
-		fc->ac.start = READ_ONCE(alist->index);
-		fc->ac.index = fc->ac.start;
+		fc->ac.index = -1;
 		goto iterate_address;
 
 	case 0:
@@ -538,13 +593,13 @@ static void afs_dump_edestaddrreq(const struct afs_fs_cursor *fc)
 	pr_notice("EDESTADDR occurred\n");
 	pr_notice("FC: cbb=%x cbb2=%x fl=%hx err=%hd\n",
 		  fc->cb_break, fc->cb_break_2, fc->flags, fc->error);
-	pr_notice("FC: st=%u ix=%u ni=%u\n",
-		  fc->start, fc->index, fc->nr_iterations);
+	pr_notice("FC: ut=%lx ix=%d ni=%u\n",
+		  fc->untried, fc->index, fc->nr_iterations);
 
 	if (fc->server_list) {
 		const struct afs_server_list *sl = fc->server_list;
-		pr_notice("FC: SL nr=%u ix=%u vnov=%hx\n",
-			  sl->nr_servers, sl->index, sl->vnovol_mask);
+		pr_notice("FC: SL nr=%u pr=%u vnov=%hx\n",
+			  sl->nr_servers, sl->preferred, sl->vnovol_mask);
 		for (i = 0; i < sl->nr_servers; i++) {
 			const struct afs_server *s = sl->servers[i].server;
 			pr_notice("FC: server fl=%lx av=%u %pU\n",
@@ -552,22 +607,21 @@ static void afs_dump_edestaddrreq(const struct afs_fs_cursor *fc)
 			if (s->addresses) {
 				const struct afs_addr_list *a =
 					rcu_dereference(s->addresses);
-				pr_notice("FC:  - av=%u nr=%u/%u/%u ax=%u\n",
+				pr_notice("FC:  - av=%u nr=%u/%u/%u pr=%u\n",
 					  a->version,
 					  a->nr_ipv4, a->nr_addrs, a->max_addrs,
-					  a->index);
-				pr_notice("FC:  - pr=%lx yf=%lx\n",
-					  a->probed, a->yfs);
+					  a->preferred);
+				pr_notice("FC:  - pr=%lx R=%lx F=%lx\n",
+					  a->probed, a->responded, a->failed);
 				if (a == fc->ac.alist)
 					pr_notice("FC:  - current\n");
 			}
 		}
 	}
 
-	pr_notice("AC: as=%u ax=%u ac=%d er=%d b=%u r=%u ni=%u\n",
-		  fc->ac.start, fc->ac.index, fc->ac.abort_code, fc->ac.error,
-		  fc->ac.begun, fc->ac.responded, fc->ac.nr_iterations);
-
+	pr_notice("AC: t=%lx ax=%u ac=%d er=%d r=%u ni=%u\n",
+		  fc->ac.tried, fc->ac.index, fc->ac.abort_code, fc->ac.error,
+		  fc->ac.responded, fc->ac.nr_iterations);
 	rcu_read_unlock();
 }
 
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 42e1ea7372e96..59970886690f1 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -43,7 +43,6 @@ int afs_open_socket(struct afs_net *net)
 	struct sockaddr_rxrpc srx;
 	struct socket *socket;
 	unsigned int min_level;
-	u16 service_upgrade[2];
 	int ret;
 
 	_enter("");
@@ -82,13 +81,12 @@ int afs_open_socket(struct afs_net *net)
 	if (ret < 0)
 		goto error_2;
 
-	service_upgrade[0] = CM_SERVICE;
-	service_upgrade[1] = YFS_CM_SERVICE;
-	ret = kernel_setsockopt(socket, SOL_RXRPC, RXRPC_UPGRADEABLE_SERVICE,
-				(void *)service_upgrade, sizeof(service_upgrade));
-	if (ret < 0)
-		goto error_2;
-
+	/* Ideally, we'd turn on service upgrade here, but we can't because
+	 * OpenAFS is buggy and leaks the userStatus field from packet to
+	 * packet and between FS packets and CB packets - so if we try to do an
+	 * upgrade on an FS packet, OpenAFS will leak that into the CB packet
+	 * it sends back to us.
+	 */
 
 	rxrpc_kernel_new_call_notification(socket, afs_rx_new_call,
 					   afs_rx_discard_new_call);
@@ -192,6 +190,7 @@ void afs_put_call(struct afs_call *call)
 
 		afs_put_server(call->net, call->cm_server);
 		afs_put_cb_interest(call->net, call->cbi);
+		afs_put_addrlist(call->alist);
 		kfree(call->request);
 
 		trace_afs_call(call, afs_call_trace_free, 0, o,
@@ -205,21 +204,22 @@ void afs_put_call(struct afs_call *call)
 }
 
 /*
- * Queue the call for actual work.  Returns 0 unconditionally for convenience.
+ * Queue the call for actual work.
  */
-int afs_queue_call_work(struct afs_call *call)
+static void afs_queue_call_work(struct afs_call *call)
 {
-	int u = atomic_inc_return(&call->usage);
+	if (call->type->work) {
+		int u = atomic_inc_return(&call->usage);
 
-	trace_afs_call(call, afs_call_trace_work, u,
-		       atomic_read(&call->net->nr_outstanding_calls),
-		       __builtin_return_address(0));
+		trace_afs_call(call, afs_call_trace_work, u,
+			       atomic_read(&call->net->nr_outstanding_calls),
+			       __builtin_return_address(0));
 
-	INIT_WORK(&call->work, call->type->work);
+		INIT_WORK(&call->work, call->type->work);
 
-	if (!queue_work(afs_wq, &call->work))
-		afs_put_call(call);
-	return 0;
+		if (!queue_work(afs_wq, &call->work))
+			afs_put_call(call);
+	}
 }
 
 /*
@@ -376,6 +376,8 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
 	       atomic_read(&call->net->nr_outstanding_calls));
 
 	call->async = async;
+	call->addr_ix = ac->index;
+	call->alist = afs_get_addrlist(ac->alist);
 
 	/* Work out the length we're going to transmit.  This is awkward for
 	 * calls such as FS.StoreData where there's an extra injection of data
@@ -407,6 +409,7 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
 					 call->debug_id);
 	if (IS_ERR(rxcall)) {
 		ret = PTR_ERR(rxcall);
+		call->error = ret;
 		goto error_kill_call;
 	}
 
@@ -458,6 +461,8 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
 	call->error = ret;
 	trace_afs_call_done(call);
 error_kill_call:
+	if (call->type->done)
+		call->type->done(call);
 	afs_put_call(call);
 	ac->error = ret;
 	_leave(" = %d", ret);
@@ -509,6 +514,7 @@ static void afs_deliver_to_call(struct afs_call *call)
 		state = READ_ONCE(call->state);
 		switch (ret) {
 		case 0:
+			afs_queue_call_work(call);
 			if (state == AFS_CALL_CL_PROC_REPLY) {
 				if (call->cbi)
 					set_bit(AFS_SERVER_FL_MAY_HAVE_CB,
@@ -546,6 +552,8 @@ static void afs_deliver_to_call(struct afs_call *call)
 	}
 
 done:
+	if (call->type->done)
+		call->type->done(call);
 	if (state == AFS_CALL_COMPLETE && call->incoming)
 		afs_put_call(call);
 out:
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 7c1be8b4dc9a2..642afa2e9783c 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -231,6 +231,8 @@ static struct afs_server *afs_alloc_server(struct afs_net *net,
 	rwlock_init(&server->fs_lock);
 	INIT_HLIST_HEAD(&server->cb_volumes);
 	rwlock_init(&server->cb_break_lock);
+	init_waitqueue_head(&server->probe_wq);
+	spin_lock_init(&server->probe_lock);
 
 	afs_inc_servers_outstanding(net);
 	_leave(" = %p", server);
@@ -254,7 +256,7 @@ static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell,
 	ret = -ERESTARTSYS;
 	if (afs_begin_vlserver_operation(&vc, cell, key)) {
 		while (afs_select_vlserver(&vc)) {
-			if (test_bit(vc.ac.index, &vc.ac.alist->yfs))
+			if (test_bit(AFS_VLSERVER_FL_IS_YFS, &vc.server->flags))
 				alist = afs_yfsvl_get_endpoints(&vc, uuid);
 			else
 				alist = afs_vl_get_addrs_u(&vc, uuid);
@@ -365,8 +367,7 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
 	struct afs_addr_list *alist = rcu_access_pointer(server->addresses);
 	struct afs_addr_cursor ac = {
 		.alist	= alist,
-		.start	= alist->index,
-		.index	= 0,
+		.index	= alist->preferred,
 		.error	= 0,
 	};
 	_enter("%p", server);
@@ -374,6 +375,9 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
 	if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags))
 		afs_fs_give_up_all_callbacks(net, server, &ac, NULL);
 
+	wait_var_event(&server->probe_outstanding,
+		       atomic_read(&server->probe_outstanding) == 0);
+
 	call_rcu(&server->rcu, afs_server_rcu);
 	afs_dec_servers_outstanding(net);
 }
@@ -506,105 +510,6 @@ void afs_purge_servers(struct afs_net *net)
 	_leave("");
 }
 
-/*
- * Probe a fileserver to find its capabilities.
- *
- * TODO: Try service upgrade.
- */
-static bool afs_do_probe_fileserver(struct afs_fs_cursor *fc)
-{
-	int i;
-
-	_enter("");
-
-	fc->ac.start = READ_ONCE(fc->ac.alist->index);
-	fc->ac.index = fc->ac.start;
-	fc->ac.error = 0;
-	fc->ac.begun = false;
-
-	while (afs_iterate_addresses(&fc->ac)) {
-		afs_fs_get_capabilities(afs_v2net(fc->vnode), fc->cbi->server,
-					&fc->ac, fc->key);
-		switch (fc->ac.error) {
-		case 0:
-			if (test_bit(AFS_SERVER_FL_IS_YFS, &fc->cbi->server->flags)) {
-				for (i = 0; i < fc->ac.alist->nr_addrs; i++)
-					fc->ac.alist->addrs[i].srx_service =
-						YFS_FS_SERVICE;
-			}
-			afs_end_cursor(&fc->ac);
-			set_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags);
-			return true;
-		case -ECONNABORTED:
-			fc->ac.error = afs_abort_to_error(fc->ac.abort_code);
-			goto error;
-		case -ENOMEM:
-		case -ENONET:
-			goto error;
-		case -ENETUNREACH:
-		case -EHOSTUNREACH:
-		case -ECONNREFUSED:
-		case -ETIMEDOUT:
-		case -ETIME:
-			break;
-		default:
-			fc->ac.error = afs_io_error(NULL, afs_io_error_fs_probe_fail);
-			goto error;
-		}
-	}
-
-error:
-	afs_end_cursor(&fc->ac);
-	return false;
-}
-
-/*
- * If we haven't already, try probing the fileserver to get its capabilities.
- * We try not to instigate parallel probes, but it's possible that the parallel
- * probes will fail due to authentication failure when ours would succeed.
- *
- * TODO: Try sending an anonymous probe if an authenticated probe fails.
- */
-bool afs_probe_fileserver(struct afs_fs_cursor *fc)
-{
-	bool success;
-	int ret, retries = 0;
-
-	_enter("");
-
-retry:
-	if (test_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags)) {
-		_leave(" = t");
-		return true;
-	}
-
-	if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags)) {
-		success = afs_do_probe_fileserver(fc);
-		clear_bit_unlock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags);
-		wake_up_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING);
-		_leave(" = t");
-		return success;
-	}
-
-	_debug("wait");
-	ret = wait_on_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING,
-			  TASK_INTERRUPTIBLE);
-	if (ret == -ERESTARTSYS) {
-		fc->ac.error = ret;
-		_leave(" = f [%d]", ret);
-		return false;
-	}
-
-	retries++;
-	if (retries == 4) {
-		fc->ac.error = -ESTALE;
-		_leave(" = f [stale]");
-		return false;
-	}
-	_debug("retry");
-	goto retry;
-}
-
 /*
  * Get an update for a server's address list.
  */
diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c
index 8a5760aa58321..95d0761cdb34e 100644
--- a/fs/afs/server_list.c
+++ b/fs/afs/server_list.c
@@ -118,11 +118,11 @@ bool afs_annotate_server_list(struct afs_server_list *new,
 	return false;
 
 changed:
-	/* Maintain the same current server as before if possible. */
-	cur = old->servers[old->index].server;
+	/* Maintain the same preferred server as before if possible. */
+	cur = old->servers[old->preferred].server;
 	for (j = 0; j < new->nr_servers; j++) {
 		if (new->servers[j].server == cur) {
-			new->index = j;
+			new->preferred = j;
 			break;
 		}
 	}
diff --git a/fs/afs/vl_list.c b/fs/afs/vl_list.c
index c1e316ba105a9..b4f1a84519b95 100644
--- a/fs/afs/vl_list.c
+++ b/fs/afs/vl_list.c
@@ -23,6 +23,8 @@ struct afs_vlserver *afs_alloc_vlserver(const char *name, size_t name_len,
 	if (vlserver) {
 		atomic_set(&vlserver->usage, 1);
 		rwlock_init(&vlserver->lock);
+		init_waitqueue_head(&vlserver->probe_wq);
+		spin_lock_init(&vlserver->probe_lock);
 		vlserver->name_len = name_len;
 		vlserver->port = port;
 		memcpy(vlserver->name, name, name_len);
@@ -141,7 +143,7 @@ static struct afs_addr_list *afs_extract_vl_addrs(const u8 **_b, const u8 *end,
 
 	/* Start with IPv6 if available. */
 	if (alist->nr_ipv4 < alist->nr_addrs)
-		alist->index = alist->nr_ipv4;
+		alist->preferred = alist->nr_ipv4;
 
 	*_b = b;
 	return alist;
@@ -307,6 +309,8 @@ struct afs_vlserver_list *afs_extract_vlserver_list(struct afs_cell *cell,
 				(vllist->nr_servers - j) * sizeof(struct afs_vlserver_entry));
 		}
 
+		clear_bit(AFS_VLSERVER_FL_PROBED, &server->flags);
+
 		vllist->servers[j].priority = bs.priority;
 		vllist->servers[j].weight = bs.weight;
 		vllist->servers[j].server = server;
diff --git a/fs/afs/vl_probe.c b/fs/afs/vl_probe.c
new file mode 100644
index 0000000000000..c0f616bd70cba
--- /dev/null
+++ b/fs/afs/vl_probe.c
@@ -0,0 +1,273 @@
+/* AFS vlserver probing
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include "afs_fs.h"
+#include "internal.h"
+#include "protocol_yfs.h"
+
+static bool afs_vl_probe_done(struct afs_vlserver *server)
+{
+	if (!atomic_dec_and_test(&server->probe_outstanding))
+		return false;
+
+	wake_up_var(&server->probe_outstanding);
+	clear_bit_unlock(AFS_VLSERVER_FL_PROBING, &server->flags);
+	wake_up_bit(&server->flags, AFS_VLSERVER_FL_PROBING);
+	return true;
+}
+
+/*
+ * Process the result of probing a vlserver.  This is called after successful
+ * or failed delivery of an VL.GetCapabilities operation.
+ */
+void afs_vlserver_probe_result(struct afs_call *call)
+{
+	struct afs_addr_list *alist = call->alist;
+	struct afs_vlserver *server = call->reply[0];
+	unsigned int server_index = (long)call->reply[1];
+	unsigned int index = call->addr_ix;
+	unsigned int rtt = UINT_MAX;
+	bool have_result = false;
+	u64 _rtt;
+	int ret = call->error;
+
+	_enter("%s,%u,%u,%d,%d", server->name, server_index, index, ret, call->abort_code);
+
+	spin_lock(&server->probe_lock);
+
+	switch (ret) {
+	case 0:
+		server->probe.error = 0;
+		goto responded;
+	case -ECONNABORTED:
+		if (!server->probe.responded) {
+			server->probe.abort_code = call->abort_code;
+			server->probe.error = ret;
+		}
+		goto responded;
+	case -ENOMEM:
+	case -ENONET:
+		server->probe.local_failure = true;
+		afs_io_error(call, afs_io_error_vl_probe_fail);
+		goto out;
+	case -ECONNRESET: /* Responded, but call expired. */
+	case -ENETUNREACH:
+	case -EHOSTUNREACH:
+	case -ECONNREFUSED:
+	case -ETIMEDOUT:
+	case -ETIME:
+	default:
+		clear_bit(index, &alist->responded);
+		set_bit(index, &alist->failed);
+		if (!server->probe.responded &&
+		    (server->probe.error == 0 ||
+		     server->probe.error == -ETIMEDOUT ||
+		     server->probe.error == -ETIME))
+			server->probe.error = ret;
+		afs_io_error(call, afs_io_error_vl_probe_fail);
+		goto out;
+	}
+
+responded:
+	set_bit(index, &alist->responded);
+	clear_bit(index, &alist->failed);
+
+	if (call->service_id == YFS_VL_SERVICE) {
+		server->probe.is_yfs = true;
+		set_bit(AFS_VLSERVER_FL_IS_YFS, &server->flags);
+		alist->addrs[index].srx_service = call->service_id;
+	} else {
+		server->probe.not_yfs = true;
+		if (!server->probe.is_yfs) {
+			clear_bit(AFS_VLSERVER_FL_IS_YFS, &server->flags);
+			alist->addrs[index].srx_service = call->service_id;
+		}
+	}
+
+	/* Get the RTT and scale it to fit into a 32-bit value that represents
+	 * over a minute of time so that we can access it with one instruction
+	 * on a 32-bit system.
+	 */
+	_rtt = rxrpc_kernel_get_rtt(call->net->socket, call->rxcall);
+	_rtt /= 64;
+	rtt = (_rtt > UINT_MAX) ? UINT_MAX : _rtt;
+	if (rtt < server->probe.rtt) {
+		server->probe.rtt = rtt;
+		alist->preferred = index;
+		have_result = true;
+	}
+
+	smp_wmb(); /* Set rtt before responded. */
+	server->probe.responded = true;
+	set_bit(AFS_VLSERVER_FL_PROBED, &server->flags);
+out:
+	spin_unlock(&server->probe_lock);
+
+	_debug("probe [%u][%u] %pISpc rtt=%u ret=%d",
+	       server_index, index, &alist->addrs[index].transport,
+	       (unsigned int)rtt, ret);
+
+	have_result |= afs_vl_probe_done(server);
+	if (have_result) {
+		server->probe.have_result = true;
+		wake_up_var(&server->probe.have_result);
+		wake_up_all(&server->probe_wq);
+	}
+}
+
+/*
+ * Probe all of a vlserver's addresses to find out the best route and to
+ * query its capabilities.
+ */
+static int afs_do_probe_vlserver(struct afs_net *net,
+				 struct afs_vlserver *server,
+				 struct key *key,
+				 unsigned int server_index)
+{
+	struct afs_addr_cursor ac = {
+		.index = 0,
+	};
+	int ret;
+
+	_enter("%s", server->name);
+
+	read_lock(&server->lock);
+	ac.alist = rcu_dereference_protected(server->addresses,
+					     lockdep_is_held(&server->lock));
+	read_unlock(&server->lock);
+
+	atomic_set(&server->probe_outstanding, ac.alist->nr_addrs);
+	memset(&server->probe, 0, sizeof(server->probe));
+	server->probe.rtt = UINT_MAX;
+
+	for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++) {
+		ret = afs_vl_get_capabilities(net, &ac, key, server,
+					      server_index, true);
+		if (ret != -EINPROGRESS) {
+			afs_vl_probe_done(server);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Send off probes to all unprobed servers.
+ */
+int afs_send_vl_probes(struct afs_net *net, struct key *key,
+		       struct afs_vlserver_list *vllist)
+{
+	struct afs_vlserver *server;
+	int i, ret;
+
+	for (i = 0; i < vllist->nr_servers; i++) {
+		server = vllist->servers[i].server;
+		if (test_bit(AFS_VLSERVER_FL_PROBED, &server->flags))
+			continue;
+
+		if (!test_and_set_bit_lock(AFS_VLSERVER_FL_PROBING, &server->flags)) {
+			ret = afs_do_probe_vlserver(net, server, key, i);
+			if (ret)
+				return ret;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Wait for the first as-yet untried server to respond.
+ */
+int afs_wait_for_vl_probes(struct afs_vlserver_list *vllist,
+			   unsigned long untried)
+{
+	struct wait_queue_entry *waits;
+	struct afs_vlserver *server;
+	unsigned int rtt = UINT_MAX;
+	bool have_responders = false;
+	int pref = -1, i;
+
+	_enter("%u,%lx", vllist->nr_servers, untried);
+
+	/* Only wait for servers that have a probe outstanding. */
+	for (i = 0; i < vllist->nr_servers; i++) {
+		if (test_bit(i, &untried)) {
+			server = vllist->servers[i].server;
+			if (!test_bit(AFS_VLSERVER_FL_PROBING, &server->flags))
+				__clear_bit(i, &untried);
+			if (server->probe.responded)
+				have_responders = true;
+		}
+	}
+	if (have_responders || !untried)
+		return 0;
+
+	waits = kmalloc(array_size(vllist->nr_servers, sizeof(*waits)), GFP_KERNEL);
+	if (!waits)
+		return -ENOMEM;
+
+	for (i = 0; i < vllist->nr_servers; i++) {
+		if (test_bit(i, &untried)) {
+			server = vllist->servers[i].server;
+			init_waitqueue_entry(&waits[i], current);
+			add_wait_queue(&server->probe_wq, &waits[i]);
+		}
+	}
+
+	for (;;) {
+		bool still_probing = false;
+
+		set_current_state(TASK_INTERRUPTIBLE);
+		for (i = 0; i < vllist->nr_servers; i++) {
+			if (test_bit(i, &untried)) {
+				server = vllist->servers[i].server;
+				if (server->probe.responded)
+					goto stop;
+				if (test_bit(AFS_VLSERVER_FL_PROBING, &server->flags))
+					still_probing = true;
+			}
+		}
+
+		if (!still_probing || unlikely(signal_pending(current)))
+			goto stop;
+		schedule();
+	}
+
+stop:
+	set_current_state(TASK_RUNNING);
+
+	for (i = 0; i < vllist->nr_servers; i++) {
+		if (test_bit(i, &untried)) {
+			server = vllist->servers[i].server;
+			if (server->probe.responded &&
+			    server->probe.rtt < rtt) {
+				pref = i;
+				rtt = server->probe.rtt;
+			}
+
+			remove_wait_queue(&server->probe_wq, &waits[i]);
+		}
+	}
+
+	kfree(waits);
+
+	if (pref == -1 && signal_pending(current))
+		return -ERESTARTSYS;
+
+	if (pref >= 0)
+		vllist->preferred = pref;
+
+	_leave(" = 0 [%u]", pref);
+	return 0;
+}
diff --git a/fs/afs/vl_rotate.c b/fs/afs/vl_rotate.c
index ead6dedbb5612..b64a284b99d27 100644
--- a/fs/afs/vl_rotate.c
+++ b/fs/afs/vl_rotate.c
@@ -58,8 +58,8 @@ static bool afs_start_vl_iteration(struct afs_vl_cursor *vc)
 	if (!vc->server_list || !vc->server_list->nr_servers)
 		return false;
 
-	vc->start = READ_ONCE(vc->server_list->index);
-	vc->index = vc->start;
+	vc->untried = (1UL << vc->server_list->nr_servers) - 1;
+	vc->index = -1;
 	return true;
 }
 
@@ -71,11 +71,12 @@ bool afs_select_vlserver(struct afs_vl_cursor *vc)
 {
 	struct afs_addr_list *alist;
 	struct afs_vlserver *vlserver;
-	int error = vc->ac.error;
+	u32 rtt;
+	int error = vc->ac.error, abort_code, i;
 
-	_enter("%u/%u,%u/%u,%d,%d",
-	       vc->index, vc->start,
-	       vc->ac.index, vc->ac.start,
+	_enter("%lx[%d],%lx[%d],%d,%d",
+	       vc->untried, vc->index,
+	       vc->ac.tried, vc->ac.index,
 	       error, vc->ac.abort_code);
 
 	if (vc->flags & AFS_VL_CURSOR_STOP) {
@@ -145,23 +146,52 @@ bool afs_select_vlserver(struct afs_vl_cursor *vc)
 start:
 	_debug("start");
 
-	/* TODO: Consider checking the VL server list */
-
 	if (!afs_start_vl_iteration(vc))
 		goto failed;
 
-use_server:
-	_debug("use");
+	error = afs_send_vl_probes(vc->cell->net, vc->key, vc->server_list);
+	if (error < 0)
+		goto failed_set_error;
+
+pick_server:
+	_debug("pick [%lx]", vc->untried);
+
+	error = afs_wait_for_vl_probes(vc->server_list, vc->untried);
+	if (error < 0)
+		goto failed_set_error;
+
+	/* Pick the untried server with the lowest RTT. */
+	vc->index = vc->server_list->preferred;
+	if (test_bit(vc->index, &vc->untried))
+		goto selected_server;
+
+	vc->index = -1;
+	rtt = U32_MAX;
+	for (i = 0; i < vc->server_list->nr_servers; i++) {
+		struct afs_vlserver *s = vc->server_list->servers[i].server;
+
+		if (!test_bit(i, &vc->untried) || !s->probe.responded)
+			continue;
+		if (s->probe.rtt < rtt) {
+			vc->index = i;
+			rtt = s->probe.rtt;
+		}
+	}
+
+	if (vc->index == -1)
+		goto no_more_servers;
+
+selected_server:
+	_debug("use %d", vc->index);
+	__clear_bit(vc->index, &vc->untried);
+
 	/* We're starting on a different vlserver from the list.  We need to
 	 * check it, find its address list and probe its capabilities before we
 	 * use it.
 	 */
 	ASSERTCMP(vc->ac.alist, ==, NULL);
 	vlserver = vc->server_list->servers[vc->index].server;
-
-	// TODO: Check the vlserver occasionally
-	//if (!afs_check_vlserver_record(vc, vlserver))
-	//	goto failed;
+	vc->server = vlserver;
 
 	_debug("USING VLSERVER: %s", vlserver->name);
 
@@ -173,62 +203,84 @@ bool afs_select_vlserver(struct afs_vl_cursor *vc)
 
 	memset(&vc->ac, 0, sizeof(vc->ac));
 
-	/* Probe the current vlserver if we haven't done so yet. */
-#if 0 // TODO
-	if (!test_bit(AFS_VLSERVER_FL_PROBED, &vlserver->flags)) {
-		vc->ac.alist = afs_get_addrlist(alist);
-
-		if (!afs_probe_vlserver(vc)) {
-			error = vc->ac.error;
-			switch (error) {
-			case -ENOMEM:
-			case -ERESTARTSYS:
-			case -EINTR:
-				goto failed_set_error;
-			default:
-				goto next_server;
-			}
-		}
-	}
-#endif
-
 	if (!vc->ac.alist)
 		vc->ac.alist = alist;
 	else
 		afs_put_addrlist(alist);
 
-	vc->ac.start = READ_ONCE(alist->index);
-	vc->ac.index = vc->ac.start;
+	vc->ac.index = -1;
 
 iterate_address:
 	ASSERT(vc->ac.alist);
-	_debug("iterate %d/%d", vc->ac.index, vc->ac.alist->nr_addrs);
 	/* Iterate over the current server's address list to try and find an
 	 * address on which it will respond to us.
 	 */
 	if (!afs_iterate_addresses(&vc->ac))
 		goto next_server;
 
+	_debug("VL address %d/%d", vc->ac.index, vc->ac.alist->nr_addrs);
+
 	_leave(" = t %pISpc", &vc->ac.alist->addrs[vc->ac.index].transport);
 	return true;
 
 next_server:
 	_debug("next");
 	afs_end_cursor(&vc->ac);
-	vc->index++;
-	if (vc->index >= vc->server_list->nr_servers)
-		vc->index = 0;
-	if (vc->index != vc->start)
-		goto use_server;
+	goto pick_server;
 
+no_more_servers:
 	/* That's all the servers poked to no good effect.  Try again if some
 	 * of them were busy.
 	 */
 	if (vc->flags & AFS_VL_CURSOR_RETRY)
 		goto restart_from_beginning;
 
-	goto failed;
+	abort_code = 0;
+	error = -EDESTADDRREQ;
+	for (i = 0; i < vc->server_list->nr_servers; i++) {
+		struct afs_vlserver *s = vc->server_list->servers[i].server;
+		int probe_error = READ_ONCE(s->probe.error);
+
+		switch (probe_error) {
+		case 0:
+			continue;
+		default:
+			if (error == -ETIMEDOUT ||
+			    error == -ETIME)
+				continue;
+		case -ETIMEDOUT:
+		case -ETIME:
+			if (error == -ENOMEM ||
+			    error == -ENONET)
+				continue;
+		case -ENOMEM:
+		case -ENONET:
+			if (error == -ENETUNREACH)
+				continue;
+		case -ENETUNREACH:
+			if (error == -EHOSTUNREACH)
+				continue;
+		case -EHOSTUNREACH:
+			if (error == -ECONNREFUSED)
+				continue;
+		case -ECONNREFUSED:
+			if (error == -ECONNRESET)
+				continue;
+		case -ECONNRESET: /* Responded, but call expired. */
+			if (error == -ECONNABORTED)
+				continue;
+		case -ECONNABORTED:
+			abort_code = s->probe.abort_code;
+			error = probe_error;
+			continue;
+		}
+	}
+
+	if (error == -ECONNABORTED)
+		error = afs_abort_to_error(abort_code);
 
+failed_set_error:
+	vc->error = error;
 failed:
 	vc->flags |= AFS_VL_CURSOR_STOP;
 	afs_end_cursor(&vc->ac);
@@ -250,8 +302,8 @@ static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc)
 
 	rcu_read_lock();
 	pr_notice("EDESTADDR occurred\n");
-	pr_notice("VC: st=%u ix=%u ni=%hu fl=%hx err=%hd\n",
-		  vc->start, vc->index, vc->nr_iterations, vc->flags, vc->error);
+	pr_notice("VC: ut=%lx ix=%u ni=%hu fl=%hx err=%hd\n",
+		  vc->untried, vc->index, vc->nr_iterations, vc->flags, vc->error);
 
 	if (vc->server_list) {
 		const struct afs_vlserver_list *sl = vc->server_list;
@@ -259,26 +311,25 @@ static void afs_vl_dump_edestaddrreq(const struct afs_vl_cursor *vc)
 			  sl->nr_servers, sl->index);
 		for (i = 0; i < sl->nr_servers; i++) {
 			const struct afs_vlserver *s = sl->servers[i].server;
-			pr_notice("VC: server fl=%lx %s+%hu\n",
-				  s->flags, s->name, s->port);
+			pr_notice("VC: server %s+%hu fl=%lx E=%hd\n",
+				  s->name, s->port, s->flags, s->probe.error);
 			if (s->addresses) {
 				const struct afs_addr_list *a =
 					rcu_dereference(s->addresses);
-				pr_notice("VC:  - av=%u nr=%u/%u/%u ax=%u\n",
-					  a->version,
+				pr_notice("VC:  - nr=%u/%u/%u pf=%u\n",
 					  a->nr_ipv4, a->nr_addrs, a->max_addrs,
-					  a->index);
-				pr_notice("VC:  - pr=%lx yf=%lx\n",
-					  a->probed, a->yfs);
+					  a->preferred);
+				pr_notice("VC:  - pr=%lx R=%lx F=%lx\n",
+					  a->probed, a->responded, a->failed);
 				if (a == vc->ac.alist)
 					pr_notice("VC:  - current\n");
 			}
 		}
 	}
 
-	pr_notice("AC: as=%u ax=%u ac=%d er=%d b=%u r=%u ni=%hu\n",
-		  vc->ac.start, vc->ac.index, vc->ac.abort_code, vc->ac.error,
-		  vc->ac.begun, vc->ac.responded, vc->ac.nr_iterations);
+	pr_notice("AC: t=%lx ax=%u ac=%d er=%d r=%u ni=%u\n",
+		  vc->ac.tried, vc->ac.index, vc->ac.abort_code, vc->ac.error,
+		  vc->ac.responded, vc->ac.nr_iterations);
 	rcu_read_unlock();
 }
 
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index 3127ab9b55210..c3d9e5a5f67ee 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -348,12 +348,18 @@ static int afs_deliver_vl_get_capabilities(struct afs_call *call)
 		break;
 	}
 
-	call->reply[0] = (void *)(unsigned long)call->service_id;
-
 	_leave(" = 0 [done]");
 	return 0;
 }
 
+static void afs_destroy_vl_get_capabilities(struct afs_call *call)
+{
+	struct afs_vlserver *server = call->reply[0];
+
+	afs_put_vlserver(call->net, server);
+	afs_flat_call_destructor(call);
+}
+
 /*
  * VL.GetCapabilities operation type
  */
@@ -361,7 +367,8 @@ static const struct afs_call_type afs_RXVLGetCapabilities = {
 	.name		= "VL.GetCapabilities",
 	.op		= afs_VL_GetCapabilities,
 	.deliver	= afs_deliver_vl_get_capabilities,
-	.destructor	= afs_flat_call_destructor,
+	.done		= afs_vlserver_probe_result,
+	.destructor	= afs_destroy_vl_get_capabilities,
 };
 
 /*
@@ -371,8 +378,12 @@ static const struct afs_call_type afs_RXVLGetCapabilities = {
  * We use this to probe for service upgrade to determine what the server at the
  * other end supports.
  */
-int afs_vl_get_capabilities(struct afs_net *net, struct afs_addr_cursor *ac,
-			    struct key *key)
+int afs_vl_get_capabilities(struct afs_net *net,
+			    struct afs_addr_cursor *ac,
+			    struct key *key,
+			    struct afs_vlserver *server,
+			    unsigned int server_index,
+			    bool async)
 {
 	struct afs_call *call;
 	__be32 *bp;
@@ -384,9 +395,10 @@ int afs_vl_get_capabilities(struct afs_net *net, struct afs_addr_cursor *ac,
 		return -ENOMEM;
 
 	call->key = key;
-	call->upgrade = true; /* Let's see if this is a YFS server */
-	call->reply[0] = (void *)VLGETCAPABILITIES;
-	call->ret_reply0 = true;
+	call->reply[0] = afs_get_vlserver(server);
+	call->reply[1] = (void *)(long)server_index;
+	call->upgrade = true;
+	call->want_reply_time = true;
 
 	/* marshall the parameters */
 	bp = call->request;
@@ -394,7 +406,7 @@ int afs_vl_get_capabilities(struct afs_net *net, struct afs_addr_cursor *ac,
 
 	/* Can't take a ref on server */
 	trace_afs_make_vl_call(call);
-	return afs_make_call(ac, call, GFP_KERNEL, false);
+	return afs_make_call(ac, call, GFP_KERNEL, async);
 }
 
 /*
@@ -591,11 +603,6 @@ static int afs_deliver_yfsvl_get_endpoints(struct afs_call *call)
 	}
 
 	alist = call->reply[0];
-
-	/* Start with IPv6 if available. */
-	if (alist->nr_ipv4 < alist->nr_addrs)
-		alist->index = alist->nr_ipv4;
-
 	_leave(" = 0 [done]");
 	return 0;
 }
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index 7527c081726e0..00975ed3640f8 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -82,22 +82,6 @@ static struct afs_vldb_entry *afs_vl_lookup_vldb(struct afs_cell *cell,
 		return ERR_PTR(-ERESTARTSYS);
 
 	while (afs_select_vlserver(&vc)) {
-		if (!test_bit(vc.ac.index, &vc.ac.alist->probed)) {
-			ret = afs_vl_get_capabilities(cell->net, &vc.ac, key);
-			switch (ret) {
-			case VL_SERVICE:
-				clear_bit(vc.ac.index, &vc.ac.alist->yfs);
-				set_bit(vc.ac.index, &vc.ac.alist->probed);
-				vc.ac.alist->addrs[vc.ac.index].srx_service = ret;
-				break;
-			case YFS_VL_SERVICE:
-				set_bit(vc.ac.index, &vc.ac.alist->yfs);
-				set_bit(vc.ac.index, &vc.ac.alist->probed);
-				vc.ac.alist->addrs[vc.ac.index].srx_service = ret;
-				break;
-			}
-		}
-
 		vldb = afs_vl_get_entry_by_name_u(&vc, volname, volnamesz);
 	}
 
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index ed155042236bb..33d291888ba9c 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -137,6 +137,7 @@ enum afs_io_error {
 	afs_io_error_extract,
 	afs_io_error_fs_probe_fail,
 	afs_io_error_vl_lookup_fail,
+	afs_io_error_vl_probe_fail,
 };
 
 enum afs_file_error {
@@ -261,7 +262,8 @@ enum afs_file_error {
 	EM(afs_io_error_cm_reply,		"CM_REPLY")		\
 	EM(afs_io_error_extract,		"EXTRACT")		\
 	EM(afs_io_error_fs_probe_fail,		"FS_PROBE_FAIL")	\
-	E_(afs_io_error_vl_lookup_fail,		"VL_LOOKUP_FAIL")
+	EM(afs_io_error_vl_lookup_fail,		"VL_LOOKUP_FAIL")	\
+	E_(afs_io_error_vl_probe_fail,		"VL_PROBE_FAIL")
 
 #define afs_file_errors							\
 	EM(afs_file_error_dir_bad_magic,	"DIR_BAD_MAGIC")	\
-- 
GitLab


From 7081fb629e434c13f126f191c67ff0c9939d9ef5 Mon Sep 17 00:00:00 2001
From: Roland Dreier <roland@purestorage.com>
Date: Sat, 20 Oct 2018 09:45:04 -0700
Subject: [PATCH 0168/1890] scsi: target: Set response length for REPORT TARGET
 PORT GROUPS

One more place where we can return the length we actually fill in.

Signed-off-by: Roland Dreier <roland@purestorage.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/target/target_core_alua.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c
index e46ca968009c0..4f134b0c3e29e 100644
--- a/drivers/target/target_core_alua.c
+++ b/drivers/target/target_core_alua.c
@@ -268,7 +268,7 @@ target_emulate_report_target_port_groups(struct se_cmd *cmd)
 	}
 	transport_kunmap_data_sg(cmd);
 
-	target_complete_cmd(cmd, GOOD);
+	target_complete_cmd_with_length(cmd, GOOD, rd_len + 4);
 	return 0;
 }
 
-- 
GitLab


From 6110f37fb33d54781e1fa841922c466c8b9005cd Mon Sep 17 00:00:00 2001
From: Roland Dreier <roland@purestorage.com>
Date: Sat, 20 Oct 2018 09:45:16 -0700
Subject: [PATCH 0169/1890] scsi: target: Don't request modules that aren't
 even built

If, for example, I don't enable CONFIG_TCM_PSCSI, then every time I load
the target subsystem, I get an annoying

    Unable to load target_core_pscsi

kernel log message.  Instead let's only request_module() on things if
that code is enabled.

Signed-off-by: Roland Dreier <roland@purestorage.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/target/target_core_transport.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 4cf33e2cc7058..e31e4fc31aa15 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -205,19 +205,19 @@ void transport_subsystem_check_init(void)
 	if (sub_api_initialized)
 		return;
 
-	ret = request_module("target_core_iblock");
+	ret = IS_ENABLED(CONFIG_TCM_IBLOCK) && request_module("target_core_iblock");
 	if (ret != 0)
 		pr_err("Unable to load target_core_iblock\n");
 
-	ret = request_module("target_core_file");
+	ret = IS_ENABLED(CONFIG_TCM_FILEIO) && request_module("target_core_file");
 	if (ret != 0)
 		pr_err("Unable to load target_core_file\n");
 
-	ret = request_module("target_core_pscsi");
+	ret = IS_ENABLED(CONFIG_TCM_PSCSI) && request_module("target_core_pscsi");
 	if (ret != 0)
 		pr_err("Unable to load target_core_pscsi\n");
 
-	ret = request_module("target_core_user");
+	ret = IS_ENABLED(CONFIG_TCM_USER2) && request_module("target_core_user");
 	if (ret != 0)
 		pr_err("Unable to load target_core_user\n");
 
-- 
GitLab


From 1b171b1a29d68bcad1e44b0135a5155570efa3ea Mon Sep 17 00:00:00 2001
From: Sabyasachi Gupta <sabyasachi.linux@gmail.com>
Date: Thu, 18 Oct 2018 21:36:26 +0530
Subject: [PATCH 0170/1890] scsi: mvsas: Use dma_pool_zalloc

Replace dma_pool_alloc + memset with dma_pool_zalloc.

Signed-off-by: Sabyasachi Gupta <sabyasachi.linux@gmail.com>
Reviewed-by: Jack Wang <jinpu.wang@profitbricks.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/mvsas/mv_sas.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/scsi/mvsas/mv_sas.c b/drivers/scsi/mvsas/mv_sas.c
index 3df1428df3172..4753b3c6a2c2a 100644
--- a/drivers/scsi/mvsas/mv_sas.c
+++ b/drivers/scsi/mvsas/mv_sas.c
@@ -790,12 +790,11 @@ static int mvs_task_prep(struct sas_task *task, struct mvs_info *mvi, int is_tmf
 	slot->n_elem = n_elem;
 	slot->slot_tag = tag;
 
-	slot->buf = dma_pool_alloc(mvi->dma_pool, GFP_ATOMIC, &slot->buf_dma);
+	slot->buf = dma_pool_zalloc(mvi->dma_pool, GFP_ATOMIC, &slot->buf_dma);
 	if (!slot->buf) {
 		rc = -ENOMEM;
 		goto err_out_tag;
 	}
-	memset(slot->buf, 0, MVS_SLOT_BUF_SZ);
 
 	tei.task = task;
 	tei.hdr = &mvi->slot[tag];
-- 
GitLab


From 09968e5049b4b9a47413327c56f254aa2812bbc2 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <natechancellor@gmail.com>
Date: Thu, 18 Oct 2018 14:55:41 -0700
Subject: [PATCH 0171/1890] scsi: 3w-sas: 3w-9xxx: Use unsigned char for cdb

Clang warns a few times:

drivers/scsi/3w-sas.c:386:11: warning: implicit conversion from 'int' to
'char' changes value from 128 to -128 [-Wconstant-conversion]
        cdb[4] = TW_ALLOCATION_LENGTH; /* allocation length */
               ~ ^~~~~~~~~~~~~~~~~~~~

Update cdb's type to unsigned char, which matches the type of the cdb
member in struct TW_Command_Apache.

Link: https://github.com/ClangBuiltLinux/linux/issues/158
Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
Acked-by: Adam Radford <aradford@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/3w-9xxx.c | 12 ++++++++----
 drivers/scsi/3w-sas.c  |  8 +++++---
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/drivers/scsi/3w-9xxx.c b/drivers/scsi/3w-9xxx.c
index 05293babb0310..2d655a97b959e 100644
--- a/drivers/scsi/3w-9xxx.c
+++ b/drivers/scsi/3w-9xxx.c
@@ -143,7 +143,9 @@ static int twa_poll_status_gone(TW_Device_Extension *tw_dev, u32 flag, int secon
 static int twa_post_command_packet(TW_Device_Extension *tw_dev, int request_id, char internal);
 static int twa_reset_device_extension(TW_Device_Extension *tw_dev);
 static int twa_reset_sequence(TW_Device_Extension *tw_dev, int soft_reset);
-static int twa_scsiop_execute_scsi(TW_Device_Extension *tw_dev, int request_id, char *cdb, int use_sg, TW_SG_Entry *sglistarg);
+static int twa_scsiop_execute_scsi(TW_Device_Extension *tw_dev, int request_id,
+				   unsigned char *cdb, int use_sg,
+				   TW_SG_Entry *sglistarg);
 static void twa_scsiop_execute_scsi_complete(TW_Device_Extension *tw_dev, int request_id);
 static char *twa_string_lookup(twa_message_type *table, unsigned int aen_code);
 
@@ -278,7 +280,7 @@ static int twa_aen_complete(TW_Device_Extension *tw_dev, int request_id)
 static int twa_aen_drain_queue(TW_Device_Extension *tw_dev, int no_check_reset)
 {
 	int request_id = 0;
-	char cdb[TW_MAX_CDB_LEN];
+	unsigned char cdb[TW_MAX_CDB_LEN];
 	TW_SG_Entry sglist[1];
 	int finished = 0, count = 0;
 	TW_Command_Full *full_command_packet;
@@ -423,7 +425,7 @@ static void twa_aen_queue_event(TW_Device_Extension *tw_dev, TW_Command_Apache_H
 /* This function will read the aen queue from the isr */
 static int twa_aen_read_queue(TW_Device_Extension *tw_dev, int request_id)
 {
-	char cdb[TW_MAX_CDB_LEN];
+	unsigned char cdb[TW_MAX_CDB_LEN];
 	TW_SG_Entry sglist[1];
 	TW_Command_Full *full_command_packet;
 	int retval = 1;
@@ -1798,7 +1800,9 @@ static int twa_scsi_queue_lck(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_
 static DEF_SCSI_QCMD(twa_scsi_queue)
 
 /* This function hands scsi cdb's to the firmware */
-static int twa_scsiop_execute_scsi(TW_Device_Extension *tw_dev, int request_id, char *cdb, int use_sg, TW_SG_Entry *sglistarg)
+static int twa_scsiop_execute_scsi(TW_Device_Extension *tw_dev, int request_id,
+				   unsigned char *cdb, int use_sg,
+				   TW_SG_Entry *sglistarg)
 {
 	TW_Command_Full *full_command_packet;
 	TW_Command_Apache *command_packet;
diff --git a/drivers/scsi/3w-sas.c b/drivers/scsi/3w-sas.c
index 266bdac753042..480cf82700e9f 100644
--- a/drivers/scsi/3w-sas.c
+++ b/drivers/scsi/3w-sas.c
@@ -287,7 +287,9 @@ static int twl_post_command_packet(TW_Device_Extension *tw_dev, int request_id)
 } /* End twl_post_command_packet() */
 
 /* This function hands scsi cdb's to the firmware */
-static int twl_scsiop_execute_scsi(TW_Device_Extension *tw_dev, int request_id, char *cdb, int use_sg, TW_SG_Entry_ISO *sglistarg)
+static int twl_scsiop_execute_scsi(TW_Device_Extension *tw_dev, int request_id,
+				   unsigned char *cdb, int use_sg,
+				   TW_SG_Entry_ISO *sglistarg)
 {
 	TW_Command_Full *full_command_packet;
 	TW_Command_Apache *command_packet;
@@ -372,7 +374,7 @@ static int twl_scsiop_execute_scsi(TW_Device_Extension *tw_dev, int request_id,
 /* This function will read the aen queue from the isr */
 static int twl_aen_read_queue(TW_Device_Extension *tw_dev, int request_id)
 {
-	char cdb[TW_MAX_CDB_LEN];
+	unsigned char cdb[TW_MAX_CDB_LEN];
 	TW_SG_Entry_ISO sglist[1];
 	TW_Command_Full *full_command_packet;
 	int retval = 1;
@@ -554,7 +556,7 @@ static int twl_poll_response(TW_Device_Extension *tw_dev, int request_id, int se
 static int twl_aen_drain_queue(TW_Device_Extension *tw_dev, int no_check_reset)
 {
 	int request_id = 0;
-	char cdb[TW_MAX_CDB_LEN];
+	unsigned char cdb[TW_MAX_CDB_LEN];
 	TW_SG_Entry_ISO sglist[1];
 	int finished = 0, count = 0;
 	TW_Command_Full *full_command_packet;
-- 
GitLab


From 50435d4211a784e1577b4af72b07276c3851d324 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 18 Oct 2018 15:45:40 -0700
Subject: [PATCH 0172/1890] scsi: qla2xxx: Modify fall-through annotations

This patch avoids that the compiler complains about missing fall-through
annotations when building with W=1.

Cc: Himanshu Madhani <himanshu.madhani@cavium.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Acked-by: Himanshu Madhani <himanshu.madhani@cavium.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qla2xxx/qla_init.c   | 2 +-
 drivers/scsi/qla2xxx/qla_target.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index c72d8012fe2aa..2ccf9f190c68c 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -680,7 +680,7 @@ static void qla24xx_handle_gnl_done_event(scsi_qla_host_t *vha,
 					    fcport);
 					break;
 				}
-				/* drop through */
+				/* fall through */
 			default:
 				if (fcport_is_smaller(fcport)) {
 					/* local adapter is bigger */
diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index 39828207bc1d2..78dfece2e89d1 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -4540,7 +4540,7 @@ static int qlt_issue_task_mgmt(struct fc_port *sess, u64 lun,
 	case QLA_TGT_CLEAR_TS:
 	case QLA_TGT_ABORT_TS:
 		abort_cmds_for_lun(vha, lun, a->u.isp24.fcp_hdr.s_id);
-		/* drop through */
+		/* fall through */
 	case QLA_TGT_CLEAR_ACA:
 		h = qlt_find_qphint(vha, mcmd->unpacked_lun);
 		mcmd->qpair = h->qpair;
-- 
GitLab


From 807eb90703e78c0fb853d8e5b90c9947d7a95cba Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 18 Oct 2018 15:45:41 -0700
Subject: [PATCH 0173/1890] scsi: qla2xxx: Improve several kernel-doc headers

This patch avoids that complaints about kernel-doc headers are reported
when building with W=1.

Cc: Himanshu Madhani <himanshu.madhani@cavium.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Acked-by: Himanshu Madhani <himanshu.madhani@cavium.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qla2xxx/qla_iocb.c   | 4 ++--
 drivers/scsi/qla2xxx/qla_isr.c    | 6 +++---
 drivers/scsi/qla2xxx/qla_mbx.c    | 6 +++---
 drivers/scsi/qla2xxx/qla_mr.c     | 6 +++---
 drivers/scsi/qla2xxx/qla_nx.c     | 2 +-
 drivers/scsi/qla2xxx/qla_nx2.c    | 2 +-
 drivers/scsi/qla2xxx/qla_sup.c    | 2 +-
 drivers/scsi/qla2xxx/qla_target.c | 6 +++---
 8 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c
index 86fb8b21aa710..032635321ad6e 100644
--- a/drivers/scsi/qla2xxx/qla_iocb.c
+++ b/drivers/scsi/qla2xxx/qla_iocb.c
@@ -1195,8 +1195,8 @@ qla24xx_walk_and_build_prot_sglist(struct qla_hw_data *ha, srb_t *sp,
  * @sp: SRB command to process
  * @cmd_pkt: Command type 3 IOCB
  * @tot_dsds: Total number of segments to transfer
- * @tot_prot_dsds:
- * @fw_prot_opts:
+ * @tot_prot_dsds: Total number of segments with protection information
+ * @fw_prot_opts: Protection options to be passed to firmware
  */
 inline int
 qla24xx_build_scsi_crc_2_iocbs(srb_t *sp, struct cmd_type_crc_2 *cmd_pkt,
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index d73b04e405902..30d3090842f85 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -25,7 +25,7 @@ static int qla2x00_error_entry(scsi_qla_host_t *, struct rsp_que *,
 
 /**
  * qla2100_intr_handler() - Process interrupts for the ISP2100 and ISP2200.
- * @irq:
+ * @irq: interrupt number
  * @dev_id: SCSI driver HA context
  *
  * Called by system whenever the host adapter generates an interrupt.
@@ -144,7 +144,7 @@ qla2x00_check_reg16_for_disconnect(scsi_qla_host_t *vha, uint16_t reg)
 
 /**
  * qla2300_intr_handler() - Process interrupts for the ISP23xx and ISP63xx.
- * @irq:
+ * @irq: interrupt number
  * @dev_id: SCSI driver HA context
  *
  * Called by system whenever the host adapter generates an interrupt.
@@ -3109,7 +3109,7 @@ qla2xxx_check_risc_status(scsi_qla_host_t *vha)
 
 /**
  * qla24xx_intr_handler() - Process interrupts for the ISP23xx and ISP24xx.
- * @irq:
+ * @irq: interrupt number
  * @dev_id: SCSI driver HA context
  *
  * Called by system whenever the host adapter generates an interrupt.
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index 2f3e5075ae76e..191b6b7c8747d 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -3478,9 +3478,9 @@ qla8044_read_serdes_word(scsi_qla_host_t *vha, uint32_t addr, uint32_t *data)
 /**
  * qla2x00_set_serdes_params() -
  * @vha: HA context
- * @sw_em_1g:
- * @sw_em_2g:
- * @sw_em_4g:
+ * @sw_em_1g: serial link options
+ * @sw_em_2g: serial link options
+ * @sw_em_4g: serial link options
  *
  * Returns
  */
diff --git a/drivers/scsi/qla2xxx/qla_mr.c b/drivers/scsi/qla2xxx/qla_mr.c
index 521a513705549..2d96f3b7e3e36 100644
--- a/drivers/scsi/qla2xxx/qla_mr.c
+++ b/drivers/scsi/qla2xxx/qla_mr.c
@@ -2624,7 +2624,7 @@ qlafx00_status_cont_entry(struct rsp_que *rsp, sts_cont_entry_t *pkt)
  * qlafx00_multistatus_entry() - Process Multi response queue entries.
  * @vha: SCSI driver HA context
  * @rsp: response queue
- * @pkt:
+ * @pkt: received packet
  */
 static void
 qlafx00_multistatus_entry(struct scsi_qla_host *vha,
@@ -2867,7 +2867,7 @@ qlafx00_async_event(scsi_qla_host_t *vha)
 /**
  * qlafx00x_mbx_completion() - Process mailbox command completions.
  * @vha: SCSI driver HA context
- * @mb0:
+ * @mb0: value to be written into mailbox register 0
  */
 static void
 qlafx00_mbx_completion(scsi_qla_host_t *vha, uint32_t mb0)
@@ -2893,7 +2893,7 @@ qlafx00_mbx_completion(scsi_qla_host_t *vha, uint32_t mb0)
 
 /**
  * qlafx00_intr_handler() - Process interrupts for the ISPFX00.
- * @irq:
+ * @irq: interrupt number
  * @dev_id: SCSI driver HA context
  *
  * Called by system whenever the host adapter generates an interrupt.
diff --git a/drivers/scsi/qla2xxx/qla_nx.c b/drivers/scsi/qla2xxx/qla_nx.c
index 121e18b3b9f83..f2f54806f4da9 100644
--- a/drivers/scsi/qla2xxx/qla_nx.c
+++ b/drivers/scsi/qla2xxx/qla_nx.c
@@ -2010,7 +2010,7 @@ qla82xx_mbx_completion(scsi_qla_host_t *vha, uint16_t mb0)
 
 /**
  * qla82xx_intr_handler() - Process interrupts for the ISP23xx and ISP63xx.
- * @irq:
+ * @irq: interrupt number
  * @dev_id: SCSI driver HA context
  *
  * Called by system whenever the host adapter generates an interrupt.
diff --git a/drivers/scsi/qla2xxx/qla_nx2.c b/drivers/scsi/qla2xxx/qla_nx2.c
index 3a2b0282df149..fe856b602e031 100644
--- a/drivers/scsi/qla2xxx/qla_nx2.c
+++ b/drivers/scsi/qla2xxx/qla_nx2.c
@@ -3878,7 +3878,7 @@ qla8044_write_optrom_data(struct scsi_qla_host *vha, uint8_t *buf,
 #define PF_BITS_MASK		(0xF << 16)
 /**
  * qla8044_intr_handler() - Process interrupts for the ISP8044
- * @irq:
+ * @irq: interrupt number
  * @dev_id: SCSI driver HA context
  *
  * Called by system whenever the host adapter generates an interrupt.
diff --git a/drivers/scsi/qla2xxx/qla_sup.c b/drivers/scsi/qla2xxx/qla_sup.c
index 4499c787165f1..2a3055c799fb6 100644
--- a/drivers/scsi/qla2xxx/qla_sup.c
+++ b/drivers/scsi/qla2xxx/qla_sup.c
@@ -2229,7 +2229,7 @@ qla2x00_erase_flash_sector(struct qla_hw_data *ha, uint32_t addr,
 
 /**
  * qla2x00_get_flash_manufacturer() - Read manufacturer ID from flash chip.
- * @ha:
+ * @ha: host adapter
  * @man_id: Flash manufacturer ID
  * @flash_id: Flash ID
  */
diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index 78dfece2e89d1..c4504740f0e21 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -6598,9 +6598,9 @@ static void qlt_lport_dump(struct scsi_qla_host *vha, u64 wwpn,
  * qla_tgt_lport_register - register lport with external module
  *
  * @target_lport_ptr: pointer for tcm_qla2xxx specific lport data
- * @phys_wwpn:
- * @npiv_wwpn:
- * @npiv_wwnn:
+ * @phys_wwpn: physical port WWPN
+ * @npiv_wwpn: NPIV WWPN
+ * @npiv_wwnn: NPIV WWNN
  * @callback:  lport initialization callback for tcm_qla2xxx code
  */
 int qlt_lport_register(void *target_lport_ptr, u64 phys_wwpn,
-- 
GitLab


From 8f9a214823c9806386760b9f8624a376bbcd5232 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 18 Oct 2018 15:45:42 -0700
Subject: [PATCH 0174/1890] scsi: qla2xxx: Declare local functions 'static'

This patch avoids that the compiler complains about missing declarations
when building with W=1.

Cc: Himanshu Madhani <himanshu.madhani@cavium.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Acked-by: Himanshu Madhani <himanshu.madhani@cavium.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qla2xxx/qla_init.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index 2ccf9f190c68c..6fe20c27acc16 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -425,7 +425,7 @@ void qla24xx_handle_adisc_event(scsi_qla_host_t *vha, struct event_arg *ea)
 	__qla24xx_handle_gpdb_event(vha, ea);
 }
 
-int qla_post_els_plogi_work(struct scsi_qla_host *vha, fc_port_t *fcport)
+static int qla_post_els_plogi_work(struct scsi_qla_host *vha, fc_port_t *fcport)
 {
 	struct qla_work_evt *e;
 
@@ -1551,7 +1551,8 @@ void qla24xx_handle_relogin_event(scsi_qla_host_t *vha,
 }
 
 
-void qla_handle_els_plogi_done(scsi_qla_host_t *vha, struct event_arg *ea)
+static void qla_handle_els_plogi_done(scsi_qla_host_t *vha,
+				      struct event_arg *ea)
 {
 	ql_dbg(ql_dbg_disc, vha, 0x2118,
 	    "%s %d %8phC post PRLI\n",
-- 
GitLab


From 109a5987d9ead316523647d6310d609dc95bdaa2 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 18 Oct 2018 15:45:43 -0700
Subject: [PATCH 0175/1890] scsi: qla2xxx: Make qla2x00_sysfs_write_nvram()
 easier to analyze

Modify the unlock statement such that it becomes easier for static
analyzers to analyze it. This patch does not change any functionality.

Cc: Himanshu Madhani <himanshu.madhani@cavium.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Acked-by: Himanshu Madhani <himanshu.madhani@cavium.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qla2xxx/qla_attr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index b28f159fdaee7..0bb9ac6ece920 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -218,7 +218,7 @@ qla2x00_sysfs_write_nvram(struct file *filp, struct kobject *kobj,
 
 	mutex_lock(&ha->optrom_mutex);
 	if (qla2x00_chip_is_down(vha)) {
-		mutex_unlock(&vha->hw->optrom_mutex);
+		mutex_unlock(&ha->optrom_mutex);
 		return -EAGAIN;
 	}
 
-- 
GitLab


From eb023220f4eac1703e22e48ed62310a6565b3a1f Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 18 Oct 2018 15:45:44 -0700
Subject: [PATCH 0176/1890] scsi: qla2xxx: Remove a set-but-not-used variable

This patch does not change any functionality.

Cc: Himanshu Madhani <himanshu.madhani@cavium.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Acked-by: Himanshu Madhani <himanshu.madhani@cavium.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qla2xxx/qla_os.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index dba672f87cb2d..01607d2f2c347 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -1749,7 +1749,7 @@ qla2x00_loop_reset(scsi_qla_host_t *vha)
 static void
 __qla2x00_abort_all_cmds(struct qla_qpair *qp, int res)
 {
-	int cnt, status;
+	int cnt;
 	unsigned long flags;
 	srb_t *sp;
 	scsi_qla_host_t *vha = qp->vha;
@@ -1799,8 +1799,8 @@ __qla2x00_abort_all_cmds(struct qla_qpair *qp, int res)
 					if (!sp_get(sp)) {
 						spin_unlock_irqrestore
 							(qp->qp_lock_ptr, flags);
-						status = qla2xxx_eh_abort(
-						    GET_CMD_SP(sp));
+						qla2xxx_eh_abort(
+							GET_CMD_SP(sp));
 						spin_lock_irqsave
 							(qp->qp_lock_ptr, flags);
 					}
-- 
GitLab


From 5b0af4777b1bf397787f03336f0db34f185ca565 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 18 Oct 2018 15:45:45 -0700
Subject: [PATCH 0177/1890] scsi: qla2xxx: Make sure that
 qlafx00_ioctl_iosb_entry() initializes 'res'

Only one of the two code paths in qlafx00_ioctl_iosb_entry() initializes
the variable 'res'. Make sure that 'res' is initialized before
sp->done(sp, res) is called.

Cc: Himanshu Madhani <himanshu.madhani@cavium.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Acked-by: Himanshu Madhani <himanshu.madhani@cavium.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qla2xxx/qla_mr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/qla2xxx/qla_mr.c b/drivers/scsi/qla2xxx/qla_mr.c
index 2d96f3b7e3e36..b8f967e618915 100644
--- a/drivers/scsi/qla2xxx/qla_mr.c
+++ b/drivers/scsi/qla2xxx/qla_mr.c
@@ -2212,7 +2212,7 @@ qlafx00_ioctl_iosb_entry(scsi_qla_host_t *vha, struct req_que *req,
 	struct bsg_job *bsg_job;
 	struct fc_bsg_reply *bsg_reply;
 	struct srb_iocb *iocb_job;
-	int res;
+	int res = 0;
 	struct qla_mt_iocb_rsp_fx00 fstatus;
 	uint8_t	*fw_sts_ptr;
 
-- 
GitLab


From 2c309aeed62c25661eb2c7d4e4510613a1c7ffc2 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 18 Oct 2018 15:45:46 -0700
Subject: [PATCH 0178/1890] scsi: qla2xxx: Remove two arguments from
 qlafx00_error_entry()

Move a debug statement from qlafx00_error_entry() into its caller. Remove
one unused argument from that function. This patch does not change the
behavior of the qla2xxx driver.

Cc: Himanshu Madhani <himanshu.madhani@cavium.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Acked-by: Himanshu Madhani <himanshu.madhani@cavium.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qla2xxx/qla_mr.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/drivers/scsi/qla2xxx/qla_mr.c b/drivers/scsi/qla2xxx/qla_mr.c
index b8f967e618915..60f964c53c01a 100644
--- a/drivers/scsi/qla2xxx/qla_mr.c
+++ b/drivers/scsi/qla2xxx/qla_mr.c
@@ -2681,12 +2681,10 @@ qlafx00_multistatus_entry(struct scsi_qla_host *vha,
  * @vha: SCSI driver HA context
  * @rsp: response queue
  * @pkt: Entry pointer
- * @estatus:
- * @etype:
  */
 static void
 qlafx00_error_entry(scsi_qla_host_t *vha, struct rsp_que *rsp,
-		    struct sts_entry_fx00 *pkt, uint8_t estatus, uint8_t etype)
+		    struct sts_entry_fx00 *pkt)
 {
 	srb_t *sp;
 	struct qla_hw_data *ha = vha->hw;
@@ -2695,9 +2693,6 @@ qlafx00_error_entry(scsi_qla_host_t *vha, struct rsp_que *rsp,
 	struct req_que *req = NULL;
 	int res = DID_ERROR << 16;
 
-	ql_dbg(ql_dbg_async, vha, 0x507f,
-	    "type of error status in response: 0x%x\n", estatus);
-
 	req = ha->req_q_map[que];
 
 	sp = qla2x00_get_sp_from_handle(vha, func, req, pkt);
@@ -2745,9 +2740,11 @@ qlafx00_process_response_queue(struct scsi_qla_host *vha,
 
 		if (pkt->entry_status != 0 &&
 		    pkt->entry_type != IOCTL_IOSB_TYPE_FX00) {
+			ql_dbg(ql_dbg_async, vha, 0x507f,
+			       "type of error status in response: 0x%x\n",
+			       pkt->entry_status);
 			qlafx00_error_entry(vha, rsp,
-			    (struct sts_entry_fx00 *)pkt, pkt->entry_status,
-			    pkt->entry_type);
+					    (struct sts_entry_fx00 *)pkt);
 			continue;
 		}
 
-- 
GitLab


From 33279c305d071090e529febf043a8a02d2ab451a Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Sun, 21 Oct 2018 09:50:54 +0000
Subject: [PATCH 0179/1890] scsi: mvsas: Remove set but not used variable 'id'

Fixes gcc '-Wunused-but-set-variable' warning:

drivers/scsi/mvsas/mv_sas.c: In function 'mvs_work_queue':
drivers/scsi/mvsas/mv_sas.c:1909:31: warning:
 variable 'id' set but not used [-Wunused-but-set-variable]

It never used since introduction in commit
20b09c2992fe ("[SCSI] mvsas: add support for 94xx; layout change; bug fixes")

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Reviewed-by: John Garry <john.garry@huawei.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/mvsas/mv_sas.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/scsi/mvsas/mv_sas.c b/drivers/scsi/mvsas/mv_sas.c
index 4753b3c6a2c2a..311d23c727cef 100644
--- a/drivers/scsi/mvsas/mv_sas.c
+++ b/drivers/scsi/mvsas/mv_sas.c
@@ -1905,8 +1905,7 @@ static void mvs_work_queue(struct work_struct *work)
 
 		if (phy->phy_event & PHY_PLUG_OUT) {
 			u32 tmp;
-			struct sas_identify_frame *id;
-			id = (struct sas_identify_frame *)phy->frame_rcvd;
+
 			tmp = MVS_CHIP_DISP->read_phy_ctl(mvi, phy_no);
 			phy->phy_event &= ~PHY_PLUG_OUT;
 			if (!(tmp & PHY_READY_MASK)) {
-- 
GitLab


From 409fa67a7f4a68f6990955849c22eaca589f18c0 Mon Sep 17 00:00:00 2001
From: kbuild test robot <fengguang.wu@intel.com>
Date: Tue, 23 Oct 2018 20:20:22 +0800
Subject: [PATCH 0180/1890] ASoC: stm32: sai: fix noderef.cocci warnings

sound/soc/stm/stm32_sai_sub.c:393:26-32: ERROR: application of sizeof to pointer

 sizeof when applied to a pointer typed expression gives the size of
 the pointer

Generated by: scripts/coccinelle/misc/noderef.cocci

Fixes: 8307b2afd386 ("ASoC: stm32: sai: set sai as mclk clock provider")
CC: Olivier Moysan <olivier.moysan@st.com>
Signed-off-by: kbuild test robot <fengguang.wu@intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/stm/stm32_sai_sub.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/soc/stm/stm32_sai_sub.c b/sound/soc/stm/stm32_sai_sub.c
index ea05cc91aa05d..211589b0b2ef5 100644
--- a/sound/soc/stm/stm32_sai_sub.c
+++ b/sound/soc/stm/stm32_sai_sub.c
@@ -390,7 +390,7 @@ static int stm32_sai_add_mclk_provider(struct stm32_sai_sub_data *sai)
 	char *mclk_name, *p, *s = (char *)pname;
 	int ret, i = 0;
 
-	mclk = devm_kzalloc(dev, sizeof(mclk), GFP_KERNEL);
+	mclk = devm_kzalloc(dev, sizeof(*mclk), GFP_KERNEL);
 	if (!mclk)
 		return -ENOMEM;
 
-- 
GitLab


From 3511ba7d4ca6f39e2d060bb94e42a41ad1fee7bf Mon Sep 17 00:00:00 2001
From: Keiji Hayashibara <hayashibara.keiji@socionext.com>
Date: Wed, 24 Oct 2018 18:34:29 +0900
Subject: [PATCH 0181/1890] spi: uniphier: fix incorrect property items

This commit fixes incorrect property because it was different
from the actual.
The parameters of '#address-cells' and '#size-cells' were removed,
and 'interrupts', 'pinctrl-names' and 'pinctrl-0' were added.

Fixes: 4dcd5c2781f3 ("spi: add DT bindings for UniPhier SPI controller")
Signed-off-by: Keiji Hayashibara <hayashibara.keiji@socionext.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../devicetree/bindings/spi/spi-uniphier.txt       | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/Documentation/devicetree/bindings/spi/spi-uniphier.txt b/Documentation/devicetree/bindings/spi/spi-uniphier.txt
index 504a4ecfc7b16..b04e66a52de5d 100644
--- a/Documentation/devicetree/bindings/spi/spi-uniphier.txt
+++ b/Documentation/devicetree/bindings/spi/spi-uniphier.txt
@@ -5,18 +5,20 @@ UniPhier SoCs have SCSSI which supports SPI single channel.
 Required properties:
  - compatible: should be "socionext,uniphier-scssi"
  - reg: address and length of the spi master registers
- - #address-cells: must be <1>, see spi-bus.txt
- - #size-cells: must be <0>, see spi-bus.txt
- - clocks: A phandle to the clock for the device.
- - resets: A phandle to the reset control for the device.
+ - interrupts: a single interrupt specifier
+ - pinctrl-names: should be "default"
+ - pinctrl-0: pin control state for the default mode
+ - clocks: a phandle to the clock for the device
+ - resets: a phandle to the reset control for the device
 
 Example:
 
 spi0: spi@54006000 {
 	compatible = "socionext,uniphier-scssi";
 	reg = <0x54006000 0x100>;
-	#address-cells = <1>;
-	#size-cells = <0>;
+	interrupts = <0 39 4>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_spi0>;
 	clocks = <&peri_clk 11>;
 	resets = <&peri_rst 11>;
 };
-- 
GitLab


From 49af5d95b9b3c21a84ad115a9db9acbc036d849a Mon Sep 17 00:00:00 2001
From: Dhinakaran Pandiyan <dhinakaran.pandiyan@intel.com>
Date: Thu, 27 Sep 2018 13:57:30 -0700
Subject: [PATCH 0182/1890] drm/i915/dp: Fix link retraining comment in
 intel_dp_long_pulse()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Comment claims link needs to be retrained because the connected sink raised
a long pulse to indicate link loss. If the sink did so,
intel_dp_hotplug() would have handled link retraining. Looking at the
logs in Bugzilla referenced in commit '3cf71bc9904d ("drm/i915: Re-apply
Perform link quality check, unconditionally during long pulse"")', the
issue is that the sink does not trigger an interrupt. What we want is
->detect() from user space to check link status and retrain. Ville's
review for the original patch also indicates the same root cause. So,
rewrite the comment.

v2: Patch split and rewrote comment.

Cc: Lyude Paul <lyude@redhat.com>
Cc: Ville SyrjÃ¤lÃ¤ <ville.syrjala@linux.intel.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Jan-Marek Glogowski <glogow@fbihome.de>
References: 3cf71bc9904d ("drm/i915: Re-apply "Perform link quality check, unconditionally during long pulse"")
Signed-off-by: Dhinakaran Pandiyan <dhinakaran.pandiyan@intel.com>
Reviewed-by: Ville SyrjÃ¤lÃ¤ <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180927205735.16651-1-dhinakaran.pandiyan@intel.com
(cherry picked from commit 9ebd8202393dde9d3678c9ec162c1aa63ba17eac)
Fixes: 399334708b4f ("drm/i915: Re-apply "Perform link quality check, unconditionally during long pulse"")
Cc: stable@vger.kernel.org
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_dp.c | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 3fae4dab295f0..e2a1af0a34928 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -5104,16 +5104,9 @@ intel_dp_long_pulse(struct intel_connector *connector,
 		goto out;
 	} else {
 		/*
-		 * If display is now connected check links status,
-		 * there has been known issues of link loss triggering
-		 * long pulse.
-		 *
-		 * Some sinks (eg. ASUS PB287Q) seem to perform some
-		 * weird HPD ping pong during modesets. So we can apparently
-		 * end up with HPD going low during a modeset, and then
-		 * going back up soon after. And once that happens we must
-		 * retrain the link to get a picture. That's in case no
-		 * userspace component reacted to intermittent HPD dip.
+		 * Some external monitors do not signal loss of link
+		 * synchronization with an IRQ_HPD, so force a link status
+		 * check.
 		 */
 		struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base;
 
-- 
GitLab


From f9776280c29e77a18cbc7ebb6d48f7885e494990 Mon Sep 17 00:00:00 2001
From: Dhinakaran Pandiyan <dhinakaran.pandiyan@intel.com>
Date: Thu, 27 Sep 2018 13:57:31 -0700
Subject: [PATCH 0183/1890] drm/i915/dp: Restrict link retrain workaround to
 external monitors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit '3cf71bc9904d ("drm/i915: Re-apply "Perform link quality check,
unconditionally during long pulse"")' applies a work around for sinks
that don't signal link loss. The work around does not need to have to be
that broad as the issue was seen with only one particular monitor; limit
this only for external displays as eDP features like PSR turn off the link
and the driver ends up retraining the link seeeing that link is not
synchronized.

Cc: Lyude Paul <lyude@redhat.com>
Cc: Jan-Marek Glogowski <glogow@fbihome.de>
Cc: Ville SyrjÃ¤lÃ¤ <ville.syrjala@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
References: 3cf71bc9904d ("drm/i915: Re-apply "Perform link quality check, unconditionally during long pulse"")
Signed-off-by: Dhinakaran Pandiyan <dhinakaran.pandiyan@intel.com>
Reviewed-by: Ville SyrjÃ¤lÃ¤ <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180927205735.16651-2-dhinakaran.pandiyan@intel.com
(cherry picked from commit f24f6eb95807bca0dbd8dc5b2f3a4099000f4472)
Fixes: 399334708b4f ("drm/i915: Re-apply "Perform link quality check, unconditionally during long pulse"")
Cc: stable@vger.kernel.org
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_dp.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index e2a1af0a34928..13f9b56a9ce7c 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -5102,12 +5102,13 @@ intel_dp_long_pulse(struct intel_connector *connector,
 		 */
 		status = connector_status_disconnected;
 		goto out;
-	} else {
-		/*
-		 * Some external monitors do not signal loss of link
-		 * synchronization with an IRQ_HPD, so force a link status
-		 * check.
-		 */
+	}
+
+	/*
+	 * Some external monitors do not signal loss of link synchronization
+	 * with an IRQ_HPD, so force a link status check.
+	 */
+	if (!intel_dp_is_edp(intel_dp)) {
 		struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base;
 
 		intel_dp_retrain_link(encoder, ctx);
-- 
GitLab


From b585ebdb5912cf1438d4822f79aaebe36a2d123a Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Thu, 20 Sep 2018 11:05:36 -0700
Subject: [PATCH 0184/1890] perf script: Add --insn-trace for instruction
 decoding

Add a --insn-trace short hand option for decoding and disassembling
instruction streams for intel_pt. This automatically pipes the output
into the xed disassembler to generate disassembled instructions.  This
just makes this use model much nicer to use.

Before

  % perf record -e intel_pt// ...
  % perf script --itrace=i0ns --ns -F +insn,-event,-period | xed -F insn: -A -64
   swapper 0 [000] 17276.429606186: ffffffff81010486 pt_config ([kernel.kallsyms])    nopl  %eax, (%rax,%rax,1)
   swapper 0 [000] 17276.429606186: ffffffff8101048b pt_config ([kernel.kallsyms])    add $0x10, %rsp
   swapper 0 [000] 17276.429606186: ffffffff8101048f pt_config ([kernel.kallsyms])    popq  %rbx
   swapper 0 [000] 17276.429606186: ffffffff81010490 pt_config ([kernel.kallsyms])    popq  %rbp
   swapper 0 [000] 17276.429606186: ffffffff81010491 pt_config ([kernel.kallsyms])    popq  %r12
   swapper 0 [000] 17276.429606186: ffffffff81010493 pt_config ([kernel.kallsyms])    popq  %r13
   swapper 0 [000] 17276.429606186: ffffffff81010495 pt_config ([kernel.kallsyms])    popq  %r14
   swapper 0 [000] 17276.429606186: ffffffff81010497 pt_config ([kernel.kallsyms])    popq  %r15
   swapper 0 [000] 17276.429606186: ffffffff81010499 pt_config ([kernel.kallsyms])    retq
   swapper 0 [000] 17276.429606186: ffffffff8101063e pt_event_add ([kernel.kallsyms])         cmpl  $0x1, 0x1b0(%rbx)
   swapper 0 [000] 17276.429606186: ffffffff81010645 pt_event_add ([kernel.kallsyms])         mov $0xffffffea, %eax
   swapper 0 [000] 17276.429606186: ffffffff8101064a pt_event_add ([kernel.kallsyms])         mov $0x0, %edx
   swapper 0 [000] 17276.429606186: ffffffff8101064f pt_event_add ([kernel.kallsyms])         popq  %rbx
   swapper 0 [000] 17276.429606186: ffffffff81010650 pt_event_add ([kernel.kallsyms])         cmovnz %edx, %eax
   swapper 0 [000] 17276.429606186: ffffffff81010653 pt_event_add ([kernel.kallsyms])         jmp 0xffffffff81010635
   swapper 0 [000] 17276.429606186: ffffffff81010635 pt_event_add ([kernel.kallsyms])         retq
   swapper 0 [000] 17276.429606186: ffffffff8115e687 event_sched_in.isra.107 ([kernel.kallsyms])       test %eax, %eax

Now:

  % perf record -e intel_pt// ...
  % perf script --insn-trace --xed
  ... same output ...

XED needs to be installed with:

  $ git clone https://github.com/intelxed/mbuild.git mbuild
  $ git clone https://github.com/intelxed/xed
  $ cd xed
  $ ./mfile.py
  $ ./mfile.py examples
  $ sudo ./mfile.py --prefix=/usr/local install
  $ sudo cp obj/examples/xed /usr/local/bin
  $ xed | head -3
  ERROR: required argument(s) were missing
  Copyright (C) 2017, Intel Corporation. All rights reserved.
  XED version: [v10.0-328-g7d62c8c49b7b]
  $

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: http://lkml.kernel.org/r/20180920180540.14039-2-andi@firstfloor.org
[ Fixed up whitespace damage, added the 'mfile.py examples + cp obj/examples/xed ... ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/build-xed.txt   | 19 +++++++++++++++++++
 tools/perf/Documentation/perf-script.txt |  7 +++++++
 tools/perf/builtin-script.c              | 23 +++++++++++++++++++++++
 3 files changed, 49 insertions(+)
 create mode 100644 tools/perf/Documentation/build-xed.txt

diff --git a/tools/perf/Documentation/build-xed.txt b/tools/perf/Documentation/build-xed.txt
new file mode 100644
index 0000000000000..6222c1e7231fb
--- /dev/null
+++ b/tools/perf/Documentation/build-xed.txt
@@ -0,0 +1,19 @@
+
+For --xed the xed tool is needed. Here is how to install it:
+
+  $ git clone https://github.com/intelxed/mbuild.git mbuild
+  $ git clone https://github.com/intelxed/xed
+  $ cd xed
+  $ ./mfile.py --share
+  $ ./mfile.py examples
+  $ sudo ./mfile.py --prefix=/usr/local install
+  $ sudo ldconfig
+  $ sudo cp obj/examples/xed /usr/local/bin
+
+Basic xed testing:
+
+  $ xed | head -3
+  ERROR: required argument(s) were missing
+  Copyright (C) 2017, Intel Corporation. All rights reserved.
+  XED version: [v10.0-328-g7d62c8c49b7b]
+  $
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index afdafe2110a17..00c655ab4968e 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -383,6 +383,13 @@ include::itrace.txt[]
 	will be printed. Each entry has function name and file/line. Enabled by
 	default, disable with --no-inline.
 
+--insn-trace::
+	Show instruction stream for intel_pt traces. Combine with --xed to
+	show disassembly.
+
+--xed::
+	Run xed disassembler on output. Requires installing the xed disassembler.
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-script-perl[1],
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index ca09b7d2adb7e..411ea175bcaf0 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -44,6 +44,7 @@
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <unistd.h>
+#include <subcmd/pager.h>
 
 #include "sane_ctype.h"
 
@@ -3103,6 +3104,24 @@ static int perf_script__process_auxtrace_info(struct perf_session *session,
 #define perf_script__process_auxtrace_info 0
 #endif
 
+static int parse_insn_trace(const struct option *opt __maybe_unused,
+			    const char *str __maybe_unused,
+			    int unset __maybe_unused)
+{
+	parse_output_fields(NULL, "+insn,-event,-period", 0);
+	itrace_parse_synth_opts(opt, "i0ns", 0);
+	nanosecs = true;
+	return 0;
+}
+
+static int parse_xed(const struct option *opt __maybe_unused,
+		     const char *str __maybe_unused,
+		     int unset __maybe_unused)
+{
+	force_pager("xed -F insn: -A -64 | less");
+	return 0;
+}
+
 int cmd_script(int argc, const char **argv)
 {
 	bool show_full_info = false;
@@ -3187,6 +3206,10 @@ int cmd_script(int argc, const char **argv)
 		    "system-wide collection from all CPUs"),
 	OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
 		   "only consider these symbols"),
+	OPT_CALLBACK_OPTARG(0, "insn-trace", &itrace_synth_opts, NULL, NULL,
+			"Decode instructions from itrace", parse_insn_trace),
+	OPT_CALLBACK_OPTARG(0, "xed", NULL, NULL, NULL,
+			"Run xed disassembler on output", parse_xed),
 	OPT_STRING(0, "stop-bt", &symbol_conf.bt_stop_list_str, "symbol[,symbol...]",
 		   "Stop display of callgraph at these symbols"),
 	OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
-- 
GitLab


From 4eb068157121939f4bc16256a37bcd88f5554123 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Thu, 20 Sep 2018 11:05:37 -0700
Subject: [PATCH 0185/1890] perf script: Make itrace script default to all
 calls

By default 'perf script' for itrace outputs sampled instructions or
branches. In my experience this is confusing to users because it's hard
to correlate with real program behavior. The sampling makes sense for
tools like 'perf report' that actually sample to reduce the run time,
but run time is normally not a problem for 'perf script'.  It's better
to give an accurate representation of the program flow.

Default 'perf script' to output all calls for itrace. That's a much saner
default. The old behavior can be still requested with 'perf script'
--itrace=ibxwpe100000

v2: Fix ETM build failure
v3: Really fix ETM build failure (Kim Phillips)

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Kim Phillips <kim.phillips@arm.com>
Cc: Leo Yan <leo.yan@linaro.org>
Link: http://lkml.kernel.org/r/20180920180540.14039-3-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/itrace.txt |  7 ++++---
 tools/perf/builtin-script.c         |  5 ++++-
 tools/perf/util/auxtrace.c          | 17 ++++++++++++-----
 tools/perf/util/auxtrace.h          |  5 ++++-
 tools/perf/util/cs-etm.c            |  3 ++-
 tools/perf/util/intel-bts.c         |  3 ++-
 tools/perf/util/intel-pt.c          |  3 ++-
 7 files changed, 30 insertions(+), 13 deletions(-)

diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt
index a3abe04c779d0..c2182cbabde3a 100644
--- a/tools/perf/Documentation/itrace.txt
+++ b/tools/perf/Documentation/itrace.txt
@@ -11,10 +11,11 @@
 		l	synthesize last branch entries (use with i or x)
 		s       skip initial number of events
 
-	The default is all events i.e. the same as --itrace=ibxwpe
+	The default is all events i.e. the same as --itrace=ibxwpe,
+	except for perf script where it is --itrace=ce
 
-	In addition, the period (default 100000) for instructions events
-	can be specified in units of:
+	In addition, the period (default 100000, except for perf script where it is 1)
+	for instructions events can be specified in units of:
 
 		i	instructions
 		t	ticks
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 411ea175bcaf0..6099c722a6796 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -3131,7 +3131,10 @@ int cmd_script(int argc, const char **argv)
 	char *rec_script_path = NULL;
 	char *rep_script_path = NULL;
 	struct perf_session *session;
-	struct itrace_synth_opts itrace_synth_opts = { .set = false, };
+	struct itrace_synth_opts itrace_synth_opts = {
+		.set = false,
+		.default_no_sample = true,
+	};
 	char *script_path = NULL;
 	const char **__argv;
 	int i, j, err = 0;
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index c4617bcfd521f..72d5ba2479bf1 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -962,16 +962,23 @@ s64 perf_event__process_auxtrace(struct perf_session *session,
 #define PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ	64
 #define PERF_ITRACE_MAX_LAST_BRANCH_SZ		1024
 
-void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts)
+void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts,
+				    bool no_sample)
 {
-	synth_opts->instructions = true;
 	synth_opts->branches = true;
 	synth_opts->transactions = true;
 	synth_opts->ptwrites = true;
 	synth_opts->pwr_events = true;
 	synth_opts->errors = true;
-	synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE;
-	synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
+	if (no_sample) {
+		synth_opts->period_type = PERF_ITRACE_PERIOD_INSTRUCTIONS;
+		synth_opts->period = 1;
+		synth_opts->calls = true;
+	} else {
+		synth_opts->instructions = true;
+		synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE;
+		synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
+	}
 	synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ;
 	synth_opts->last_branch_sz = PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ;
 	synth_opts->initial_skip = 0;
@@ -999,7 +1006,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
 	}
 
 	if (!str) {
-		itrace_synth_opts__set_default(synth_opts);
+		itrace_synth_opts__set_default(synth_opts, false);
 		return 0;
 	}
 
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index d88f6e9eb4611..8e50f96d4b23d 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -58,6 +58,7 @@ enum itrace_period_type {
 /**
  * struct itrace_synth_opts - AUX area tracing synthesis options.
  * @set: indicates whether or not options have been set
+ * @default_no_sample: Default to no sampling.
  * @inject: indicates the event (not just the sample) must be fully synthesized
  *          because 'perf inject' will write it out
  * @instructions: whether to synthesize 'instructions' events
@@ -82,6 +83,7 @@ enum itrace_period_type {
  */
 struct itrace_synth_opts {
 	bool			set;
+	bool			default_no_sample;
 	bool			inject;
 	bool			instructions;
 	bool			branches;
@@ -528,7 +530,8 @@ int perf_event__process_auxtrace_error(struct perf_session *session,
 				       union perf_event *event);
 int itrace_parse_synth_opts(const struct option *opt, const char *str,
 			    int unset);
-void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts);
+void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts,
+				    bool no_sample);
 
 size_t perf_event__fprintf_auxtrace_error(union perf_event *event, FILE *fp);
 void perf_session__auxtrace_error_inc(struct perf_session *session,
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 2ae640257fdbb..3b37d66dc5337 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -1432,7 +1432,8 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
 	if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
 		etm->synth_opts = *session->itrace_synth_opts;
 	} else {
-		itrace_synth_opts__set_default(&etm->synth_opts);
+		itrace_synth_opts__set_default(&etm->synth_opts,
+				session->itrace_synth_opts->default_no_sample);
 		etm->synth_opts.callchain = false;
 	}
 
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
index 7f0c83b6332bf..3b3a3d55dca18 100644
--- a/tools/perf/util/intel-bts.c
+++ b/tools/perf/util/intel-bts.c
@@ -910,7 +910,8 @@ int intel_bts_process_auxtrace_info(union perf_event *event,
 	if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
 		bts->synth_opts = *session->itrace_synth_opts;
 	} else {
-		itrace_synth_opts__set_default(&bts->synth_opts);
+		itrace_synth_opts__set_default(&bts->synth_opts,
+				session->itrace_synth_opts->default_no_sample);
 		if (session->itrace_synth_opts)
 			bts->synth_opts.thread_stack =
 				session->itrace_synth_opts->thread_stack;
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 48c1d415c6b06..ffa385a029b3f 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -2559,7 +2559,8 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
 	if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
 		pt->synth_opts = *session->itrace_synth_opts;
 	} else {
-		itrace_synth_opts__set_default(&pt->synth_opts);
+		itrace_synth_opts__set_default(&pt->synth_opts,
+				session->itrace_synth_opts->default_no_sample);
 		if (use_browser != -1) {
 			pt->synth_opts.branches = false;
 			pt->synth_opts.callchain = true;
-- 
GitLab


From d1b1552e15d41297abcaf3812378e3391d44fa6b Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Thu, 20 Sep 2018 11:05:38 -0700
Subject: [PATCH 0186/1890] tools script: Add --call-trace and --call-ret-trace

Add short cut options to print PT call trace and call-ret-trace, for
calls and call and returns. Roughly corresponds to ftrace function
tracer and function graph tracer.

Just makes these common use cases nicer to use.

% perf record -a -e intel_pt// sleep 1
% perf script --call-trace
	    perf   900 [000] 194167.205652203: ([kernel.kallsyms])          perf_pmu_enable
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])          __x86_indirect_thunk_rax
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])          event_filter_match
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])          group_sched_in
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])              __x86_indirect_thunk_rax
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])              event_sched_in.isra.107
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])                  perf_event_set_state.part.71
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])                      perf_event_update_time
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])                  perf_pmu_disable
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])                  perf_log_itrace_start
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])                  __x86_indirect_thunk_rax
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])                      perf_event_update_userpage

% perf script --call-ret-trace
	    perf   900 [000] 194167.205652203:   tr strt     ([unknown])        pt_config
            perf   900 [000] 194167.205652203:   return      ([kernel.kallsyms])            pt_config
            perf   900 [000] 194167.205652203:   return      ([kernel.kallsyms])            pt_event_add
            perf   900 [000] 194167.205652203:   call        ([kernel.kallsyms])            perf_pmu_enable
            perf   900 [000] 194167.205652203:   return      ([kernel.kallsyms])            perf_pmu_nop_void
            perf   900 [000] 194167.205652203:   return      ([kernel.kallsyms])            event_sched_in.isra.107
            perf   900 [000] 194167.205652203:   call        ([kernel.kallsyms])            __x86_indirect_thunk_rax
            perf   900 [000] 194167.205652203:   return      ([kernel.kallsyms])            perf_pmu_nop_int
            perf   900 [000] 194167.205652203:   return      ([kernel.kallsyms])            group_sched_in
            perf   900 [000] 194167.205652203:   call        ([kernel.kallsyms])            event_filter_match
            perf   900 [000] 194167.205652203:   return      ([kernel.kallsyms])            event_filter_match
            perf   900 [000] 194167.205652203:   call        ([kernel.kallsyms])            group_sched_in
            perf   900 [000] 194167.205652203:   call        ([kernel.kallsyms])                __x86_indirect_thunk_rax
            perf   900 [000] 194167.205652203:   return      ([kernel.kallsyms])                perf_pmu_nop_txn
            perf   900 [000] 194167.205652203:   call        ([kernel.kallsyms])                event_sched_in.isra.107
            perf   900 [000] 194167.205652203:   call        ([kernel.kallsyms])                    perf_event_set_state.part.71

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Tested-by: Leo Yan <leo.yan@linaro.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Kim Phillips <kim.phillips@arm.com>
Link: http://lkml.kernel.org/r/20180920180540.14039-4-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-script.txt |  7 +++++++
 tools/perf/builtin-script.c              | 24 ++++++++++++++++++++++++
 2 files changed, 31 insertions(+)

diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 00c655ab4968e..805baabd238eb 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -390,6 +390,13 @@ include::itrace.txt[]
 --xed::
 	Run xed disassembler on output. Requires installing the xed disassembler.
 
+--call-trace::
+	Show call stream for intel_pt traces. The CPUs are interleaved, but
+	can be filtered with -C.
+
+--call-ret-trace::
+	Show call and return stream for intel_pt traces.
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-script-perl[1],
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 6099c722a6796..566e1450898a2 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -3122,6 +3122,26 @@ static int parse_xed(const struct option *opt __maybe_unused,
 	return 0;
 }
 
+static int parse_call_trace(const struct option *opt __maybe_unused,
+			    const char *str __maybe_unused,
+			    int unset __maybe_unused)
+{
+	parse_output_fields(NULL, "-ip,-addr,-event,-period,+callindent", 0);
+	itrace_parse_synth_opts(opt, "cewp", 0);
+	nanosecs = true;
+	return 0;
+}
+
+static int parse_callret_trace(const struct option *opt __maybe_unused,
+			    const char *str __maybe_unused,
+			    int unset __maybe_unused)
+{
+	parse_output_fields(NULL, "-ip,-addr,-event,-period,+callindent,+flags", 0);
+	itrace_parse_synth_opts(opt, "crewp", 0);
+	nanosecs = true;
+	return 0;
+}
+
 int cmd_script(int argc, const char **argv)
 {
 	bool show_full_info = false;
@@ -3213,6 +3233,10 @@ int cmd_script(int argc, const char **argv)
 			"Decode instructions from itrace", parse_insn_trace),
 	OPT_CALLBACK_OPTARG(0, "xed", NULL, NULL, NULL,
 			"Run xed disassembler on output", parse_xed),
+	OPT_CALLBACK_OPTARG(0, "call-trace", &itrace_synth_opts, NULL, NULL,
+			"Decode calls from from itrace", parse_call_trace),
+	OPT_CALLBACK_OPTARG(0, "call-ret-trace", &itrace_synth_opts, NULL, NULL,
+			"Decode calls and returns from itrace", parse_callret_trace),
 	OPT_STRING(0, "stop-bt", &symbol_conf.bt_stop_list_str, "symbol[,symbol...]",
 		   "Stop display of callgraph at these symbols"),
 	OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
-- 
GitLab


From 99f753f048b3f02f31a56951781672021af6cd0d Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Thu, 20 Sep 2018 11:05:39 -0700
Subject: [PATCH 0187/1890] perf script: Implement --graph-function

Add a ftrace style --graph-function argument to 'perf script' that
allows to print itrace function calls only below a given function. This
makes it easier to find the code of interest in a large trace.

% perf record -e intel_pt//k -a sleep 1
% perf script --graph-function group_sched_in --call-trace
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])          group_sched_in
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])              __x86_indirect_thunk_rax
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])              event_sched_in.isra.107
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])                  perf_event_set_state.part.71
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])                      perf_event_update_time
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])                  perf_pmu_disable
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])                  perf_log_itrace_start
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])                  __x86_indirect_thunk_rax
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])                      perf_event_update_userpage
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])                          calc_timer_values
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])                              sched_clock_cpu
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])                          __x86_indirect_thunk_rax
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])                          arch_perf_update_userpage
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])                              __fentry__
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])                              using_native_sched_clock
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])                              sched_clock_stable
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])                  perf_pmu_enable
            perf   900 [000] 194167.205652203: ([kernel.kallsyms])              __x86_indirect_thunk_rax
         swapper     0 [001] 194167.205660693: ([kernel.kallsyms])          group_sched_in
         swapper     0 [001] 194167.205660693: ([kernel.kallsyms])              __x86_indirect_thunk_rax
         swapper     0 [001] 194167.205660693: ([kernel.kallsyms])              event_sched_in.isra.107
         swapper     0 [001] 194167.205660693: ([kernel.kallsyms])                  perf_event_set_state.part.71
         swapper     0 [001] 194167.205660693: ([kernel.kallsyms])                      perf_event_update_time
         swapper     0 [001] 194167.205660693: ([kernel.kallsyms])                  perf_pmu_disable
         swapper     0 [001] 194167.205660693: ([kernel.kallsyms])                  perf_log_itrace_start
         swapper     0 [001] 194167.205660693: ([kernel.kallsyms])                  __x86_indirect_thunk_rax
         swapper     0 [001] 194167.205660693: ([kernel.kallsyms])                      perf_event_update_userpage
         swapper     0 [001] 194167.205660693: ([kernel.kallsyms])                          calc_timer_values
         swapper     0 [001] 194167.205660693: ([kernel.kallsyms])                              sched_clock_cpu
         swapper     0 [001] 194167.205660693: ([kernel.kallsyms])                          __x86_indirect_thunk_rax
         swapper     0 [001] 194167.205660693: ([kernel.kallsyms])                          arch_perf_update_userpage
         swapper     0 [001] 194167.205660693: ([kernel.kallsyms])                              __fentry__
         swapper     0 [001] 194167.205660693: ([kernel.kallsyms])                              using_native_sched_clock
         swapper     0 [001] 194167.205660693: ([kernel.kallsyms])                              sched_clock_stable

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Tested-by: Leo Yan <leo.yan@linaro.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Kim Phillips <kim.phillips@arm.com>
Link: http://lkml.kernel.org/r/20180920180540.14039-5-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-script.txt |  4 +
 tools/perf/builtin-script.c              | 96 +++++++++++++++++++-----
 tools/perf/util/symbol.h                 |  3 +-
 tools/perf/util/thread.h                 |  2 +
 4 files changed, 86 insertions(+), 19 deletions(-)

diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 805baabd238eb..a2b37ce48094d 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -397,6 +397,10 @@ include::itrace.txt[]
 --call-ret-trace::
 	Show call and return stream for intel_pt traces.
 
+--graph-function::
+	For itrace only show specified functions and their callees for
+	itrace. Multiple functions can be separated by comma.
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-script-perl[1],
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 566e1450898a2..9d2249ea75e3c 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -1105,6 +1105,35 @@ static int perf_sample__fprintf_addr(struct perf_sample *sample,
 	return printed;
 }
 
+static const char *resolve_branch_sym(struct perf_sample *sample,
+				      struct perf_evsel *evsel,
+				      struct thread *thread,
+				      struct addr_location *al,
+				      u64 *ip)
+{
+	struct addr_location addr_al;
+	struct perf_event_attr *attr = &evsel->attr;
+	const char *name = NULL;
+
+	if (sample->flags & (PERF_IP_FLAG_CALL | PERF_IP_FLAG_TRACE_BEGIN)) {
+		if (sample_addr_correlates_sym(attr)) {
+			thread__resolve(thread, &addr_al, sample);
+			if (addr_al.sym)
+				name = addr_al.sym->name;
+			else
+				*ip = sample->addr;
+		} else {
+			*ip = sample->addr;
+		}
+	} else if (sample->flags & (PERF_IP_FLAG_RETURN | PERF_IP_FLAG_TRACE_END)) {
+		if (al->sym)
+			name = al->sym->name;
+		else
+			*ip = sample->ip;
+	}
+	return name;
+}
+
 static int perf_sample__fprintf_callindent(struct perf_sample *sample,
 					   struct perf_evsel *evsel,
 					   struct thread *thread,
@@ -1112,7 +1141,6 @@ static int perf_sample__fprintf_callindent(struct perf_sample *sample,
 {
 	struct perf_event_attr *attr = &evsel->attr;
 	size_t depth = thread_stack__depth(thread);
-	struct addr_location addr_al;
 	const char *name = NULL;
 	static int spacing;
 	int len = 0;
@@ -1126,22 +1154,7 @@ static int perf_sample__fprintf_callindent(struct perf_sample *sample,
 	if (thread->ts && sample->flags & PERF_IP_FLAG_RETURN)
 		depth += 1;
 
-	if (sample->flags & (PERF_IP_FLAG_CALL | PERF_IP_FLAG_TRACE_BEGIN)) {
-		if (sample_addr_correlates_sym(attr)) {
-			thread__resolve(thread, &addr_al, sample);
-			if (addr_al.sym)
-				name = addr_al.sym->name;
-			else
-				ip = sample->addr;
-		} else {
-			ip = sample->addr;
-		}
-	} else if (sample->flags & (PERF_IP_FLAG_RETURN | PERF_IP_FLAG_TRACE_END)) {
-		if (al->sym)
-			name = al->sym->name;
-		else
-			ip = sample->ip;
-	}
+	name = resolve_branch_sym(sample, evsel, thread, al, &ip);
 
 	if (PRINT_FIELD(DSO) && !(PRINT_FIELD(IP) || PRINT_FIELD(ADDR))) {
 		dlen += fprintf(fp, "(");
@@ -1647,6 +1660,47 @@ static void perf_sample__fprint_metric(struct perf_script *script,
 	}
 }
 
+static bool show_event(struct perf_sample *sample,
+		       struct perf_evsel *evsel,
+		       struct thread *thread,
+		       struct addr_location *al)
+{
+	int depth = thread_stack__depth(thread);
+
+	if (!symbol_conf.graph_function)
+		return true;
+
+	if (thread->filter) {
+		if (depth <= thread->filter_entry_depth) {
+			thread->filter = false;
+			return false;
+		}
+		return true;
+	} else {
+		const char *s = symbol_conf.graph_function;
+		u64 ip;
+		const char *name = resolve_branch_sym(sample, evsel, thread, al,
+				&ip);
+		unsigned nlen;
+
+		if (!name)
+			return false;
+		nlen = strlen(name);
+		while (*s) {
+			unsigned len = strcspn(s, ",");
+			if (nlen == len && !strncmp(name, s, len)) {
+				thread->filter = true;
+				thread->filter_entry_depth = depth;
+				return true;
+			}
+			s += len;
+			if (*s == ',')
+				s++;
+		}
+		return false;
+	}
+}
+
 static void process_event(struct perf_script *script,
 			  struct perf_sample *sample, struct perf_evsel *evsel,
 			  struct addr_location *al,
@@ -1661,6 +1715,9 @@ static void process_event(struct perf_script *script,
 	if (output[type].fields == 0)
 		return;
 
+	if (!show_event(sample, evsel, thread, al))
+		return;
+
 	++es->samples;
 
 	perf_sample__fprintf_start(sample, thread, evsel,
@@ -3237,6 +3294,8 @@ int cmd_script(int argc, const char **argv)
 			"Decode calls from from itrace", parse_call_trace),
 	OPT_CALLBACK_OPTARG(0, "call-ret-trace", &itrace_synth_opts, NULL, NULL,
 			"Decode calls and returns from itrace", parse_callret_trace),
+	OPT_STRING(0, "graph-function", &symbol_conf.graph_function, "symbol[,symbol...]",
+			"Only print symbols and callees with --call-trace/--call-ret-trace"),
 	OPT_STRING(0, "stop-bt", &symbol_conf.bt_stop_list_str, "symbol[,symbol...]",
 		   "Stop display of callgraph at these symbols"),
 	OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
@@ -3494,7 +3553,8 @@ int cmd_script(int argc, const char **argv)
 	script.session = session;
 	script__setup_sample_type(&script);
 
-	if (output[PERF_TYPE_HARDWARE].fields & PERF_OUTPUT_CALLINDENT)
+	if ((output[PERF_TYPE_HARDWARE].fields & PERF_OUTPUT_CALLINDENT) ||
+	    symbol_conf.graph_function)
 		itrace_synth_opts.thread_stack = true;
 
 	session->itrace_synth_opts = &itrace_synth_opts;
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index f25fae4b5743c..d726a8a7bb1b0 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -123,7 +123,8 @@ struct symbol_conf {
 	const char	*vmlinux_name,
 			*kallsyms_name,
 			*source_prefix,
-			*field_sep;
+			*field_sep,
+			*graph_function;
 	const char	*default_guest_vmlinux_name,
 			*default_guest_kallsyms,
 			*default_guest_modules;
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 07606aa6998d9..36c09a9904e66 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -42,6 +42,8 @@ struct thread {
 	void				*addr_space;
 	struct unwind_libunwind_ops	*unwind_libunwind_ops;
 #endif
+	bool			filter;
+	int			filter_entry_depth;
 };
 
 struct machine;
-- 
GitLab


From fe57120e18a1f9124ca758c89cc54f91333d1847 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Mon, 24 Sep 2018 10:07:32 -0700
Subject: [PATCH 0188/1890] perf script: Support total cycles count

For 'perf script' brstackinsn also print a running cycles count.  This
makes it easier to calculate cycle deltas for code sections measured
with LBRs.

% perf record -b -a sleep 1
% perf script -F +brstackinsn
...
        00007f73ecc41083        insn: 74 06                     # PRED 9 cycles [17] 1.11 IPC
        00007f73ecc4108b        insn: a8 10
        00007f73ecc4108d        insn: 74 71                     # PRED 1 cycles [18] 1.00 IPC
        00007f73ecc41100        insn: 48 8b 46 10
        00007f73ecc41104        insn: 4c 8b 38
        00007f73ecc41107        insn: 4d 85 ff
        00007f73ecc4110a        insn: 0f 84 b0 00 00 00
        00007f73ecc41110        insn: 83 43 58 01
        00007f73ecc41114        insn: 48 89 df
        00007f73ecc41117        insn: e8 94 73 04 00            # PRED 6 cycles [24] 1.00 IPC

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>
Link: http://lkml.kernel.org/r/20180924170732.GA28040@tassilo.jf.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-script.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 9d2249ea75e3c..b5bc85bd0bbea 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -913,7 +913,7 @@ static int grab_bb(u8 *buffer, u64 start, u64 end,
 
 static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en,
 			    struct perf_insn *x, u8 *inbuf, int len,
-			    int insn, FILE *fp)
+			    int insn, FILE *fp, int *total_cycles)
 {
 	int printed = fprintf(fp, "\t%016" PRIx64 "\t%-30s\t#%s%s%s%s", ip,
 			      dump_insn(x, ip, inbuf, len, NULL),
@@ -922,7 +922,8 @@ static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en,
 			      en->flags.in_tx ? " INTX" : "",
 			      en->flags.abort ? " ABORT" : "");
 	if (en->flags.cycles) {
-		printed += fprintf(fp, " %d cycles", en->flags.cycles);
+		*total_cycles += en->flags.cycles;
+		printed += fprintf(fp, " %d cycles [%d]", en->flags.cycles, *total_cycles);
 		if (insn)
 			printed += fprintf(fp, " %.2f IPC", (float)insn / en->flags.cycles);
 	}
@@ -979,6 +980,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
 	u8 buffer[MAXBB];
 	unsigned off;
 	struct symbol *lastsym = NULL;
+	int total_cycles = 0;
 
 	if (!(br && br->nr))
 		return 0;
@@ -999,7 +1001,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
 		printed += ip__fprintf_sym(br->entries[nr - 1].from, thread,
 					   x.cpumode, x.cpu, &lastsym, attr, fp);
 		printed += ip__fprintf_jump(br->entries[nr - 1].from, &br->entries[nr - 1],
-					    &x, buffer, len, 0, fp);
+					    &x, buffer, len, 0, fp, &total_cycles);
 	}
 
 	/* Print all blocks */
@@ -1027,7 +1029,8 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
 
 			printed += ip__fprintf_sym(ip, thread, x.cpumode, x.cpu, &lastsym, attr, fp);
 			if (ip == end) {
-				printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, insn, fp);
+				printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, insn, fp,
+							    &total_cycles);
 				break;
 			} else {
 				printed += fprintf(fp, "\t%016" PRIx64 "\t%s\n", ip,
-- 
GitLab


From d7b31359ecef8d32540266f39d99892f61d17c4b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C3=A9na=C3=AFc=20Huard?= <lenaic@lhuard.fr>
Date: Thu, 22 Mar 2018 23:53:05 +0100
Subject: [PATCH 0189/1890] kvm_config: add CONFIG_VIRTIO_MENU
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Make sure that make kvmconfig enables all the virtio drivers even if it is
preceded by a make allnoconfig.

Signed-off-by: LÃ©naÃ¯c Huard <lenaic@lhuard.fr>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 kernel/configs/kvm_guest.config | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/configs/kvm_guest.config b/kernel/configs/kvm_guest.config
index 108fecc20fc14..208481d910903 100644
--- a/kernel/configs/kvm_guest.config
+++ b/kernel/configs/kvm_guest.config
@@ -20,6 +20,7 @@ CONFIG_PARAVIRT=y
 CONFIG_KVM_GUEST=y
 CONFIG_S390_GUEST=y
 CONFIG_VIRTIO=y
+CONFIG_VIRTIO_MENU=y
 CONFIG_VIRTIO_PCI=y
 CONFIG_VIRTIO_BLK=y
 CONFIG_VIRTIO_CONSOLE=y
-- 
GitLab


From 86a559787e6f5cf662c081363f64a20cad654195 Mon Sep 17 00:00:00 2001
From: Wei Wang <wei.w.wang@intel.com>
Date: Mon, 27 Aug 2018 09:32:17 +0800
Subject: [PATCH 0190/1890] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT

Negotiation of the VIRTIO_BALLOON_F_FREE_PAGE_HINT feature indicates the
support of reporting hints of guest free pages to host via virtio-balloon.
Currenlty, only free page blocks of MAX_ORDER - 1 are reported. They are
obtained one by one from the mm free list via the regular allocation
function.

Host requests the guest to report free page hints by sending a new cmd id
to the guest via the free_page_report_cmd_id configuration register. When
the guest starts to report, it first sends a start cmd to host via the
free page vq, which acks to host the cmd id received. When the guest
finishes reporting free pages, a stop cmd is sent to host via the vq.
Host may also send a stop cmd id to the guest to stop the reporting.

VIRTIO_BALLOON_CMD_ID_STOP: Host sends this cmd to stop the guest
reporting.
VIRTIO_BALLOON_CMD_ID_DONE: Host sends this cmd to tell the guest that
the reported pages are ready to be freed.

Why does the guest free the reported pages when host tells it is ready to
free?
This is because freeing pages appears to be expensive for live migration.
free_pages() dirties memory very quickly and makes the live migraion not
converge in some cases. So it is good to delay the free_page operation
when the migration is done, and host sends a command to guest about that.

Why do we need the new VIRTIO_BALLOON_CMD_ID_DONE, instead of reusing
VIRTIO_BALLOON_CMD_ID_STOP?
This is because live migration is usually done in several rounds. At the
end of each round, host needs to send a VIRTIO_BALLOON_CMD_ID_STOP cmd to
the guest to stop (or say pause) the reporting. The guest resumes the
reporting when it receives a new command id at the beginning of the next
round. So we need a new cmd id to distinguish between "stop reporting" and
"ready to free the reported pages".

TODO:
- Add a batch page allocation API to amortize the allocation overhead.

Signed-off-by: Wei Wang <wei.w.wang@intel.com>
Signed-off-by: Liang Li <liang.z.li@intel.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/virtio/virtio_balloon.c     | 364 +++++++++++++++++++++++++---
 include/uapi/linux/virtio_balloon.h |   5 +
 2 files changed, 336 insertions(+), 33 deletions(-)

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index d1c1f62837296..a18567889289d 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -41,13 +41,34 @@
 #define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256
 #define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80
 
+#define VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG (__GFP_NORETRY | __GFP_NOWARN | \
+					     __GFP_NOMEMALLOC)
+/* The order of free page blocks to report to host */
+#define VIRTIO_BALLOON_FREE_PAGE_ORDER (MAX_ORDER - 1)
+/* The size of a free page block in bytes */
+#define VIRTIO_BALLOON_FREE_PAGE_SIZE \
+	(1 << (VIRTIO_BALLOON_FREE_PAGE_ORDER + PAGE_SHIFT))
+
 #ifdef CONFIG_BALLOON_COMPACTION
 static struct vfsmount *balloon_mnt;
 #endif
 
+enum virtio_balloon_vq {
+	VIRTIO_BALLOON_VQ_INFLATE,
+	VIRTIO_BALLOON_VQ_DEFLATE,
+	VIRTIO_BALLOON_VQ_STATS,
+	VIRTIO_BALLOON_VQ_FREE_PAGE,
+	VIRTIO_BALLOON_VQ_MAX
+};
+
 struct virtio_balloon {
 	struct virtio_device *vdev;
-	struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
+	struct virtqueue *inflate_vq, *deflate_vq, *stats_vq, *free_page_vq;
+
+	/* Balloon's own wq for cpu-intensive work items */
+	struct workqueue_struct *balloon_wq;
+	/* The free page reporting work item submitted to the balloon wq */
+	struct work_struct report_free_page_work;
 
 	/* The balloon servicing is delegated to a freezable workqueue. */
 	struct work_struct update_balloon_stats_work;
@@ -57,6 +78,18 @@ struct virtio_balloon {
 	spinlock_t stop_update_lock;
 	bool stop_update;
 
+	/* The list of allocated free pages, waiting to be given back to mm */
+	struct list_head free_page_list;
+	spinlock_t free_page_list_lock;
+	/* The number of free page blocks on the above list */
+	unsigned long num_free_page_blocks;
+	/* The cmd id received from host */
+	u32 cmd_id_received;
+	/* The cmd id that is actively in use */
+	__virtio32 cmd_id_active;
+	/* Buffer to store the stop sign */
+	__virtio32 cmd_id_stop;
+
 	/* Waiting for host to ack the pages we released. */
 	wait_queue_head_t acked;
 
@@ -320,17 +353,6 @@ static void stats_handle_request(struct virtio_balloon *vb)
 	virtqueue_kick(vq);
 }
 
-static void virtballoon_changed(struct virtio_device *vdev)
-{
-	struct virtio_balloon *vb = vdev->priv;
-	unsigned long flags;
-
-	spin_lock_irqsave(&vb->stop_update_lock, flags);
-	if (!vb->stop_update)
-		queue_work(system_freezable_wq, &vb->update_balloon_size_work);
-	spin_unlock_irqrestore(&vb->stop_update_lock, flags);
-}
-
 static inline s64 towards_target(struct virtio_balloon *vb)
 {
 	s64 target;
@@ -347,6 +369,60 @@ static inline s64 towards_target(struct virtio_balloon *vb)
 	return target - vb->num_pages;
 }
 
+/* Gives back @num_to_return blocks of free pages to mm. */
+static unsigned long return_free_pages_to_mm(struct virtio_balloon *vb,
+					     unsigned long num_to_return)
+{
+	struct page *page;
+	unsigned long num_returned;
+
+	spin_lock_irq(&vb->free_page_list_lock);
+	for (num_returned = 0; num_returned < num_to_return; num_returned++) {
+		page = balloon_page_pop(&vb->free_page_list);
+		if (!page)
+			break;
+		free_pages((unsigned long)page_address(page),
+			   VIRTIO_BALLOON_FREE_PAGE_ORDER);
+	}
+	vb->num_free_page_blocks -= num_returned;
+	spin_unlock_irq(&vb->free_page_list_lock);
+
+	return num_returned;
+}
+
+static void virtballoon_changed(struct virtio_device *vdev)
+{
+	struct virtio_balloon *vb = vdev->priv;
+	unsigned long flags;
+	s64 diff = towards_target(vb);
+
+	if (diff) {
+		spin_lock_irqsave(&vb->stop_update_lock, flags);
+		if (!vb->stop_update)
+			queue_work(system_freezable_wq,
+				   &vb->update_balloon_size_work);
+		spin_unlock_irqrestore(&vb->stop_update_lock, flags);
+	}
+
+	if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
+		virtio_cread(vdev, struct virtio_balloon_config,
+			     free_page_report_cmd_id, &vb->cmd_id_received);
+		if (vb->cmd_id_received == VIRTIO_BALLOON_CMD_ID_DONE) {
+			/* Pass ULONG_MAX to give back all the free pages */
+			return_free_pages_to_mm(vb, ULONG_MAX);
+		} else if (vb->cmd_id_received != VIRTIO_BALLOON_CMD_ID_STOP &&
+			   vb->cmd_id_received !=
+			   virtio32_to_cpu(vdev, vb->cmd_id_active)) {
+			spin_lock_irqsave(&vb->stop_update_lock, flags);
+			if (!vb->stop_update) {
+				queue_work(vb->balloon_wq,
+					   &vb->report_free_page_work);
+			}
+			spin_unlock_irqrestore(&vb->stop_update_lock, flags);
+		}
+	}
+}
+
 static void update_balloon_size(struct virtio_balloon *vb)
 {
 	u32 actual = vb->num_pages;
@@ -389,26 +465,44 @@ static void update_balloon_size_func(struct work_struct *work)
 
 static int init_vqs(struct virtio_balloon *vb)
 {
-	struct virtqueue *vqs[3];
-	vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request };
-	static const char * const names[] = { "inflate", "deflate", "stats" };
-	int err, nvqs;
+	struct virtqueue *vqs[VIRTIO_BALLOON_VQ_MAX];
+	vq_callback_t *callbacks[VIRTIO_BALLOON_VQ_MAX];
+	const char *names[VIRTIO_BALLOON_VQ_MAX];
+	int err;
 
 	/*
-	 * We expect two virtqueues: inflate and deflate, and
-	 * optionally stat.
+	 * Inflateq and deflateq are used unconditionally. The names[]
+	 * will be NULL if the related feature is not enabled, which will
+	 * cause no allocation for the corresponding virtqueue in find_vqs.
 	 */
-	nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2;
-	err = virtio_find_vqs(vb->vdev, nvqs, vqs, callbacks, names, NULL);
+	callbacks[VIRTIO_BALLOON_VQ_INFLATE] = balloon_ack;
+	names[VIRTIO_BALLOON_VQ_INFLATE] = "inflate";
+	callbacks[VIRTIO_BALLOON_VQ_DEFLATE] = balloon_ack;
+	names[VIRTIO_BALLOON_VQ_DEFLATE] = "deflate";
+	names[VIRTIO_BALLOON_VQ_STATS] = NULL;
+	names[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
+
+	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
+		names[VIRTIO_BALLOON_VQ_STATS] = "stats";
+		callbacks[VIRTIO_BALLOON_VQ_STATS] = stats_request;
+	}
+
+	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
+		names[VIRTIO_BALLOON_VQ_FREE_PAGE] = "free_page_vq";
+		callbacks[VIRTIO_BALLOON_VQ_FREE_PAGE] = NULL;
+	}
+
+	err = vb->vdev->config->find_vqs(vb->vdev, VIRTIO_BALLOON_VQ_MAX,
+					 vqs, callbacks, names, NULL, NULL);
 	if (err)
 		return err;
 
-	vb->inflate_vq = vqs[0];
-	vb->deflate_vq = vqs[1];
+	vb->inflate_vq = vqs[VIRTIO_BALLOON_VQ_INFLATE];
+	vb->deflate_vq = vqs[VIRTIO_BALLOON_VQ_DEFLATE];
 	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
 		struct scatterlist sg;
 		unsigned int num_stats;
-		vb->stats_vq = vqs[2];
+		vb->stats_vq = vqs[VIRTIO_BALLOON_VQ_STATS];
 
 		/*
 		 * Prime this virtqueue with one buffer so the hypervisor can
@@ -426,9 +520,145 @@ static int init_vqs(struct virtio_balloon *vb)
 		}
 		virtqueue_kick(vb->stats_vq);
 	}
+
+	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
+		vb->free_page_vq = vqs[VIRTIO_BALLOON_VQ_FREE_PAGE];
+
+	return 0;
+}
+
+static int send_cmd_id_start(struct virtio_balloon *vb)
+{
+	struct scatterlist sg;
+	struct virtqueue *vq = vb->free_page_vq;
+	int err, unused;
+
+	/* Detach all the used buffers from the vq */
+	while (virtqueue_get_buf(vq, &unused))
+		;
+
+	vb->cmd_id_active = cpu_to_virtio32(vb->vdev, vb->cmd_id_received);
+	sg_init_one(&sg, &vb->cmd_id_active, sizeof(vb->cmd_id_active));
+	err = virtqueue_add_outbuf(vq, &sg, 1, &vb->cmd_id_active, GFP_KERNEL);
+	if (!err)
+		virtqueue_kick(vq);
+	return err;
+}
+
+static int send_cmd_id_stop(struct virtio_balloon *vb)
+{
+	struct scatterlist sg;
+	struct virtqueue *vq = vb->free_page_vq;
+	int err, unused;
+
+	/* Detach all the used buffers from the vq */
+	while (virtqueue_get_buf(vq, &unused))
+		;
+
+	sg_init_one(&sg, &vb->cmd_id_stop, sizeof(vb->cmd_id_stop));
+	err = virtqueue_add_outbuf(vq, &sg, 1, &vb->cmd_id_stop, GFP_KERNEL);
+	if (!err)
+		virtqueue_kick(vq);
+	return err;
+}
+
+static int get_free_page_and_send(struct virtio_balloon *vb)
+{
+	struct virtqueue *vq = vb->free_page_vq;
+	struct page *page;
+	struct scatterlist sg;
+	int err, unused;
+	void *p;
+
+	/* Detach all the used buffers from the vq */
+	while (virtqueue_get_buf(vq, &unused))
+		;
+
+	page = alloc_pages(VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG,
+			   VIRTIO_BALLOON_FREE_PAGE_ORDER);
+	/*
+	 * When the allocation returns NULL, it indicates that we have got all
+	 * the possible free pages, so return -EINTR to stop.
+	 */
+	if (!page)
+		return -EINTR;
+
+	p = page_address(page);
+	sg_init_one(&sg, p, VIRTIO_BALLOON_FREE_PAGE_SIZE);
+	/* There is always 1 entry reserved for the cmd id to use. */
+	if (vq->num_free > 1) {
+		err = virtqueue_add_inbuf(vq, &sg, 1, p, GFP_KERNEL);
+		if (unlikely(err)) {
+			free_pages((unsigned long)p,
+				   VIRTIO_BALLOON_FREE_PAGE_ORDER);
+			return err;
+		}
+		virtqueue_kick(vq);
+		spin_lock_irq(&vb->free_page_list_lock);
+		balloon_page_push(&vb->free_page_list, page);
+		vb->num_free_page_blocks++;
+		spin_unlock_irq(&vb->free_page_list_lock);
+	} else {
+		/*
+		 * The vq has no available entry to add this page block, so
+		 * just free it.
+		 */
+		free_pages((unsigned long)p, VIRTIO_BALLOON_FREE_PAGE_ORDER);
+	}
+
 	return 0;
 }
 
+static int send_free_pages(struct virtio_balloon *vb)
+{
+	int err;
+	u32 cmd_id_active;
+
+	while (1) {
+		/*
+		 * If a stop id or a new cmd id was just received from host,
+		 * stop the reporting.
+		 */
+		cmd_id_active = virtio32_to_cpu(vb->vdev, vb->cmd_id_active);
+		if (cmd_id_active != vb->cmd_id_received)
+			break;
+
+		/*
+		 * The free page blocks are allocated and sent to host one by
+		 * one.
+		 */
+		err = get_free_page_and_send(vb);
+		if (err == -EINTR)
+			break;
+		else if (unlikely(err))
+			return err;
+	}
+
+	return 0;
+}
+
+static void report_free_page_func(struct work_struct *work)
+{
+	int err;
+	struct virtio_balloon *vb = container_of(work, struct virtio_balloon,
+						 report_free_page_work);
+	struct device *dev = &vb->vdev->dev;
+
+	/* Start by sending the received cmd id to host with an outbuf. */
+	err = send_cmd_id_start(vb);
+	if (unlikely(err))
+		dev_err(dev, "Failed to send a start id, err = %d\n", err);
+
+	err = send_free_pages(vb);
+	if (unlikely(err))
+		dev_err(dev, "Failed to send a free page, err = %d\n", err);
+
+	/* End by sending a stop id to host with an outbuf. */
+	err = send_cmd_id_stop(vb);
+	if (unlikely(err))
+		dev_err(dev, "Failed to send a stop id, err = %d\n", err);
+}
+
 #ifdef CONFIG_BALLOON_COMPACTION
 /*
  * virtballoon_migratepage - perform the balloon page migration on behalf of
@@ -512,14 +742,23 @@ static struct file_system_type balloon_fs = {
 
 #endif /* CONFIG_BALLOON_COMPACTION */
 
-static unsigned long virtio_balloon_shrinker_scan(struct shrinker *shrinker,
-						  struct shrink_control *sc)
+static unsigned long shrink_free_pages(struct virtio_balloon *vb,
+				       unsigned long pages_to_free)
 {
-	unsigned long pages_to_free, pages_freed = 0;
-	struct virtio_balloon *vb = container_of(shrinker,
-					struct virtio_balloon, shrinker);
+	unsigned long blocks_to_free, blocks_freed;
 
-	pages_to_free = sc->nr_to_scan * VIRTIO_BALLOON_PAGES_PER_PAGE;
+	pages_to_free = round_up(pages_to_free,
+				 1 << VIRTIO_BALLOON_FREE_PAGE_ORDER);
+	blocks_to_free = pages_to_free >> VIRTIO_BALLOON_FREE_PAGE_ORDER;
+	blocks_freed = return_free_pages_to_mm(vb, blocks_to_free);
+
+	return blocks_freed << VIRTIO_BALLOON_FREE_PAGE_ORDER;
+}
+
+static unsigned long shrink_balloon_pages(struct virtio_balloon *vb,
+					  unsigned long pages_to_free)
+{
+	unsigned long pages_freed = 0;
 
 	/*
 	 * One invocation of leak_balloon can deflate at most
@@ -527,12 +766,33 @@ static unsigned long virtio_balloon_shrinker_scan(struct shrinker *shrinker,
 	 * multiple times to deflate pages till reaching pages_to_free.
 	 */
 	while (vb->num_pages && pages_to_free) {
+		pages_freed += leak_balloon(vb, pages_to_free) /
+					VIRTIO_BALLOON_PAGES_PER_PAGE;
 		pages_to_free -= pages_freed;
-		pages_freed += leak_balloon(vb, pages_to_free);
 	}
 	update_balloon_size(vb);
 
-	return pages_freed / VIRTIO_BALLOON_PAGES_PER_PAGE;
+	return pages_freed;
+}
+
+static unsigned long virtio_balloon_shrinker_scan(struct shrinker *shrinker,
+						  struct shrink_control *sc)
+{
+	unsigned long pages_to_free, pages_freed = 0;
+	struct virtio_balloon *vb = container_of(shrinker,
+					struct virtio_balloon, shrinker);
+
+	pages_to_free = sc->nr_to_scan * VIRTIO_BALLOON_PAGES_PER_PAGE;
+
+	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
+		pages_freed = shrink_free_pages(vb, pages_to_free);
+
+	if (pages_freed >= pages_to_free)
+		return pages_freed;
+
+	pages_freed += shrink_balloon_pages(vb, pages_to_free - pages_freed);
+
+	return pages_freed;
 }
 
 static unsigned long virtio_balloon_shrinker_count(struct shrinker *shrinker,
@@ -540,8 +800,12 @@ static unsigned long virtio_balloon_shrinker_count(struct shrinker *shrinker,
 {
 	struct virtio_balloon *vb = container_of(shrinker,
 					struct virtio_balloon, shrinker);
+	unsigned long count;
 
-	return vb->num_pages / VIRTIO_BALLOON_PAGES_PER_PAGE;
+	count = vb->num_pages / VIRTIO_BALLOON_PAGES_PER_PAGE;
+	count += vb->num_free_page_blocks >> VIRTIO_BALLOON_FREE_PAGE_ORDER;
+
+	return count;
 }
 
 static void virtio_balloon_unregister_shrinker(struct virtio_balloon *vb)
@@ -604,6 +868,31 @@ static int virtballoon_probe(struct virtio_device *vdev)
 	}
 	vb->vb_dev_info.inode->i_mapping->a_ops = &balloon_aops;
 #endif
+	if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
+		/*
+		 * There is always one entry reserved for cmd id, so the ring
+		 * size needs to be at least two to report free page hints.
+		 */
+		if (virtqueue_get_vring_size(vb->free_page_vq) < 2) {
+			err = -ENOSPC;
+			goto out_del_vqs;
+		}
+		vb->balloon_wq = alloc_workqueue("balloon-wq",
+					WQ_FREEZABLE | WQ_CPU_INTENSIVE, 0);
+		if (!vb->balloon_wq) {
+			err = -ENOMEM;
+			goto out_del_vqs;
+		}
+		INIT_WORK(&vb->report_free_page_work, report_free_page_func);
+		vb->cmd_id_received = VIRTIO_BALLOON_CMD_ID_STOP;
+		vb->cmd_id_active = cpu_to_virtio32(vb->vdev,
+						  VIRTIO_BALLOON_CMD_ID_STOP);
+		vb->cmd_id_stop = cpu_to_virtio32(vb->vdev,
+						  VIRTIO_BALLOON_CMD_ID_STOP);
+		vb->num_free_page_blocks = 0;
+		spin_lock_init(&vb->free_page_list_lock);
+		INIT_LIST_HEAD(&vb->free_page_list);
+	}
 	/*
 	 * We continue to use VIRTIO_BALLOON_F_DEFLATE_ON_OOM to decide if a
 	 * shrinker needs to be registered to relieve memory pressure.
@@ -611,7 +900,7 @@ static int virtballoon_probe(struct virtio_device *vdev)
 	if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) {
 		err = virtio_balloon_register_shrinker(vb);
 		if (err)
-			goto out_del_vqs;
+			goto out_del_balloon_wq;
 	}
 	virtio_device_ready(vdev);
 
@@ -619,6 +908,9 @@ static int virtballoon_probe(struct virtio_device *vdev)
 		virtballoon_changed(vdev);
 	return 0;
 
+out_del_balloon_wq:
+	if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
+		destroy_workqueue(vb->balloon_wq);
 out_del_vqs:
 	vdev->config->del_vqs(vdev);
 out_free_vb:
@@ -652,6 +944,11 @@ static void virtballoon_remove(struct virtio_device *vdev)
 	cancel_work_sync(&vb->update_balloon_size_work);
 	cancel_work_sync(&vb->update_balloon_stats_work);
 
+	if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT)) {
+		cancel_work_sync(&vb->report_free_page_work);
+		destroy_workqueue(vb->balloon_wq);
+	}
+
 	remove_common(vb);
 #ifdef CONFIG_BALLOON_COMPACTION
 	if (vb->vb_dev_info.inode)
@@ -703,6 +1000,7 @@ static unsigned int features[] = {
 	VIRTIO_BALLOON_F_MUST_TELL_HOST,
 	VIRTIO_BALLOON_F_STATS_VQ,
 	VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
+	VIRTIO_BALLOON_F_FREE_PAGE_HINT,
 };
 
 static struct virtio_driver virtio_balloon_driver = {
diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
index 13b8cb563892b..47c9eb401c083 100644
--- a/include/uapi/linux/virtio_balloon.h
+++ b/include/uapi/linux/virtio_balloon.h
@@ -34,15 +34,20 @@
 #define VIRTIO_BALLOON_F_MUST_TELL_HOST	0 /* Tell before reclaiming pages */
 #define VIRTIO_BALLOON_F_STATS_VQ	1 /* Memory Stats virtqueue */
 #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM	2 /* Deflate balloon on OOM */
+#define VIRTIO_BALLOON_F_FREE_PAGE_HINT	3 /* VQ to report free pages */
 
 /* Size of a PFN in the balloon interface. */
 #define VIRTIO_BALLOON_PFN_SHIFT 12
 
+#define VIRTIO_BALLOON_CMD_ID_STOP	0
+#define VIRTIO_BALLOON_CMD_ID_DONE	1
 struct virtio_balloon_config {
 	/* Number of pages host wants Guest to give up. */
 	__u32 num_pages;
 	/* Number of pages we've actually got in balloon. */
 	__u32 actual;
+	/* Free page report command id, readonly by guest */
+	__u32 free_page_report_cmd_id;
 };
 
 #define VIRTIO_BALLOON_S_SWAP_IN  0   /* Amount of memory swapped in */
-- 
GitLab


From d95f58f4a6ca002126c36c530fb096519c94baef Mon Sep 17 00:00:00 2001
From: Wei Wang <wei.w.wang@intel.com>
Date: Mon, 27 Aug 2018 09:32:18 +0800
Subject: [PATCH 0191/1890] mm/page_poison: expose page_poisoning_enabled to
 kernel modules

In some usages, e.g. virtio-balloon, a kernel module needs to know if
page poisoning is in use. This patch exposes the page_poisoning_enabled
function to kernel modules.

Signed-off-by: Wei Wang <wei.w.wang@intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 mm/page_poison.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/mm/page_poison.c b/mm/page_poison.c
index aa2b3d34e8eaa..830f60489b14b 100644
--- a/mm/page_poison.c
+++ b/mm/page_poison.c
@@ -17,6 +17,11 @@ static int __init early_page_poison_param(char *buf)
 }
 early_param("page_poison", early_page_poison_param);
 
+/**
+ * page_poisoning_enabled - check if page poisoning is enabled
+ *
+ * Return true if page poisoning is enabled, or false if not.
+ */
 bool page_poisoning_enabled(void)
 {
 	/*
@@ -29,6 +34,7 @@ bool page_poisoning_enabled(void)
 		(!IS_ENABLED(CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC) &&
 		debug_pagealloc_enabled()));
 }
+EXPORT_SYMBOL_GPL(page_poisoning_enabled);
 
 static void poison_page(struct page *page)
 {
-- 
GitLab


From 2e991629bcf55a43681aec1ee096eeb03cf81709 Mon Sep 17 00:00:00 2001
From: Wei Wang <wei.w.wang@intel.com>
Date: Mon, 27 Aug 2018 09:32:19 +0800
Subject: [PATCH 0192/1890] virtio-balloon: VIRTIO_BALLOON_F_PAGE_POISON

The VIRTIO_BALLOON_F_PAGE_POISON feature bit is used to indicate if the
guest is using page poisoning. Guest writes to the poison_val config
field to tell host about the page poisoning value that is in use.

Suggested-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Wei Wang <wei.w.wang@intel.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/virtio/virtio_balloon.c     | 10 ++++++++++
 include/uapi/linux/virtio_balloon.h |  3 +++
 2 files changed, 13 insertions(+)

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index a18567889289d..728ecd1eea305 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -825,6 +825,7 @@ static int virtio_balloon_register_shrinker(struct virtio_balloon *vb)
 static int virtballoon_probe(struct virtio_device *vdev)
 {
 	struct virtio_balloon *vb;
+	__u32 poison_val;
 	int err;
 
 	if (!vdev->config->get) {
@@ -892,6 +893,11 @@ static int virtballoon_probe(struct virtio_device *vdev)
 		vb->num_free_page_blocks = 0;
 		spin_lock_init(&vb->free_page_list_lock);
 		INIT_LIST_HEAD(&vb->free_page_list);
+		if (virtio_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON)) {
+			memset(&poison_val, PAGE_POISON, sizeof(poison_val));
+			virtio_cwrite(vb->vdev, struct virtio_balloon_config,
+				      poison_val, &poison_val);
+		}
 	}
 	/*
 	 * We continue to use VIRTIO_BALLOON_F_DEFLATE_ON_OOM to decide if a
@@ -992,6 +998,9 @@ static int virtballoon_restore(struct virtio_device *vdev)
 
 static int virtballoon_validate(struct virtio_device *vdev)
 {
+	if (!page_poisoning_enabled())
+		__virtio_clear_bit(vdev, VIRTIO_BALLOON_F_PAGE_POISON);
+
 	__virtio_clear_bit(vdev, VIRTIO_F_IOMMU_PLATFORM);
 	return 0;
 }
@@ -1001,6 +1010,7 @@ static unsigned int features[] = {
 	VIRTIO_BALLOON_F_STATS_VQ,
 	VIRTIO_BALLOON_F_DEFLATE_ON_OOM,
 	VIRTIO_BALLOON_F_FREE_PAGE_HINT,
+	VIRTIO_BALLOON_F_PAGE_POISON,
 };
 
 static struct virtio_driver virtio_balloon_driver = {
diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
index 47c9eb401c083..a1966cd7b6774 100644
--- a/include/uapi/linux/virtio_balloon.h
+++ b/include/uapi/linux/virtio_balloon.h
@@ -35,6 +35,7 @@
 #define VIRTIO_BALLOON_F_STATS_VQ	1 /* Memory Stats virtqueue */
 #define VIRTIO_BALLOON_F_DEFLATE_ON_OOM	2 /* Deflate balloon on OOM */
 #define VIRTIO_BALLOON_F_FREE_PAGE_HINT	3 /* VQ to report free pages */
+#define VIRTIO_BALLOON_F_PAGE_POISON	4 /* Guest is using page poisoning */
 
 /* Size of a PFN in the balloon interface. */
 #define VIRTIO_BALLOON_PFN_SHIFT 12
@@ -48,6 +49,8 @@ struct virtio_balloon_config {
 	__u32 actual;
 	/* Free page report command id, readonly by guest */
 	__u32 free_page_report_cmd_id;
+	/* Stores PAGE_POISON if page poisoning is in use */
+	__u32 poison_val;
 };
 
 #define VIRTIO_BALLOON_S_SWAP_IN  0   /* Amount of memory swapped in */
-- 
GitLab


From 355c8db13be409695956c666e839f654a99cfc2d Mon Sep 17 00:00:00 2001
From: Evan Quan <evan.quan@amd.com>
Date: Mon, 22 Oct 2018 14:35:40 +0800
Subject: [PATCH 0193/1890] drm/amd/powerplay: commit get_performance_level API
 as DAL needed

This can suppress the error reported on driver loading. Also these
are empty APIs as Vega12/Vega20 has no performance levels.

Signed-off-by: Evan Quan <evan.quan@amd.com>
Reviewed-by: Feifei Xu <Feifei.Xu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c | 8 ++++++++
 drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 9 +++++++++
 2 files changed, 17 insertions(+)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
index 9600e2f226e98..74bc37308dc09 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
@@ -2356,6 +2356,13 @@ static int vega12_gfx_off_control(struct pp_hwmgr *hwmgr, bool enable)
 		return vega12_disable_gfx_off(hwmgr);
 }
 
+static int vega12_get_performance_level(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *state,
+				PHM_PerformanceLevelDesignation designation, uint32_t index,
+				PHM_PerformanceLevel *level)
+{
+	return 0;
+}
+
 static const struct pp_hwmgr_func vega12_hwmgr_funcs = {
 	.backend_init = vega12_hwmgr_backend_init,
 	.backend_fini = vega12_hwmgr_backend_fini,
@@ -2406,6 +2413,7 @@ static const struct pp_hwmgr_func vega12_hwmgr_funcs = {
 	.register_irq_handlers = smu9_register_irq_handlers,
 	.start_thermal_controller = vega12_start_thermal_controller,
 	.powergate_gfx = vega12_gfx_off_control,
+	.get_performance_level = vega12_get_performance_level,
 };
 
 int vega12_hwmgr_init(struct pp_hwmgr *hwmgr)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
index 6e0b2b8df4551..4c9a1a9ef04b8 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
@@ -2041,6 +2041,13 @@ int vega20_display_clock_voltage_request(struct pp_hwmgr *hwmgr,
 	return result;
 }
 
+static int vega20_get_performance_level(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *state,
+				PHM_PerformanceLevelDesignation designation, uint32_t index,
+				PHM_PerformanceLevel *level)
+{
+	return 0;
+}
+
 static int vega20_notify_smc_display_config_after_ps_adjustment(
 		struct pp_hwmgr *hwmgr)
 {
@@ -3487,6 +3494,8 @@ static const struct pp_hwmgr_func vega20_hwmgr_funcs = {
 		vega20_set_watermarks_for_clocks_ranges,
 	.display_clock_voltage_request =
 		vega20_display_clock_voltage_request,
+	.get_performance_level =
+		vega20_get_performance_level,
 	/* UMD pstate, profile related */
 	.force_dpm_level =
 		vega20_dpm_force_dpm_level,
-- 
GitLab


From 6f059c641b31076248ba89d0f7e0e753946a8099 Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Fri, 19 Oct 2018 10:38:10 +0800
Subject: [PATCH 0194/1890] drm/amd/display: Fix Null point error if smu ip was
 disabled

from AI, SMU Ip is not indispensable to driver and can be
disabled by user via module parameter ip_block_mask.
so the pp_handle may be NULL.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
index 0fab64a2a9150..12001a006b2d8 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
@@ -101,7 +101,7 @@ bool dm_pp_apply_display_requirements(
 			adev->pm.pm_display_cfg.displays[i].controller_id = dc_cfg->pipe_idx + 1;
 		}
 
-		if (adev->powerplay.pp_funcs->display_configuration_change)
+		if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->display_configuration_change)
 			adev->powerplay.pp_funcs->display_configuration_change(
 				adev->powerplay.pp_handle,
 				&adev->pm.pm_display_cfg);
@@ -304,7 +304,7 @@ bool dm_pp_get_clock_levels_by_type(
 	struct amd_pp_simple_clock_info validation_clks = { 0 };
 	uint32_t i;
 
-	if (adev->powerplay.pp_funcs->get_clock_by_type) {
+	if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_clock_by_type) {
 		if (adev->powerplay.pp_funcs->get_clock_by_type(pp_handle,
 			dc_to_pp_clock_type(clk_type), &pp_clks)) {
 		/* Error in pplib. Provide default values. */
@@ -315,7 +315,7 @@ bool dm_pp_get_clock_levels_by_type(
 
 	pp_to_dc_clock_levels(&pp_clks, dc_clks, clk_type);
 
-	if (adev->powerplay.pp_funcs->get_display_mode_validation_clocks) {
+	if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_display_mode_validation_clocks) {
 		if (adev->powerplay.pp_funcs->get_display_mode_validation_clocks(
 						pp_handle, &validation_clks)) {
 			/* Error in pplib. Provide default values. */
@@ -398,6 +398,9 @@ bool dm_pp_get_clock_levels_by_type_with_voltage(
 	struct pp_clock_levels_with_voltage pp_clk_info = {0};
 	const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
 
+	if (!pp_funcs || !pp_funcs->get_clock_by_type_with_voltage)
+		return false;
+
 	if (pp_funcs->get_clock_by_type_with_voltage(pp_handle,
 						     dc_to_pp_clock_type(clk_type),
 						     &pp_clk_info))
@@ -438,7 +441,7 @@ bool dm_pp_apply_clock_for_voltage_request(
 	if (!pp_clock_request.clock_type)
 		return false;
 
-	if (adev->powerplay.pp_funcs->display_clock_voltage_request)
+	if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->display_clock_voltage_request)
 		ret = adev->powerplay.pp_funcs->display_clock_voltage_request(
 			adev->powerplay.pp_handle,
 			&pp_clock_request);
@@ -455,7 +458,7 @@ bool dm_pp_get_static_clocks(
 	struct amd_pp_clock_info pp_clk_info = {0};
 	int ret = 0;
 
-	if (adev->powerplay.pp_funcs->get_current_clocks)
+	if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_current_clocks)
 		ret = adev->powerplay.pp_funcs->get_current_clocks(
 			adev->powerplay.pp_handle,
 			&pp_clk_info);
@@ -505,6 +508,9 @@ void pp_rv_set_wm_ranges(struct pp_smu *pp,
 	wm_with_clock_ranges.num_wm_dmif_sets = ranges->num_reader_wm_sets;
 	wm_with_clock_ranges.num_wm_mcif_sets = ranges->num_writer_wm_sets;
 
+	if (!pp_funcs || !pp_funcs->set_watermarks_for_clocks_ranges)
+		return;
+
 	for (i = 0; i < wm_with_clock_ranges.num_wm_dmif_sets; i++) {
 		if (ranges->reader_wm_sets[i].wm_inst > 3)
 			wm_dce_clocks[i].wm_set_id = WM_SET_A;
-- 
GitLab


From 7179d24040d66de22e4710e943256be9e1045feb Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Fri, 19 Oct 2018 10:46:53 +0800
Subject: [PATCH 0195/1890] drm/amdgpu: Fix null point error

need to check adev->powerplay.pp_funcs first, becasue from
AI, the smu ip can be disabled by user, and the pp_handle
is null in this case.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c | 6 ++++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c  | 6 ++++--
 4 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
index 297a5490ad8c0..0a4fba196b843 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
@@ -135,7 +135,8 @@ static int acp_poweroff(struct generic_pm_domain *genpd)
 	 * 2. power off the acp tiles
 	 * 3. check and enter ulv state
 	 */
-		if (adev->powerplay.pp_funcs->set_powergating_by_smu)
+		if (adev->powerplay.pp_funcs &&
+			adev->powerplay.pp_funcs->set_powergating_by_smu)
 			amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true);
 	}
 	return 0;
@@ -517,7 +518,8 @@ static int acp_set_powergating_state(void *handle,
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	bool enable = state == AMD_PG_STATE_GATE ? true : false;
 
-	if (adev->powerplay.pp_funcs->set_powergating_by_smu)
+	if (adev->powerplay.pp_funcs &&
+		adev->powerplay.pp_funcs->set_powergating_by_smu)
 		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, enable);
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 790fd5408ddff..1a656b8657f73 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -392,7 +392,7 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
 	if (!(adev->powerplay.pp_feature & PP_GFXOFF_MASK))
 		return;
 
-	if (!adev->powerplay.pp_funcs->set_powergating_by_smu)
+	if (!adev->powerplay.pp_funcs || !adev->powerplay.pp_funcs->set_powergating_by_smu)
 		return;
 
 
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index 14649f8475f3f..fd23ba1226a57 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -280,7 +280,7 @@ void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev,
 		return;
 
 	if (enable && adev->pg_flags & AMD_PG_SUPPORT_MMHUB) {
-		if (adev->powerplay.pp_funcs->set_powergating_by_smu)
+		if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_powergating_by_smu)
 			amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GMC, true);
 
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 04fa3d972636b..7a8c9172d30a9 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1366,7 +1366,8 @@ static int sdma_v4_0_hw_init(void *handle)
 	int r;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	if (adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs->set_powergating_by_smu)
+	if (adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs &&
+			adev->powerplay.pp_funcs->set_powergating_by_smu)
 		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false);
 
 	sdma_v4_0_init_golden_registers(adev);
@@ -1386,7 +1387,8 @@ static int sdma_v4_0_hw_fini(void *handle)
 	sdma_v4_0_ctx_switch_enable(adev, false);
 	sdma_v4_0_enable(adev, false);
 
-	if (adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs->set_powergating_by_smu)
+	if (adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs
+			&& adev->powerplay.pp_funcs->set_powergating_by_smu)
 		amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, true);
 
 	return 0;
-- 
GitLab


From 1a0e348e5693b7acde02e50319b3237657b7981a Mon Sep 17 00:00:00 2001
From: David Francis <David.Francis@amd.com>
Date: Tue, 25 Sep 2018 11:23:31 -0400
Subject: [PATCH 0196/1890] drm/amd/display: Disable 4k 60 HDMI on DCE11

[Why]
Carrizo and Stoney have severe corruption when trying to power
4k 60 monitors over HDMI connectors that support 4k 60.

Carrizo and Stoney require retimers and redrivers to support 4k 60
over HDMI.  This driver does not currently support these.  Thus, 4k 60
HDMI (and all other modes requiring over 300MHz) should be disabled.

[How]
Reduce the dce11 HDMI pixel clock cap to 300000kHz.

Signed-off-by: David Francis <David.Francis@amd.com>
Reviewed-by: Roman Li <Roman.Li@amd.com>
Acked-by: Bhawanpreet Lakha <Bhawanpreet.Lakha@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
index de190935f0a45..e3624ca245748 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
@@ -568,7 +568,7 @@ static struct input_pixel_processor *dce110_ipp_create(
 
 static const struct encoder_feature_support link_enc_feature = {
 		.max_hdmi_deep_color = COLOR_DEPTH_121212,
-		.max_hdmi_pixel_clock = 594000,
+		.max_hdmi_pixel_clock = 300000,
 		.flags.bits.IS_HBR2_CAPABLE = true,
 		.flags.bits.IS_TPS3_CAPABLE = true
 };
-- 
GitLab


From 4542d623c7134bc1738f8a68ccb6dd546f1c264f Mon Sep 17 00:00:00 2001
From: Greg Edwards <gedwards@ddn.com>
Date: Wed, 22 Aug 2018 13:21:53 -0600
Subject: [PATCH 0197/1890] vhost/scsi: truncate T10 PI iov_iter to prot_bytes

Commands with protection information included were not truncating the
protection iov_iter to the number of protection bytes in the command.
This resulted in vhost_scsi mis-calculating the size of the protection
SGL in vhost_scsi_calc_sgls(), and including both the protection and
data SG entries in the protection SGL.

Fixes: 09b13fa8c1a1 ("vhost/scsi: Add ANY_LAYOUT support in vhost_scsi_handle_vq")
Signed-off-by: Greg Edwards <gedwards@ddn.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Fixes: 09b13fa8c1a1093e9458549ac8bb203a7c65c62a
Cc: stable@vger.kernel.org
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
---
 drivers/vhost/scsi.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index c24bb690680b4..e7e3ae13516d7 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -964,7 +964,8 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
 				prot_bytes = vhost32_to_cpu(vq, v_req_pi.pi_bytesin);
 			}
 			/*
-			 * Set prot_iter to data_iter, and advance past any
+			 * Set prot_iter to data_iter and truncate it to
+			 * prot_bytes, and advance data_iter past any
 			 * preceeding prot_bytes that may be present.
 			 *
 			 * Also fix up the exp_data_len to reflect only the
@@ -973,6 +974,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
 			if (prot_bytes) {
 				exp_data_len -= prot_bytes;
 				prot_iter = data_iter;
+				iov_iter_truncate(&prot_iter, prot_bytes);
 				iov_iter_advance(&data_iter, prot_bytes);
 			}
 			tag = vhost64_to_cpu(vq, v_req_pi.tag);
-- 
GitLab


From 0d02dbd68c47b66367130b696baef7246720791c Mon Sep 17 00:00:00 2001
From: Bijan Mottahedeh <bijan.mottahedeh@oracle.com>
Date: Mon, 17 Sep 2018 17:09:47 -0700
Subject: [PATCH 0198/1890] vhost/scsi: Respond to control queue operations

The vhost-scsi driver currently does not handle any control queue
operations. In particular, vhost_scsi_ctl_handle_kick, merely prints out
a debug message but does nothing else. This can cause guest VMs to hang.

As part of SCSI recovery from an error, e.g., an I/O timeout, the SCSI
midlayer attempts to abort the failed operation. The SCSI virtio driver
translates the abort to a SCSI TMF request that gets put on the control
queue (virtscsi_abort -> virtscsi_tmf). The SCSI virtio driver then
waits indefinitely for this request to be completed, but it never will
because vhost-scsi never responds to that request.

To avoid a hang, always respond to control queue operations; explicitly
reject TMF requests, and return a no-op response to event requests.

Signed-off-by: Bijan Mottahedeh <bijan.mottahedeh@oracle.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/vhost/scsi.c | 190 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 190 insertions(+)

diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index e7e3ae13516d7..1c33d6e391525 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -1050,9 +1050,199 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
 	mutex_unlock(&vq->mutex);
 }
 
+static void
+vhost_scsi_send_tmf_resp(struct vhost_scsi *vs,
+			   struct vhost_virtqueue *vq,
+			   int head, unsigned int out)
+{
+	struct virtio_scsi_ctrl_tmf_resp __user *resp;
+	struct virtio_scsi_ctrl_tmf_resp rsp;
+	int ret;
+
+	pr_debug("%s\n", __func__);
+	memset(&rsp, 0, sizeof(rsp));
+	rsp.response = VIRTIO_SCSI_S_FUNCTION_REJECTED;
+	resp = vq->iov[out].iov_base;
+	ret = __copy_to_user(resp, &rsp, sizeof(rsp));
+	if (!ret)
+		vhost_add_used_and_signal(&vs->dev, vq, head, 0);
+	else
+		pr_err("Faulted on virtio_scsi_ctrl_tmf_resp\n");
+}
+
+static void
+vhost_scsi_send_an_resp(struct vhost_scsi *vs,
+			   struct vhost_virtqueue *vq,
+			   int head, unsigned int out)
+{
+	struct virtio_scsi_ctrl_an_resp __user *resp;
+	struct virtio_scsi_ctrl_an_resp rsp;
+	int ret;
+
+	pr_debug("%s\n", __func__);
+	memset(&rsp, 0, sizeof(rsp));	/* event_actual = 0 */
+	rsp.response = VIRTIO_SCSI_S_OK;
+	resp = vq->iov[out].iov_base;
+	ret = __copy_to_user(resp, &rsp, sizeof(rsp));
+	if (!ret)
+		vhost_add_used_and_signal(&vs->dev, vq, head, 0);
+	else
+		pr_err("Faulted on virtio_scsi_ctrl_an_resp\n");
+}
+
+static void
+vhost_scsi_ctl_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
+{
+	union {
+		__virtio32 type;
+		struct virtio_scsi_ctrl_an_req an;
+		struct virtio_scsi_ctrl_tmf_req tmf;
+	} v_req;
+	struct iov_iter out_iter;
+	unsigned int out = 0, in = 0;
+	int head;
+	size_t req_size, rsp_size, typ_size;
+	size_t out_size, in_size;
+	u8 *lunp;
+	void *req;
+
+	mutex_lock(&vq->mutex);
+	/*
+	 * We can handle the vq only after the endpoint is setup by calling the
+	 * VHOST_SCSI_SET_ENDPOINT ioctl.
+	 */
+	if (!vq->private_data)
+		goto out;
+
+	vhost_disable_notify(&vs->dev, vq);
+
+	for (;;) {
+		head = vhost_get_vq_desc(vq, vq->iov,
+					 ARRAY_SIZE(vq->iov), &out, &in,
+					 NULL, NULL);
+		pr_debug("vhost_get_vq_desc: head: %d, out: %u in: %u\n",
+			 head, out, in);
+		/* On error, stop handling until the next kick. */
+		if (unlikely(head < 0))
+			break;
+		/* Nothing new?  Wait for eventfd to tell us they refilled. */
+		if (head == vq->num) {
+			if (unlikely(vhost_enable_notify(&vs->dev, vq))) {
+				vhost_disable_notify(&vs->dev, vq);
+				continue;
+			}
+			break;
+		}
+
+		/*
+		 * Get the size of request and response buffers.
+		 */
+		out_size = iov_length(vq->iov, out);
+		in_size = iov_length(&vq->iov[out], in);
+
+		/*
+		 * Copy over the virtio-scsi request header, which for a
+		 * ANY_LAYOUT enabled guest may span multiple iovecs, or a
+		 * single iovec may contain both the header + outgoing
+		 * WRITE payloads.
+		 *
+		 * copy_from_iter() will advance out_iter, so that it will
+		 * point at the start of the outgoing WRITE payload, if
+		 * DMA_TO_DEVICE is set.
+		 */
+		iov_iter_init(&out_iter, WRITE, vq->iov, out, out_size);
+
+		req = &v_req.type;
+		typ_size = sizeof(v_req.type);
+
+		if (unlikely(!copy_from_iter_full(req, typ_size, &out_iter))) {
+			vq_err(vq, "Faulted on copy_from_iter tmf type\n");
+			/*
+			 * The size of the response buffer varies based on
+			 * the request type and must be validated against it.
+			 * Since the request type is not known, don't send
+			 * a response.
+			 */
+			continue;
+		}
+
+		switch (v_req.type) {
+		case VIRTIO_SCSI_T_TMF:
+			req = &v_req.tmf;
+			lunp = &v_req.tmf.lun[0];
+			req_size = sizeof(struct virtio_scsi_ctrl_tmf_req);
+			rsp_size = sizeof(struct virtio_scsi_ctrl_tmf_resp);
+			break;
+		case VIRTIO_SCSI_T_AN_QUERY:
+		case VIRTIO_SCSI_T_AN_SUBSCRIBE:
+			req = &v_req.an;
+			lunp = &v_req.an.lun[0];
+			req_size = sizeof(struct virtio_scsi_ctrl_an_req);
+			rsp_size = sizeof(struct virtio_scsi_ctrl_an_resp);
+			break;
+		default:
+			vq_err(vq, "Unknown control request %d", v_req.type);
+			continue;
+		}
+
+		/*
+		 * Check for a sane response buffer so we can report early
+		 * errors back to the guest.
+		 */
+		if (unlikely(in_size < rsp_size)) {
+			vq_err(vq,
+			       "Resp buf too small, need min %zu bytes got %zu",
+			       rsp_size, in_size);
+			/*
+			 * Notifications are disabled at this point;
+			 * continue so they can be eventually enabled
+			 * when processing terminates.
+			 */
+			continue;
+		}
+
+		if (unlikely(out_size < req_size)) {
+			vq_err(vq,
+			       "Req buf too small, need min %zu bytes got %zu",
+			       req_size, out_size);
+			vhost_scsi_send_bad_target(vs, vq, head, out);
+			continue;
+		}
+
+		req += typ_size;
+		req_size -= typ_size;
+
+		if (unlikely(!copy_from_iter_full(req, req_size, &out_iter))) {
+			vq_err(vq, "Faulted on copy_from_iter\n");
+			vhost_scsi_send_bad_target(vs, vq, head, out);
+			continue;
+		}
+
+		/* virtio-scsi spec requires byte 0 of the lun to be 1 */
+		if (unlikely(*lunp != 1)) {
+			vq_err(vq, "Illegal virtio-scsi lun: %u\n", *lunp);
+			vhost_scsi_send_bad_target(vs, vq, head, out);
+			continue;
+		}
+
+		if (v_req.type == VIRTIO_SCSI_T_TMF) {
+			pr_debug("%s tmf %d\n", __func__, v_req.tmf.subtype);
+			vhost_scsi_send_tmf_resp(vs, vq, head, out);
+		} else
+			vhost_scsi_send_an_resp(vs, vq, head, out);
+	}
+out:
+	mutex_unlock(&vq->mutex);
+}
+
 static void vhost_scsi_ctl_handle_kick(struct vhost_work *work)
 {
+	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
+						poll.work);
+	struct vhost_scsi *vs = container_of(vq->dev, struct vhost_scsi, dev);
+
 	pr_debug("%s: The handling func for control queue.\n", __func__);
+	vhost_scsi_ctl_handle_vq(vs, vq);
 }
 
 static void
-- 
GitLab


From 3f8ca2e115e55af4c15d97dda635e948d2e380be Mon Sep 17 00:00:00 2001
From: Bijan Mottahedeh <bijan.mottahedeh@oracle.com>
Date: Mon, 17 Sep 2018 17:09:48 -0700
Subject: [PATCH 0199/1890] vhost/scsi: Extract common handling code from
 control queue handler

Prepare to change the request queue handler to use common handling
routines.

Signed-off-by: Bijan Mottahedeh <bijan.mottahedeh@oracle.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/vhost/scsi.c | 271 +++++++++++++++++++++++++++----------------
 1 file changed, 172 insertions(+), 99 deletions(-)

diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index 1c33d6e391525..4cd03a1d7f21a 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -203,6 +203,19 @@ struct vhost_scsi {
 	int vs_events_nr; /* num of pending events, protected by vq->mutex */
 };
 
+/*
+ * Context for processing request and control queue operations.
+ */
+struct vhost_scsi_ctx {
+	int head;
+	unsigned int out, in;
+	size_t req_size, rsp_size;
+	size_t out_size, in_size;
+	u8 *target, *lunp;
+	void *req;
+	struct iov_iter out_iter;
+};
+
 static struct workqueue_struct *vhost_scsi_workqueue;
 
 /* Global spinlock to protect vhost_scsi TPG list for vhost IOCTL access */
@@ -1050,10 +1063,107 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
 	mutex_unlock(&vq->mutex);
 }
 
+static int
+vhost_scsi_get_desc(struct vhost_scsi *vs, struct vhost_virtqueue *vq,
+		    struct vhost_scsi_ctx *vc)
+{
+	int ret = -ENXIO;
+
+	vc->head = vhost_get_vq_desc(vq, vq->iov,
+				     ARRAY_SIZE(vq->iov), &vc->out, &vc->in,
+				     NULL, NULL);
+
+	pr_debug("vhost_get_vq_desc: head: %d, out: %u in: %u\n",
+		 vc->head, vc->out, vc->in);
+
+	/* On error, stop handling until the next kick. */
+	if (unlikely(vc->head < 0))
+		goto done;
+
+	/* Nothing new?  Wait for eventfd to tell us they refilled. */
+	if (vc->head == vq->num) {
+		if (unlikely(vhost_enable_notify(&vs->dev, vq))) {
+			vhost_disable_notify(&vs->dev, vq);
+			ret = -EAGAIN;
+		}
+		goto done;
+	}
+
+	/*
+	 * Get the size of request and response buffers.
+	 */
+	vc->out_size = iov_length(vq->iov, vc->out);
+	vc->in_size = iov_length(&vq->iov[vc->out], vc->in);
+
+	/*
+	 * Copy over the virtio-scsi request header, which for a
+	 * ANY_LAYOUT enabled guest may span multiple iovecs, or a
+	 * single iovec may contain both the header + outgoing
+	 * WRITE payloads.
+	 *
+	 * copy_from_iter() will advance out_iter, so that it will
+	 * point at the start of the outgoing WRITE payload, if
+	 * DMA_TO_DEVICE is set.
+	 */
+	iov_iter_init(&vc->out_iter, WRITE, vq->iov, vc->out, vc->out_size);
+	ret = 0;
+
+done:
+	return ret;
+}
+
+static int
+vhost_scsi_chk_size(struct vhost_virtqueue *vq, struct vhost_scsi_ctx *vc)
+{
+	if (unlikely(vc->in_size < vc->rsp_size)) {
+		vq_err(vq,
+		       "Response buf too small, need min %zu bytes got %zu",
+		       vc->rsp_size, vc->in_size);
+		return -EINVAL;
+	} else if (unlikely(vc->out_size < vc->req_size)) {
+		vq_err(vq,
+		       "Request buf too small, need min %zu bytes got %zu",
+		       vc->req_size, vc->out_size);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int
+vhost_scsi_get_req(struct vhost_virtqueue *vq, struct vhost_scsi_ctx *vc,
+		   struct vhost_scsi_tpg **tpgp)
+{
+	int ret = -EIO;
+
+	if (unlikely(!copy_from_iter_full(vc->req, vc->req_size,
+					  &vc->out_iter)))
+		vq_err(vq, "Faulted on copy_from_iter\n");
+	else if (unlikely(*vc->lunp != 1))
+		/* virtio-scsi spec requires byte 0 of the lun to be 1 */
+		vq_err(vq, "Illegal virtio-scsi lun: %u\n", *vc->lunp);
+	else {
+		struct vhost_scsi_tpg **vs_tpg, *tpg;
+
+		vs_tpg = vq->private_data;	/* validated at handler entry */
+
+		tpg = READ_ONCE(vs_tpg[*vc->target]);
+		if (unlikely(!tpg))
+			vq_err(vq, "Target 0x%x does not exist\n", *vc->target);
+		else {
+			if (tpgp)
+				*tpgp = tpg;
+			ret = 0;
+		}
+	}
+
+	return ret;
+}
+
 static void
 vhost_scsi_send_tmf_resp(struct vhost_scsi *vs,
-			   struct vhost_virtqueue *vq,
-			   int head, unsigned int out)
+			 struct vhost_virtqueue *vq,
+			 struct vhost_scsi_ctx *vc)
 {
 	struct virtio_scsi_ctrl_tmf_resp __user *resp;
 	struct virtio_scsi_ctrl_tmf_resp rsp;
@@ -1062,18 +1172,18 @@ vhost_scsi_send_tmf_resp(struct vhost_scsi *vs,
 	pr_debug("%s\n", __func__);
 	memset(&rsp, 0, sizeof(rsp));
 	rsp.response = VIRTIO_SCSI_S_FUNCTION_REJECTED;
-	resp = vq->iov[out].iov_base;
+	resp = vq->iov[vc->out].iov_base;
 	ret = __copy_to_user(resp, &rsp, sizeof(rsp));
 	if (!ret)
-		vhost_add_used_and_signal(&vs->dev, vq, head, 0);
+		vhost_add_used_and_signal(&vs->dev, vq, vc->head, 0);
 	else
 		pr_err("Faulted on virtio_scsi_ctrl_tmf_resp\n");
 }
 
 static void
 vhost_scsi_send_an_resp(struct vhost_scsi *vs,
-			   struct vhost_virtqueue *vq,
-			   int head, unsigned int out)
+			struct vhost_virtqueue *vq,
+			struct vhost_scsi_ctx *vc)
 {
 	struct virtio_scsi_ctrl_an_resp __user *resp;
 	struct virtio_scsi_ctrl_an_resp rsp;
@@ -1082,10 +1192,10 @@ vhost_scsi_send_an_resp(struct vhost_scsi *vs,
 	pr_debug("%s\n", __func__);
 	memset(&rsp, 0, sizeof(rsp));	/* event_actual = 0 */
 	rsp.response = VIRTIO_SCSI_S_OK;
-	resp = vq->iov[out].iov_base;
+	resp = vq->iov[vc->out].iov_base;
 	ret = __copy_to_user(resp, &rsp, sizeof(rsp));
 	if (!ret)
-		vhost_add_used_and_signal(&vs->dev, vq, head, 0);
+		vhost_add_used_and_signal(&vs->dev, vq, vc->head, 0);
 	else
 		pr_err("Faulted on virtio_scsi_ctrl_an_resp\n");
 }
@@ -1098,13 +1208,9 @@ vhost_scsi_ctl_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
 		struct virtio_scsi_ctrl_an_req an;
 		struct virtio_scsi_ctrl_tmf_req tmf;
 	} v_req;
-	struct iov_iter out_iter;
-	unsigned int out = 0, in = 0;
-	int head;
-	size_t req_size, rsp_size, typ_size;
-	size_t out_size, in_size;
-	u8 *lunp;
-	void *req;
+	struct vhost_scsi_ctx vc;
+	size_t typ_size;
+	int ret;
 
 	mutex_lock(&vq->mutex);
 	/*
@@ -1114,52 +1220,28 @@ vhost_scsi_ctl_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
 	if (!vq->private_data)
 		goto out;
 
+	memset(&vc, 0, sizeof(vc));
+
 	vhost_disable_notify(&vs->dev, vq);
 
 	for (;;) {
-		head = vhost_get_vq_desc(vq, vq->iov,
-					 ARRAY_SIZE(vq->iov), &out, &in,
-					 NULL, NULL);
-		pr_debug("vhost_get_vq_desc: head: %d, out: %u in: %u\n",
-			 head, out, in);
-		/* On error, stop handling until the next kick. */
-		if (unlikely(head < 0))
-			break;
-		/* Nothing new?  Wait for eventfd to tell us they refilled. */
-		if (head == vq->num) {
-			if (unlikely(vhost_enable_notify(&vs->dev, vq))) {
-				vhost_disable_notify(&vs->dev, vq);
-				continue;
-			}
-			break;
-		}
+		ret = vhost_scsi_get_desc(vs, vq, &vc);
+		if (ret)
+			goto err;
 
 		/*
-		 * Get the size of request and response buffers.
+		 * Get the request type first in order to setup
+		 * other parameters dependent on the type.
 		 */
-		out_size = iov_length(vq->iov, out);
-		in_size = iov_length(&vq->iov[out], in);
-
-		/*
-		 * Copy over the virtio-scsi request header, which for a
-		 * ANY_LAYOUT enabled guest may span multiple iovecs, or a
-		 * single iovec may contain both the header + outgoing
-		 * WRITE payloads.
-		 *
-		 * copy_from_iter() will advance out_iter, so that it will
-		 * point at the start of the outgoing WRITE payload, if
-		 * DMA_TO_DEVICE is set.
-		 */
-		iov_iter_init(&out_iter, WRITE, vq->iov, out, out_size);
-
-		req = &v_req.type;
+		vc.req = &v_req.type;
 		typ_size = sizeof(v_req.type);
 
-		if (unlikely(!copy_from_iter_full(req, typ_size, &out_iter))) {
+		if (unlikely(!copy_from_iter_full(vc.req, typ_size,
+						  &vc.out_iter))) {
 			vq_err(vq, "Faulted on copy_from_iter tmf type\n");
 			/*
-			 * The size of the response buffer varies based on
-			 * the request type and must be validated against it.
+			 * The size of the response buffer depends on the
+			 * request type and must be validated against it.
 			 * Since the request type is not known, don't send
 			 * a response.
 			 */
@@ -1168,17 +1250,19 @@ vhost_scsi_ctl_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
 
 		switch (v_req.type) {
 		case VIRTIO_SCSI_T_TMF:
-			req = &v_req.tmf;
-			lunp = &v_req.tmf.lun[0];
-			req_size = sizeof(struct virtio_scsi_ctrl_tmf_req);
-			rsp_size = sizeof(struct virtio_scsi_ctrl_tmf_resp);
+			vc.req = &v_req.tmf;
+			vc.req_size = sizeof(struct virtio_scsi_ctrl_tmf_req);
+			vc.rsp_size = sizeof(struct virtio_scsi_ctrl_tmf_resp);
+			vc.lunp = &v_req.tmf.lun[0];
+			vc.target = &v_req.tmf.lun[1];
 			break;
 		case VIRTIO_SCSI_T_AN_QUERY:
 		case VIRTIO_SCSI_T_AN_SUBSCRIBE:
-			req = &v_req.an;
-			lunp = &v_req.an.lun[0];
-			req_size = sizeof(struct virtio_scsi_ctrl_an_req);
-			rsp_size = sizeof(struct virtio_scsi_ctrl_an_resp);
+			vc.req = &v_req.an;
+			vc.req_size = sizeof(struct virtio_scsi_ctrl_an_req);
+			vc.rsp_size = sizeof(struct virtio_scsi_ctrl_an_resp);
+			vc.lunp = &v_req.an.lun[0];
+			vc.target = NULL;
 			break;
 		default:
 			vq_err(vq, "Unknown control request %d", v_req.type);
@@ -1186,50 +1270,39 @@ vhost_scsi_ctl_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
 		}
 
 		/*
-		 * Check for a sane response buffer so we can report early
-		 * errors back to the guest.
+		 * Validate the size of request and response buffers.
+		 * Check for a sane response buffer so we can report
+		 * early errors back to the guest.
 		 */
-		if (unlikely(in_size < rsp_size)) {
-			vq_err(vq,
-			       "Resp buf too small, need min %zu bytes got %zu",
-			       rsp_size, in_size);
-			/*
-			 * Notifications are disabled at this point;
-			 * continue so they can be eventually enabled
-			 * when processing terminates.
-			 */
-			continue;
-		}
+		ret = vhost_scsi_chk_size(vq, &vc);
+		if (ret)
+			goto err;
 
-		if (unlikely(out_size < req_size)) {
-			vq_err(vq,
-			       "Req buf too small, need min %zu bytes got %zu",
-			       req_size, out_size);
-			vhost_scsi_send_bad_target(vs, vq, head, out);
-			continue;
-		}
-
-		req += typ_size;
-		req_size -= typ_size;
-
-		if (unlikely(!copy_from_iter_full(req, req_size, &out_iter))) {
-			vq_err(vq, "Faulted on copy_from_iter\n");
-			vhost_scsi_send_bad_target(vs, vq, head, out);
-			continue;
-		}
+		/*
+		 * Get the rest of the request now that its size is known.
+		 */
+		vc.req += typ_size;
+		vc.req_size -= typ_size;
 
-		/* virtio-scsi spec requires byte 0 of the lun to be 1 */
-		if (unlikely(*lunp != 1)) {
-			vq_err(vq, "Illegal virtio-scsi lun: %u\n", *lunp);
-			vhost_scsi_send_bad_target(vs, vq, head, out);
-			continue;
-		}
+		ret = vhost_scsi_get_req(vq, &vc, NULL);
+		if (ret)
+			goto err;
 
-		if (v_req.type == VIRTIO_SCSI_T_TMF) {
-			pr_debug("%s tmf %d\n", __func__, v_req.tmf.subtype);
-			vhost_scsi_send_tmf_resp(vs, vq, head, out);
-		} else
-			vhost_scsi_send_an_resp(vs, vq, head, out);
+		if (v_req.type == VIRTIO_SCSI_T_TMF)
+			vhost_scsi_send_tmf_resp(vs, vq, &vc);
+		else
+			vhost_scsi_send_an_resp(vs, vq, &vc);
+err:
+		/*
+		 * ENXIO:  No more requests, or read error, wait for next kick
+		 * EINVAL: Invalid response buffer, drop the request
+		 * EIO:    Respond with bad target
+		 * EAGAIN: Pending request
+		 */
+		if (ret == -ENXIO)
+			break;
+		else if (ret == -EIO)
+			vhost_scsi_send_bad_target(vs, vq, vc.head, vc.out);
 	}
 out:
 	mutex_unlock(&vq->mutex);
-- 
GitLab


From 09d7583294aada625349b6f80f1e1c730b5a5208 Mon Sep 17 00:00:00 2001
From: Bijan Mottahedeh <bijan.mottahedeh@oracle.com>
Date: Mon, 17 Sep 2018 17:09:49 -0700
Subject: [PATCH 0200/1890] vhost/scsi: Use common handling code in request
 queue handler

Change the request queue handler to use common handling routines same
as the control queue handler.

Signed-off-by: Bijan Mottahedeh <bijan.mottahedeh@oracle.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/vhost/scsi.c | 361 ++++++++++++++++++++-----------------------
 1 file changed, 164 insertions(+), 197 deletions(-)

diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index 4cd03a1d7f21a..50dffe83714c6 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -813,24 +813,120 @@ vhost_scsi_send_bad_target(struct vhost_scsi *vs,
 		pr_err("Faulted on virtio_scsi_cmd_resp\n");
 }
 
+static int
+vhost_scsi_get_desc(struct vhost_scsi *vs, struct vhost_virtqueue *vq,
+		    struct vhost_scsi_ctx *vc)
+{
+	int ret = -ENXIO;
+
+	vc->head = vhost_get_vq_desc(vq, vq->iov,
+				     ARRAY_SIZE(vq->iov), &vc->out, &vc->in,
+				     NULL, NULL);
+
+	pr_debug("vhost_get_vq_desc: head: %d, out: %u in: %u\n",
+		 vc->head, vc->out, vc->in);
+
+	/* On error, stop handling until the next kick. */
+	if (unlikely(vc->head < 0))
+		goto done;
+
+	/* Nothing new?  Wait for eventfd to tell us they refilled. */
+	if (vc->head == vq->num) {
+		if (unlikely(vhost_enable_notify(&vs->dev, vq))) {
+			vhost_disable_notify(&vs->dev, vq);
+			ret = -EAGAIN;
+		}
+		goto done;
+	}
+
+	/*
+	 * Get the size of request and response buffers.
+	 * FIXME: Not correct for BIDI operation
+	 */
+	vc->out_size = iov_length(vq->iov, vc->out);
+	vc->in_size = iov_length(&vq->iov[vc->out], vc->in);
+
+	/*
+	 * Copy over the virtio-scsi request header, which for a
+	 * ANY_LAYOUT enabled guest may span multiple iovecs, or a
+	 * single iovec may contain both the header + outgoing
+	 * WRITE payloads.
+	 *
+	 * copy_from_iter() will advance out_iter, so that it will
+	 * point at the start of the outgoing WRITE payload, if
+	 * DMA_TO_DEVICE is set.
+	 */
+	iov_iter_init(&vc->out_iter, WRITE, vq->iov, vc->out, vc->out_size);
+	ret = 0;
+
+done:
+	return ret;
+}
+
+static int
+vhost_scsi_chk_size(struct vhost_virtqueue *vq, struct vhost_scsi_ctx *vc)
+{
+	if (unlikely(vc->in_size < vc->rsp_size)) {
+		vq_err(vq,
+		       "Response buf too small, need min %zu bytes got %zu",
+		       vc->rsp_size, vc->in_size);
+		return -EINVAL;
+	} else if (unlikely(vc->out_size < vc->req_size)) {
+		vq_err(vq,
+		       "Request buf too small, need min %zu bytes got %zu",
+		       vc->req_size, vc->out_size);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int
+vhost_scsi_get_req(struct vhost_virtqueue *vq, struct vhost_scsi_ctx *vc,
+		   struct vhost_scsi_tpg **tpgp)
+{
+	int ret = -EIO;
+
+	if (unlikely(!copy_from_iter_full(vc->req, vc->req_size,
+					  &vc->out_iter))) {
+		vq_err(vq, "Faulted on copy_from_iter\n");
+	} else if (unlikely(*vc->lunp != 1)) {
+		/* virtio-scsi spec requires byte 0 of the lun to be 1 */
+		vq_err(vq, "Illegal virtio-scsi lun: %u\n", *vc->lunp);
+	} else {
+		struct vhost_scsi_tpg **vs_tpg, *tpg;
+
+		vs_tpg = vq->private_data;	/* validated at handler entry */
+
+		tpg = READ_ONCE(vs_tpg[*vc->target]);
+		if (unlikely(!tpg)) {
+			vq_err(vq, "Target 0x%x does not exist\n", *vc->target);
+		} else {
+			if (tpgp)
+				*tpgp = tpg;
+			ret = 0;
+		}
+	}
+
+	return ret;
+}
+
 static void
 vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
 {
 	struct vhost_scsi_tpg **vs_tpg, *tpg;
 	struct virtio_scsi_cmd_req v_req;
 	struct virtio_scsi_cmd_req_pi v_req_pi;
+	struct vhost_scsi_ctx vc;
 	struct vhost_scsi_cmd *cmd;
-	struct iov_iter out_iter, in_iter, prot_iter, data_iter;
+	struct iov_iter in_iter, prot_iter, data_iter;
 	u64 tag;
 	u32 exp_data_len, data_direction;
-	unsigned int out = 0, in = 0;
-	int head, ret, prot_bytes;
-	size_t req_size, rsp_size = sizeof(struct virtio_scsi_cmd_resp);
-	size_t out_size, in_size;
+	int ret, prot_bytes;
 	u16 lun;
-	u8 *target, *lunp, task_attr;
+	u8 task_attr;
 	bool t10_pi = vhost_has_feature(vq, VIRTIO_SCSI_F_T10_PI);
-	void *req, *cdb;
+	void *cdb;
 
 	mutex_lock(&vq->mutex);
 	/*
@@ -841,85 +937,47 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
 	if (!vs_tpg)
 		goto out;
 
+	memset(&vc, 0, sizeof(vc));
+	vc.rsp_size = sizeof(struct virtio_scsi_cmd_resp);
+
 	vhost_disable_notify(&vs->dev, vq);
 
 	for (;;) {
-		head = vhost_get_vq_desc(vq, vq->iov,
-					 ARRAY_SIZE(vq->iov), &out, &in,
-					 NULL, NULL);
-		pr_debug("vhost_get_vq_desc: head: %d, out: %u in: %u\n",
-			 head, out, in);
-		/* On error, stop handling until the next kick. */
-		if (unlikely(head < 0))
-			break;
-		/* Nothing new?  Wait for eventfd to tell us they refilled. */
-		if (head == vq->num) {
-			if (unlikely(vhost_enable_notify(&vs->dev, vq))) {
-				vhost_disable_notify(&vs->dev, vq);
-				continue;
-			}
-			break;
-		}
-		/*
-		 * Check for a sane response buffer so we can report early
-		 * errors back to the guest.
-		 */
-		if (unlikely(vq->iov[out].iov_len < rsp_size)) {
-			vq_err(vq, "Expecting at least virtio_scsi_cmd_resp"
-				" size, got %zu bytes\n", vq->iov[out].iov_len);
-			break;
-		}
+		ret = vhost_scsi_get_desc(vs, vq, &vc);
+		if (ret)
+			goto err;
+
 		/*
 		 * Setup pointers and values based upon different virtio-scsi
 		 * request header if T10_PI is enabled in KVM guest.
 		 */
 		if (t10_pi) {
-			req = &v_req_pi;
-			req_size = sizeof(v_req_pi);
-			lunp = &v_req_pi.lun[0];
-			target = &v_req_pi.lun[1];
+			vc.req = &v_req_pi;
+			vc.req_size = sizeof(v_req_pi);
+			vc.lunp = &v_req_pi.lun[0];
+			vc.target = &v_req_pi.lun[1];
 		} else {
-			req = &v_req;
-			req_size = sizeof(v_req);
-			lunp = &v_req.lun[0];
-			target = &v_req.lun[1];
+			vc.req = &v_req;
+			vc.req_size = sizeof(v_req);
+			vc.lunp = &v_req.lun[0];
+			vc.target = &v_req.lun[1];
 		}
-		/*
-		 * FIXME: Not correct for BIDI operation
-		 */
-		out_size = iov_length(vq->iov, out);
-		in_size = iov_length(&vq->iov[out], in);
 
 		/*
-		 * Copy over the virtio-scsi request header, which for a
-		 * ANY_LAYOUT enabled guest may span multiple iovecs, or a
-		 * single iovec may contain both the header + outgoing
-		 * WRITE payloads.
-		 *
-		 * copy_from_iter() will advance out_iter, so that it will
-		 * point at the start of the outgoing WRITE payload, if
-		 * DMA_TO_DEVICE is set.
+		 * Validate the size of request and response buffers.
+		 * Check for a sane response buffer so we can report
+		 * early errors back to the guest.
 		 */
-		iov_iter_init(&out_iter, WRITE, vq->iov, out, out_size);
+		ret = vhost_scsi_chk_size(vq, &vc);
+		if (ret)
+			goto err;
 
-		if (unlikely(!copy_from_iter_full(req, req_size, &out_iter))) {
-			vq_err(vq, "Faulted on copy_from_iter\n");
-			vhost_scsi_send_bad_target(vs, vq, head, out);
-			continue;
-		}
-		/* virtio-scsi spec requires byte 0 of the lun to be 1 */
-		if (unlikely(*lunp != 1)) {
-			vq_err(vq, "Illegal virtio-scsi lun: %u\n", *lunp);
-			vhost_scsi_send_bad_target(vs, vq, head, out);
-			continue;
-		}
+		ret = vhost_scsi_get_req(vq, &vc, &tpg);
+		if (ret)
+			goto err;
+
+		ret = -EIO;	/* bad target on any error from here on */
 
-		tpg = READ_ONCE(vs_tpg[*target]);
-		if (unlikely(!tpg)) {
-			/* Target does not exist, fail the request */
-			vhost_scsi_send_bad_target(vs, vq, head, out);
-			continue;
-		}
 		/*
 		 * Determine data_direction by calculating the total outgoing
 		 * iovec sizes + incoming iovec sizes vs. virtio-scsi request +
@@ -937,17 +995,17 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
 		 */
 		prot_bytes = 0;
 
-		if (out_size > req_size) {
+		if (vc.out_size > vc.req_size) {
 			data_direction = DMA_TO_DEVICE;
-			exp_data_len = out_size - req_size;
-			data_iter = out_iter;
-		} else if (in_size > rsp_size) {
+			exp_data_len = vc.out_size - vc.req_size;
+			data_iter = vc.out_iter;
+		} else if (vc.in_size > vc.rsp_size) {
 			data_direction = DMA_FROM_DEVICE;
-			exp_data_len = in_size - rsp_size;
+			exp_data_len = vc.in_size - vc.rsp_size;
 
-			iov_iter_init(&in_iter, READ, &vq->iov[out], in,
-				      rsp_size + exp_data_len);
-			iov_iter_advance(&in_iter, rsp_size);
+			iov_iter_init(&in_iter, READ, &vq->iov[vc.out], vc.in,
+				      vc.rsp_size + exp_data_len);
+			iov_iter_advance(&in_iter, vc.rsp_size);
 			data_iter = in_iter;
 		} else {
 			data_direction = DMA_NONE;
@@ -963,16 +1021,14 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
 				if (data_direction != DMA_TO_DEVICE) {
 					vq_err(vq, "Received non zero pi_bytesout,"
 						" but wrong data_direction\n");
-					vhost_scsi_send_bad_target(vs, vq, head, out);
-					continue;
+					goto err;
 				}
 				prot_bytes = vhost32_to_cpu(vq, v_req_pi.pi_bytesout);
 			} else if (v_req_pi.pi_bytesin) {
 				if (data_direction != DMA_FROM_DEVICE) {
 					vq_err(vq, "Received non zero pi_bytesin,"
 						" but wrong data_direction\n");
-					vhost_scsi_send_bad_target(vs, vq, head, out);
-					continue;
+					goto err;
 				}
 				prot_bytes = vhost32_to_cpu(vq, v_req_pi.pi_bytesin);
 			}
@@ -1011,8 +1067,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
 			vq_err(vq, "Received SCSI CDB with command_size: %d that"
 				" exceeds SCSI_MAX_VARLEN_CDB_SIZE: %d\n",
 				scsi_command_size(cdb), VHOST_SCSI_MAX_CDB_SIZE);
-			vhost_scsi_send_bad_target(vs, vq, head, out);
-			continue;
+				goto err;
 		}
 		cmd = vhost_scsi_get_tag(vq, tpg, cdb, tag, lun, task_attr,
 					 exp_data_len + prot_bytes,
@@ -1020,13 +1075,12 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
 		if (IS_ERR(cmd)) {
 			vq_err(vq, "vhost_scsi_get_tag failed %ld\n",
 			       PTR_ERR(cmd));
-			vhost_scsi_send_bad_target(vs, vq, head, out);
-			continue;
+			goto err;
 		}
 		cmd->tvc_vhost = vs;
 		cmd->tvc_vq = vq;
-		cmd->tvc_resp_iov = vq->iov[out];
-		cmd->tvc_in_iovs = in;
+		cmd->tvc_resp_iov = vq->iov[vc.out];
+		cmd->tvc_in_iovs = vc.in;
 
 		pr_debug("vhost_scsi got command opcode: %#02x, lun: %d\n",
 			 cmd->tvc_cdb[0], cmd->tvc_lun);
@@ -1034,14 +1088,12 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
 			 " %d\n", cmd, exp_data_len, prot_bytes, data_direction);
 
 		if (data_direction != DMA_NONE) {
-			ret = vhost_scsi_mapal(cmd,
-					       prot_bytes, &prot_iter,
-					       exp_data_len, &data_iter);
-			if (unlikely(ret)) {
+			if (unlikely(vhost_scsi_mapal(cmd, prot_bytes,
+						      &prot_iter, exp_data_len,
+						      &data_iter))) {
 				vq_err(vq, "Failed to map iov to sgl\n");
 				vhost_scsi_release_cmd(&cmd->tvc_se_cmd);
-				vhost_scsi_send_bad_target(vs, vq, head, out);
-				continue;
+				goto err;
 			}
 		}
 		/*
@@ -1049,7 +1101,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
 		 * complete the virtio-scsi request in TCM callback context via
 		 * vhost_scsi_queue_data_in() and vhost_scsi_queue_status()
 		 */
-		cmd->tvc_vq_desc = head;
+		cmd->tvc_vq_desc = vc.head;
 		/*
 		 * Dispatch cmd descriptor for cmwq execution in process
 		 * context provided by vhost_scsi_workqueue.  This also ensures
@@ -1058,112 +1110,27 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
 		 */
 		INIT_WORK(&cmd->work, vhost_scsi_submission_work);
 		queue_work(vhost_scsi_workqueue, &cmd->work);
+		ret = 0;
+err:
+		/*
+		 * ENXIO:  No more requests, or read error, wait for next kick
+		 * EINVAL: Invalid response buffer, drop the request
+		 * EIO:    Respond with bad target
+		 * EAGAIN: Pending request
+		 */
+		if (ret == -ENXIO)
+			break;
+		else if (ret == -EIO)
+			vhost_scsi_send_bad_target(vs, vq, vc.head, vc.out);
 	}
 out:
 	mutex_unlock(&vq->mutex);
 }
 
-static int
-vhost_scsi_get_desc(struct vhost_scsi *vs, struct vhost_virtqueue *vq,
-		    struct vhost_scsi_ctx *vc)
-{
-	int ret = -ENXIO;
-
-	vc->head = vhost_get_vq_desc(vq, vq->iov,
-				     ARRAY_SIZE(vq->iov), &vc->out, &vc->in,
-				     NULL, NULL);
-
-	pr_debug("vhost_get_vq_desc: head: %d, out: %u in: %u\n",
-		 vc->head, vc->out, vc->in);
-
-	/* On error, stop handling until the next kick. */
-	if (unlikely(vc->head < 0))
-		goto done;
-
-	/* Nothing new?  Wait for eventfd to tell us they refilled. */
-	if (vc->head == vq->num) {
-		if (unlikely(vhost_enable_notify(&vs->dev, vq))) {
-			vhost_disable_notify(&vs->dev, vq);
-			ret = -EAGAIN;
-		}
-		goto done;
-	}
-
-	/*
-	 * Get the size of request and response buffers.
-	 */
-	vc->out_size = iov_length(vq->iov, vc->out);
-	vc->in_size = iov_length(&vq->iov[vc->out], vc->in);
-
-	/*
-	 * Copy over the virtio-scsi request header, which for a
-	 * ANY_LAYOUT enabled guest may span multiple iovecs, or a
-	 * single iovec may contain both the header + outgoing
-	 * WRITE payloads.
-	 *
-	 * copy_from_iter() will advance out_iter, so that it will
-	 * point at the start of the outgoing WRITE payload, if
-	 * DMA_TO_DEVICE is set.
-	 */
-	iov_iter_init(&vc->out_iter, WRITE, vq->iov, vc->out, vc->out_size);
-	ret = 0;
-
-done:
-	return ret;
-}
-
-static int
-vhost_scsi_chk_size(struct vhost_virtqueue *vq, struct vhost_scsi_ctx *vc)
-{
-	if (unlikely(vc->in_size < vc->rsp_size)) {
-		vq_err(vq,
-		       "Response buf too small, need min %zu bytes got %zu",
-		       vc->rsp_size, vc->in_size);
-		return -EINVAL;
-	} else if (unlikely(vc->out_size < vc->req_size)) {
-		vq_err(vq,
-		       "Request buf too small, need min %zu bytes got %zu",
-		       vc->req_size, vc->out_size);
-		return -EIO;
-	}
-
-	return 0;
-}
-
-static int
-vhost_scsi_get_req(struct vhost_virtqueue *vq, struct vhost_scsi_ctx *vc,
-		   struct vhost_scsi_tpg **tpgp)
-{
-	int ret = -EIO;
-
-	if (unlikely(!copy_from_iter_full(vc->req, vc->req_size,
-					  &vc->out_iter)))
-		vq_err(vq, "Faulted on copy_from_iter\n");
-	else if (unlikely(*vc->lunp != 1))
-		/* virtio-scsi spec requires byte 0 of the lun to be 1 */
-		vq_err(vq, "Illegal virtio-scsi lun: %u\n", *vc->lunp);
-	else {
-		struct vhost_scsi_tpg **vs_tpg, *tpg;
-
-		vs_tpg = vq->private_data;	/* validated at handler entry */
-
-		tpg = READ_ONCE(vs_tpg[*vc->target]);
-		if (unlikely(!tpg))
-			vq_err(vq, "Target 0x%x does not exist\n", *vc->target);
-		else {
-			if (tpgp)
-				*tpgp = tpg;
-			ret = 0;
-		}
-	}
-
-	return ret;
-}
-
 static void
-vhost_scsi_send_tmf_resp(struct vhost_scsi *vs,
-			 struct vhost_virtqueue *vq,
-			 struct vhost_scsi_ctx *vc)
+vhost_scsi_send_tmf_reject(struct vhost_scsi *vs,
+			   struct vhost_virtqueue *vq,
+			   struct vhost_scsi_ctx *vc)
 {
 	struct virtio_scsi_ctrl_tmf_resp __user *resp;
 	struct virtio_scsi_ctrl_tmf_resp rsp;
@@ -1289,7 +1256,7 @@ vhost_scsi_ctl_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
 			goto err;
 
 		if (v_req.type == VIRTIO_SCSI_T_TMF)
-			vhost_scsi_send_tmf_resp(vs, vq, &vc);
+			vhost_scsi_send_tmf_reject(vs, vq, &vc);
 		else
 			vhost_scsi_send_an_resp(vs, vq, &vc);
 err:
-- 
GitLab


From 79f800b2e76923cd8ce0aa659cb5c019d9643bc9 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Fri, 12 Oct 2018 16:46:37 +0100
Subject: [PATCH 0201/1890] MAINTAINERS: remove reference to bogus vsock file

The file drivers/vhost/vsock.h never existed.  Remove the bogus
MAINTAINERS reference.

Fixes: 433fc58e6bf2c8bd97e57153ed28e64fd78207b8 ("VSOCK: Introduce vhost_vsock.ko")
Reported-by: Joe Perches <joe@perches.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 MAINTAINERS | 1 -
 1 file changed, 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index b2f710eee67a7..3b776daf4a854 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -15542,7 +15542,6 @@ F:	net/vmw_vsock/virtio_transport_common.c
 F:	net/vmw_vsock/virtio_transport.c
 F:	drivers/net/vsockmon.c
 F:	drivers/vhost/vsock.c
-F:	drivers/vhost/vsock.h
 F:	tools/testing/vsock/
 
 VIRTIO CONSOLE DRIVER
-- 
GitLab


From 33823f4d63f7a010653d219800539409a78ef4be Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Wed, 24 Oct 2018 14:57:16 -0700
Subject: [PATCH 0202/1890] x86/cpufeatures: Enumerate MOVDIRI instruction

MOVDIRI moves doubleword or quadword from register to memory through
direct store which is implemented by using write combining (WC) for
writing data directly into memory without caching the data.

Programmable agents can handle streaming offload (e.g. high speed packet
processing in network). Hardware implements a doorbell (tail pointer)
register that is updated by software when adding new work-elements to
the streaming offload work-queue.

MOVDIRI can be used as the doorbell write which is a 4-byte or 8-byte
uncachable write to MMIO. MOVDIRI has lower overhead than other ways
to write the doorbell.

Availability of the MOVDIRI instruction is indicated by the presence of
the CPUID feature flag MOVDIRI(CPUID.0x07.0x0:ECX[bit 27]).

Please check the latest Intel Architecture Instruction Set Extensions
and Future Features Programming Reference for more details on the CPUID
feature MOVDIRI flag.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Ashok Raj <ashok.raj@intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi V Shankar <ravi.v.shankar@intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1540418237-125817-2-git-send-email-fenghua.yu@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/cpufeatures.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 89a048c2faec7..90934ee7b79a0 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -331,6 +331,7 @@
 #define X86_FEATURE_LA57		(16*32+16) /* 5-level page tables */
 #define X86_FEATURE_RDPID		(16*32+22) /* RDPID instruction */
 #define X86_FEATURE_CLDEMOTE		(16*32+25) /* CLDEMOTE instruction */
+#define X86_FEATURE_MOVDIRI		(16*32+27) /* MOVDIRI instruction */
 
 /* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */
 #define X86_FEATURE_OVERFLOW_RECOV	(17*32+ 0) /* MCA overflow recovery support */
-- 
GitLab


From ace6485a03266cc3c198ce8e927a1ce0ce139699 Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Wed, 24 Oct 2018 14:57:17 -0700
Subject: [PATCH 0203/1890] x86/cpufeatures: Enumerate MOVDIR64B instruction

MOVDIR64B moves 64-bytes as direct-store with 64-bytes write atomicity.
Direct store is implemented by using write combining (WC) for writing
data directly into memory without caching the data.

In low latency offload (e.g. Non-Volatile Memory, etc), MOVDIR64B writes
work descriptors (and data in some cases) to device-hosted work-queues
atomically without cache pollution.

Availability of the MOVDIR64B instruction is indicated by the
presence of the CPUID feature flag MOVDIR64B (CPUID.0x07.0x0:ECX[bit 28]).

Please check the latest Intel Architecture Instruction Set Extensions
and Future Features Programming Reference for more details on the CPUID
feature MOVDIR64B flag.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Ashok Raj <ashok.raj@intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi V Shankar <ravi.v.shankar@intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1540418237-125817-3-git-send-email-fenghua.yu@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/cpufeatures.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 90934ee7b79a0..28c4a502b4197 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -332,6 +332,7 @@
 #define X86_FEATURE_RDPID		(16*32+22) /* RDPID instruction */
 #define X86_FEATURE_CLDEMOTE		(16*32+25) /* CLDEMOTE instruction */
 #define X86_FEATURE_MOVDIRI		(16*32+27) /* MOVDIRI instruction */
+#define X86_FEATURE_MOVDIR64B		(16*32+28) /* MOVDIR64B instruction */
 
 /* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */
 #define X86_FEATURE_OVERFLOW_RECOV	(17*32+ 0) /* MCA overflow recovery support */
-- 
GitLab


From 61792b677415b77c8db04991c22966bb8de7603e Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 23 Oct 2018 16:47:16 +0200
Subject: [PATCH 0204/1890] netfilter: ipv6: fix oops when defragmenting
 locally generated fragments
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Unlike ipv4 and normal ipv6 defrag, netfilter ipv6 defragmentation did
not save/restore skb->dst.

This causes oops when handling locally generated ipv6 fragments, as
output path needs a valid dst.

Reported-by: Maciej Å»enczykowski <zenczykowski@gmail.com>
Fixes: 84379c9afe01 ("netfilter: ipv6: nf_defrag: drop skb dst before queueing")
Signed-off-by: Florian Westphal <fw@strlen.de>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv6/netfilter/nf_conntrack_reasm.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index b8ac369f98ad8..d219979c3e529 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -587,11 +587,16 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
 	 */
 	ret = -EINPROGRESS;
 	if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
-	    fq->q.meat == fq->q.len &&
-	    nf_ct_frag6_reasm(fq, skb, dev))
-		ret = 0;
-	else
+	    fq->q.meat == fq->q.len) {
+		unsigned long orefdst = skb->_skb_refdst;
+
+		skb->_skb_refdst = 0UL;
+		if (nf_ct_frag6_reasm(fq, skb, dev))
+			ret = 0;
+		skb->_skb_refdst = orefdst;
+	} else {
 		skb_dst_drop(skb);
+	}
 
 out_unlock:
 	spin_unlock_bh(&fq->q.lock);
-- 
GitLab


From 5e91c9d9cd3fd557226ca75fed58816b9eee7e07 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 22 Oct 2018 21:49:45 +0200
Subject: [PATCH 0205/1890] netfilter: nft_osf: check if attribute is present

If the attribute is not sent, eg. old libnftnl binary, then
tb[NFTA_OSF_TTL] is NULL and kernel crashes from the _init path.

Fixes: a218dc82f0b5 ("netfilter: nft_osf: Add ttl option support")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_osf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c
index ca5e5d8c5ef8b..b13618c764ec2 100644
--- a/net/netfilter/nft_osf.c
+++ b/net/netfilter/nft_osf.c
@@ -50,7 +50,7 @@ static int nft_osf_init(const struct nft_ctx *ctx,
 	int err;
 	u8 ttl;
 
-	if (nla_get_u8(tb[NFTA_OSF_TTL])) {
+	if (tb[NFTA_OSF_TTL]) {
 		ttl = nla_get_u8(tb[NFTA_OSF_TTL]);
 		if (ttl > 2)
 			return -EINVAL;
-- 
GitLab


From 5a8de47b3c250521dd632cdedaac6db88367defa Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Wed, 24 Oct 2018 13:54:03 +0200
Subject: [PATCH 0206/1890] netfilter: bridge: define INT_MIN & INT_MAX in
 userspace
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With 4.19, programs like ebtables fail to build when they include
"linux/netfilter_bridge.h". It is caused by commit 94276fa8a2a4 which
added a use of INT_MIN and INT_MAX to the header:
: In file included from /usr/include/linux/netfilter_bridge/ebtables.h:18,
:                  from include/ebtables_u.h:28,
:                  from communication.c:23:
: /usr/include/linux/netfilter_bridge.h:30:20: error: 'INT_MIN' undeclared here (not in a function)
:   NF_BR_PRI_FIRST = INT_MIN,
:                     ^~~~~~~

Define these constants by including "limits.h" when !__KERNEL__ (the
same way as for other netfilter_* headers).

Fixes: 94276fa8a2a4 ("netfilter: bridge: Expose nf_tables bridge hook priorities through uapi")
Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Acked-by: MÃ¡tÃ© Eckl <ecklm94@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter_bridge.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/uapi/linux/netfilter_bridge.h b/include/uapi/linux/netfilter_bridge.h
index 156ccd089df18..1610fdbab98df 100644
--- a/include/uapi/linux/netfilter_bridge.h
+++ b/include/uapi/linux/netfilter_bridge.h
@@ -11,6 +11,10 @@
 #include <linux/if_vlan.h>
 #include <linux/if_pppox.h>
 
+#ifndef __KERNEL__
+#include <limits.h> /* for INT_MIN, INT_MAX */
+#endif
+
 /* Bridge Hooks */
 /* After promisc drops, checksum checks. */
 #define NF_BR_PRE_ROUTING	0
-- 
GitLab


From ad6e16059d8e00de0887885db11d87cba0bd1512 Mon Sep 17 00:00:00 2001
From: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Date: Sun, 21 Oct 2018 15:18:56 +0800
Subject: [PATCH 0207/1890] EDAC, skx_edac: Add address translation for
 non-volatile DIMMs

Currently, this driver doesn't support address translation for
non-volatile DIMMs.

The ACPI ADXL DSM method provides address translation for both volatile
and non-volatile DIMMs. Enable it to use the ACPI DSM methods if they
are supported and there are non-volatile DIMMs populated on the system.

Co-developed-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
CC: Mauro Carvalho Chehab <mchehab@kernel.org>
CC: arozansk@redhat.com
CC: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/1540106336-5212-1-git-send-email-qiuxu.zhuo@intel.com
---
 drivers/edac/Kconfig    |   1 +
 drivers/edac/skx_edac.c | 193 +++++++++++++++++++++++++++++++++++++---
 2 files changed, 181 insertions(+), 13 deletions(-)

diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 57304b2e989f2..ffd349c124794 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -234,6 +234,7 @@ config EDAC_SKX
 	depends on PCI && X86_64 && X86_MCE_INTEL && PCI_MMCONFIG
 	depends on ACPI_NFIT || !ACPI_NFIT # if ACPI_NFIT=m, EDAC_SKX can't be y
 	select DMI
+	select ACPI_ADXL
 	help
 	  Support for error detection and correction the Intel
 	  Skylake server Integrated Memory Controllers. If your
diff --git a/drivers/edac/skx_edac.c b/drivers/edac/skx_edac.c
index dd209e0dd9abb..a99ea61dad321 100644
--- a/drivers/edac/skx_edac.c
+++ b/drivers/edac/skx_edac.c
@@ -26,6 +26,7 @@
 #include <linux/bitmap.h>
 #include <linux/math64.h>
 #include <linux/mod_devicetable.h>
+#include <linux/adxl.h>
 #include <acpi/nfit.h>
 #include <asm/cpu_device_id.h>
 #include <asm/intel-family.h>
@@ -35,6 +36,7 @@
 #include "edac_module.h"
 
 #define EDAC_MOD_STR    "skx_edac"
+#define MSG_SIZE	1024
 
 /*
  * Debug macros
@@ -54,6 +56,29 @@
 static LIST_HEAD(skx_edac_list);
 
 static u64 skx_tolm, skx_tohm;
+static char *skx_msg;
+static unsigned int nvdimm_count;
+
+enum {
+	INDEX_SOCKET,
+	INDEX_MEMCTRL,
+	INDEX_CHANNEL,
+	INDEX_DIMM,
+	INDEX_MAX
+};
+
+static const char * const component_names[] = {
+	[INDEX_SOCKET]	= "ProcessorSocketId",
+	[INDEX_MEMCTRL]	= "MemoryControllerId",
+	[INDEX_CHANNEL]	= "ChannelId",
+	[INDEX_DIMM]	= "DimmSlotId",
+};
+
+static int component_indices[ARRAY_SIZE(component_names)];
+static int adxl_component_count;
+static const char * const *adxl_component_names;
+static u64 *adxl_values;
+static char *adxl_msg;
 
 #define NUM_IMC			2	/* memory controllers per socket */
 #define NUM_CHANNELS		3	/* channels per memory controller */
@@ -393,6 +418,8 @@ static int get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc,
 	u16 flags;
 	u64 size = 0;
 
+	nvdimm_count++;
+
 	dev_handle = ACPI_NFIT_BUILD_DEVICE_HANDLE(dimmno, chan, imc->lmc,
 						   imc->src_id, 0);
 
@@ -941,12 +968,46 @@ static void teardown_skx_debug(void)
 }
 #endif /*CONFIG_EDAC_DEBUG*/
 
+static bool skx_adxl_decode(struct decoded_addr *res)
+
+{
+	int i, len = 0;
+
+	if (res->addr >= skx_tohm || (res->addr >= skx_tolm &&
+				      res->addr < BIT_ULL(32))) {
+		edac_dbg(0, "Address 0x%llx out of range\n", res->addr);
+		return false;
+	}
+
+	if (adxl_decode(res->addr, adxl_values)) {
+		edac_dbg(0, "Failed to decode 0x%llx\n", res->addr);
+		return false;
+	}
+
+	res->socket  = (int)adxl_values[component_indices[INDEX_SOCKET]];
+	res->imc     = (int)adxl_values[component_indices[INDEX_MEMCTRL]];
+	res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]];
+	res->dimm    = (int)adxl_values[component_indices[INDEX_DIMM]];
+
+	for (i = 0; i < adxl_component_count; i++) {
+		if (adxl_values[i] == ~0x0ull)
+			continue;
+
+		len += snprintf(adxl_msg + len, MSG_SIZE - len, " %s:0x%llx",
+				adxl_component_names[i], adxl_values[i]);
+		if (MSG_SIZE - len <= 0)
+			break;
+	}
+
+	return true;
+}
+
 static void skx_mce_output_error(struct mem_ctl_info *mci,
 				 const struct mce *m,
 				 struct decoded_addr *res)
 {
 	enum hw_event_mc_err_type tp_event;
-	char *type, *optype, msg[256];
+	char *type, *optype;
 	bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
 	bool overflow = GET_BITFIELD(m->status, 62, 62);
 	bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
@@ -1007,22 +1068,47 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
 			break;
 		}
 	}
+	if (adxl_component_count) {
+		snprintf(skx_msg, MSG_SIZE, "%s%s err_code:%04x:%04x %s",
+			 overflow ? " OVERFLOW" : "",
+			 (uncorrected_error && recoverable) ? " recoverable" : "",
+			 mscod, errcode, adxl_msg);
+	} else {
+		snprintf(skx_msg, MSG_SIZE,
+			 "%s%s err_code:%04x:%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:%x col:%x",
+			 overflow ? " OVERFLOW" : "",
+			 (uncorrected_error && recoverable) ? " recoverable" : "",
+			 mscod, errcode,
+			 res->socket, res->imc, res->rank,
+			 res->bank_group, res->bank_address, res->row, res->column);
+	}
 
-	snprintf(msg, sizeof(msg),
-		 "%s%s err_code:%04x:%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:%x col:%x",
-		 overflow ? " OVERFLOW" : "",
-		 (uncorrected_error && recoverable) ? " recoverable" : "",
-		 mscod, errcode,
-		 res->socket, res->imc, res->rank,
-		 res->bank_group, res->bank_address, res->row, res->column);
-
-	edac_dbg(0, "%s\n", msg);
+	edac_dbg(0, "%s\n", skx_msg);
 
 	/* Call the helper to output message */
 	edac_mc_handle_error(tp_event, mci, core_err_cnt,
 			     m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0,
 			     res->channel, res->dimm, -1,
-			     optype, msg);
+			     optype, skx_msg);
+}
+
+static struct mem_ctl_info *get_mci(int src_id, int lmc)
+{
+	struct skx_dev *d;
+
+	if (lmc > NUM_IMC - 1) {
+		skx_printk(KERN_ERR, "Bad lmc %d\n", lmc);
+		return NULL;
+	}
+
+	list_for_each_entry(d, &skx_edac_list, list) {
+		if (d->imc[0].src_id == src_id)
+			return d->imc[lmc].mci;
+	}
+
+	skx_printk(KERN_ERR, "No mci for src_id %d lmc %d\n", src_id, lmc);
+
+	return NULL;
 }
 
 static int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
@@ -1040,10 +1126,23 @@ static int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
 	if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV))
 		return NOTIFY_DONE;
 
+	memset(&res, 0, sizeof(res));
 	res.addr = mce->addr;
-	if (!skx_decode(&res))
+
+	if (adxl_component_count) {
+		if (!skx_adxl_decode(&res))
+			return NOTIFY_DONE;
+
+		mci = get_mci(res.socket, res.imc);
+	} else {
+		if (!skx_decode(&res))
+			return NOTIFY_DONE;
+
+		mci = res.dev->imc[res.imc].mci;
+	}
+
+	if (!mci)
 		return NOTIFY_DONE;
-	mci = res.dev->imc[res.imc].mci;
 
 	if (mce->mcgstatus & MCG_STATUS_MCIP)
 		type = "Exception";
@@ -1094,6 +1193,62 @@ static void skx_remove(void)
 	}
 }
 
+static void __init skx_adxl_get(void)
+{
+	const char * const *names;
+	int i, j;
+
+	names = adxl_get_component_names();
+	if (!names) {
+		skx_printk(KERN_NOTICE, "No firmware support for address translation.");
+		skx_printk(KERN_CONT, " Only decoding DDR4 address!\n");
+		return;
+	}
+
+	for (i = 0; i < INDEX_MAX; i++) {
+		for (j = 0; names[j]; j++) {
+			if (!strcmp(component_names[i], names[j])) {
+				component_indices[i] = j;
+				break;
+			}
+		}
+
+		if (!names[j])
+			goto err;
+	}
+
+	adxl_component_names = names;
+	while (*names++)
+		adxl_component_count++;
+
+	adxl_values = kcalloc(adxl_component_count, sizeof(*adxl_values),
+			      GFP_KERNEL);
+	if (!adxl_values) {
+		adxl_component_count = 0;
+		return;
+	}
+
+	adxl_msg = kzalloc(MSG_SIZE, GFP_KERNEL);
+	if (!adxl_msg) {
+		adxl_component_count = 0;
+		kfree(adxl_values);
+	}
+
+	return;
+err:
+	skx_printk(KERN_ERR, "'%s' is not matched from DSM parameters: ",
+		   component_names[i]);
+	for (j = 0; names[j]; j++)
+		skx_printk(KERN_CONT, "%s ", names[j]);
+	skx_printk(KERN_CONT, "\n");
+}
+
+static void __exit skx_adxl_put(void)
+{
+	kfree(adxl_values);
+	kfree(adxl_msg);
+}
+
 /*
  * skx_init:
  *	make sure we are running on the correct cpu model
@@ -1158,6 +1313,15 @@ static int __init skx_init(void)
 		}
 	}
 
+	skx_msg = kzalloc(MSG_SIZE, GFP_KERNEL);
+	if (!skx_msg) {
+		rc = -ENOMEM;
+		goto fail;
+	}
+
+	if (nvdimm_count)
+		skx_adxl_get();
+
 	/* Ensure that the OPSTATE is set correctly for POLL or NMI */
 	opstate_init();
 
@@ -1176,6 +1340,9 @@ static void __exit skx_exit(void)
 	edac_dbg(2, "\n");
 	mce_unregister_decode_chain(&skx_mce_dec);
 	skx_remove();
+	if (nvdimm_count)
+		skx_adxl_put();
+	kfree(skx_msg);
 	teardown_skx_debug();
 }
 
-- 
GitLab


From 1b50bb4d36e89fd54c14722c4ab5266ef17767ff Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@ucw.cz>
Date: Wed, 24 Oct 2018 22:44:39 +0200
Subject: [PATCH 0208/1890] Fix pattern handling optimalization

Check for zero duration before skipping step. This fixes pattern

    echo "0 1000 10 2550 0 1000" > pattern

which should do [  .-xXx-.] but does [   Xx-.]

Signed-off-by: Pavel Machek <pavel@ucw.cz>
Suggested-by: Jacek Anaszewski <jacek.anaszewski@gmail.com>
Signed-off-by: Jacek Anaszewski <jacek.anaszewski@gmail.com>
---
 drivers/leds/trigger/ledtrig-pattern.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/leds/trigger/ledtrig-pattern.c b/drivers/leds/trigger/ledtrig-pattern.c
index ce7acd115dd8d..174a298f1be02 100644
--- a/drivers/leds/trigger/ledtrig-pattern.c
+++ b/drivers/leds/trigger/ledtrig-pattern.c
@@ -87,9 +87,10 @@ static void pattern_trig_timer_function(struct timer_list *t)
 					   data->curr->brightness);
 			mod_timer(&data->timer,
 				  jiffies + msecs_to_jiffies(data->curr->delta_t));
-
-			/* Skip the tuple with zero duration */
-			pattern_trig_update_patterns(data);
+			if (!data->next->delta_t) {
+				/* Skip the tuple with zero duration */
+				pattern_trig_update_patterns(data);
+			}
 			/* Select next tuple */
 			pattern_trig_update_patterns(data);
 		} else {
-- 
GitLab


From 7dc94969e165464896366fcb096f4be18ba56f44 Mon Sep 17 00:00:00 2001
From: Evan Quan <evan.quan@amd.com>
Date: Tue, 23 Oct 2018 14:31:38 +0800
Subject: [PATCH 0209/1890] drm/amd/powerplay: correct the clocks for DAL to be
 Khz unit

Currently the clocks reported are in 10Khz unit. Correct them
as Khz unit as DAL wanted.

Signed-off-by: Evan Quan <evan.quan@amd.com>
Reviewed-by: Feifei Xu<Feifei.Xu@amd.com>
Reviewed-by: Rex Zhu <Rex.Zhu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../drm/amd/powerplay/hwmgr/vega20_hwmgr.c    | 21 +++++++++----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
index 4c9a1a9ef04b8..8a1ee9ce73863 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
@@ -2012,7 +2012,6 @@ int vega20_display_clock_voltage_request(struct pp_hwmgr *hwmgr,
 	if (data->smu_features[GNLD_DPM_DCEFCLK].enabled) {
 		switch (clk_type) {
 		case amd_pp_dcef_clock:
-			clk_freq = clock_req->clock_freq_in_khz / 100;
 			clk_select = PPCLK_DCEFCLK;
 			break;
 		case amd_pp_disp_clock:
@@ -2072,7 +2071,7 @@ static int vega20_notify_smc_display_config_after_ps_adjustment(
 
 	if (data->smu_features[GNLD_DPM_DCEFCLK].supported) {
 		clock_req.clock_type = amd_pp_dcef_clock;
-		clock_req.clock_freq_in_khz = min_clocks.dcefClock;
+		clock_req.clock_freq_in_khz = min_clocks.dcefClock * 10;
 		if (!vega20_display_clock_voltage_request(hwmgr, &clock_req)) {
 			if (data->smu_features[GNLD_DS_DCEFCLK].supported)
 				PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc_with_parameter(
@@ -2371,7 +2370,7 @@ static int vega20_get_sclks(struct pp_hwmgr *hwmgr,
 
 	for (i = 0; i < count; i++) {
 		clocks->data[i].clocks_in_khz =
-			dpm_table->dpm_levels[i].value * 100;
+			dpm_table->dpm_levels[i].value * 1000;
 		clocks->data[i].latency_in_us = 0;
 	}
 
@@ -2401,7 +2400,7 @@ static int vega20_get_memclocks(struct pp_hwmgr *hwmgr,
 	for (i = 0; i < count; i++) {
 		clocks->data[i].clocks_in_khz =
 			data->mclk_latency_table.entries[i].frequency =
-			dpm_table->dpm_levels[i].value * 100;
+			dpm_table->dpm_levels[i].value * 1000;
 		clocks->data[i].latency_in_us =
 			data->mclk_latency_table.entries[i].latency =
 			vega20_get_mem_latency(hwmgr, dpm_table->dpm_levels[i].value);
@@ -2426,7 +2425,7 @@ static int vega20_get_dcefclocks(struct pp_hwmgr *hwmgr,
 
 	for (i = 0; i < count; i++) {
 		clocks->data[i].clocks_in_khz =
-			dpm_table->dpm_levels[i].value * 100;
+			dpm_table->dpm_levels[i].value * 1000;
 		clocks->data[i].latency_in_us = 0;
 	}
 
@@ -2449,7 +2448,7 @@ static int vega20_get_socclocks(struct pp_hwmgr *hwmgr,
 
 	for (i = 0; i < count; i++) {
 		clocks->data[i].clocks_in_khz =
-			dpm_table->dpm_levels[i].value * 100;
+			dpm_table->dpm_levels[i].value * 1000;
 		clocks->data[i].latency_in_us = 0;
 	}
 
@@ -2600,11 +2599,11 @@ static int vega20_odn_edit_dpm_table(struct pp_hwmgr *hwmgr,
 				return -EINVAL;
 			}
 
-			if (input_clk < clocks.data[0].clocks_in_khz / 100 ||
+			if (input_clk < clocks.data[0].clocks_in_khz / 1000 ||
 			    input_clk > od8_settings[OD8_SETTING_UCLK_FMAX].max_value) {
 				pr_info("clock freq %d is not within allowed range [%d - %d]\n",
 					input_clk,
-					clocks.data[0].clocks_in_khz / 100,
+					clocks.data[0].clocks_in_khz / 1000,
 					od8_settings[OD8_SETTING_UCLK_FMAX].max_value);
 				return -EINVAL;
 			}
@@ -2756,7 +2755,7 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
 
 		for (i = 0; i < clocks.num_levels; i++)
 			size += sprintf(buf + size, "%d: %uMhz %s\n",
-				i, clocks.data[i].clocks_in_khz / 100,
+				i, clocks.data[i].clocks_in_khz / 1000,
 				(clocks.data[i].clocks_in_khz == now) ? "*" : "");
 		break;
 
@@ -2773,7 +2772,7 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
 
 		for (i = 0; i < clocks.num_levels; i++)
 			size += sprintf(buf + size, "%d: %uMhz %s\n",
-				i, clocks.data[i].clocks_in_khz / 100,
+				i, clocks.data[i].clocks_in_khz / 1000,
 				(clocks.data[i].clocks_in_khz == now) ? "*" : "");
 		break;
 
@@ -2838,7 +2837,7 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
 					return ret);
 
 			size += sprintf(buf + size, "MCLK: %7uMhz %10uMhz\n",
-				clocks.data[0].clocks_in_khz / 100,
+				clocks.data[0].clocks_in_khz / 1000,
 				od8_settings[OD8_SETTING_UCLK_FMAX].max_value);
 		}
 
-- 
GitLab


From 3732eb0683c17113201cd29fdefd7a58b1acfa7f Mon Sep 17 00:00:00 2001
From: Evan Quan <evan.quan@amd.com>
Date: Wed, 24 Oct 2018 12:57:56 +0800
Subject: [PATCH 0210/1890] drm/amd/powerplay: commonize the API for retrieving
 current clocks

So that it can be shared between all clocks.

Signed-off-by: Evan Quan <evan.quan@amd.com>
Reviewed-by: Feifei Xu<Feifei.Xu@amd.com>
Reviewed-by: Rex Zhu <Rex.Zhu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../drm/amd/powerplay/hwmgr/vega20_hwmgr.c    | 44 +++++++------------
 1 file changed, 15 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
index 8a1ee9ce73863..57143d51e3eed 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
@@ -1875,38 +1875,20 @@ static int vega20_get_gpu_power(struct pp_hwmgr *hwmgr,
 	return ret;
 }
 
-static int vega20_get_current_gfx_clk_freq(struct pp_hwmgr *hwmgr, uint32_t *gfx_freq)
+static int vega20_get_current_clk_freq(struct pp_hwmgr *hwmgr,
+		PPCLK_e clk_id, uint32_t *clk_freq)
 {
-	uint32_t gfx_clk = 0;
 	int ret = 0;
 
-	*gfx_freq = 0;
+	*clk_freq = 0;
 
 	PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc_with_parameter(hwmgr,
-			PPSMC_MSG_GetDpmClockFreq, (PPCLK_GFXCLK << 16))) == 0,
-			"[GetCurrentGfxClkFreq] Attempt to get Current GFXCLK Frequency Failed!",
+			PPSMC_MSG_GetDpmClockFreq, (clk_id << 16))) == 0,
+			"[GetCurrentClkFreq] Attempt to get Current Frequency Failed!",
 			return ret);
-	gfx_clk = smum_get_argument(hwmgr);
+	*clk_freq = smum_get_argument(hwmgr);
 
-	*gfx_freq = gfx_clk * 100;
-
-	return 0;
-}
-
-static int vega20_get_current_mclk_freq(struct pp_hwmgr *hwmgr, uint32_t *mclk_freq)
-{
-	uint32_t mem_clk = 0;
-	int ret = 0;
-
-	*mclk_freq = 0;
-
-	PP_ASSERT_WITH_CODE((ret = smum_send_msg_to_smc_with_parameter(hwmgr,
-			PPSMC_MSG_GetDpmClockFreq, (PPCLK_UCLK << 16))) == 0,
-			"[GetCurrentMClkFreq] Attempt to get Current MCLK Frequency Failed!",
-			return ret);
-	mem_clk = smum_get_argument(hwmgr);
-
-	*mclk_freq = mem_clk * 100;
+	*clk_freq = *clk_freq * 100;
 
 	return 0;
 }
@@ -1937,12 +1919,16 @@ static int vega20_read_sensor(struct pp_hwmgr *hwmgr, int idx,
 
 	switch (idx) {
 	case AMDGPU_PP_SENSOR_GFX_SCLK:
-		ret = vega20_get_current_gfx_clk_freq(hwmgr, (uint32_t *)value);
+		ret = vega20_get_current_clk_freq(hwmgr,
+				PPCLK_GFXCLK,
+				(uint32_t *)value);
 		if (!ret)
 			*size = 4;
 		break;
 	case AMDGPU_PP_SENSOR_GFX_MCLK:
-		ret = vega20_get_current_mclk_freq(hwmgr, (uint32_t *)value);
+		ret = vega20_get_current_clk_freq(hwmgr,
+				PPCLK_UCLK,
+				(uint32_t *)value);
 		if (!ret)
 			*size = 4;
 		break;
@@ -2743,7 +2729,7 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
 
 	switch (type) {
 	case PP_SCLK:
-		ret = vega20_get_current_gfx_clk_freq(hwmgr, &now);
+		ret = vega20_get_current_clk_freq(hwmgr, PPCLK_GFXCLK, &now);
 		PP_ASSERT_WITH_CODE(!ret,
 				"Attempt to get current gfx clk Failed!",
 				return ret);
@@ -2760,7 +2746,7 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
 		break;
 
 	case PP_MCLK:
-		ret = vega20_get_current_mclk_freq(hwmgr, &now);
+		ret = vega20_get_current_clk_freq(hwmgr, PPCLK_UCLK, &now);
 		PP_ASSERT_WITH_CODE(!ret,
 				"Attempt to get current mclk freq Failed!",
 				return ret);
-- 
GitLab


From 0af5c656fdb797f74ee57414e0c07cd57406d0c3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Thu, 18 Oct 2018 14:29:28 +0200
Subject: [PATCH 0211/1890] drm/amdgpu: fix amdgpu_vm_fini
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We should not remove mappings in rbtree_postorder_for_each_entry_safe
because that rebalances the tree.

Signed-off-by: Christian KÃ¶nig <christian.koenig@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 6904d794d60a7..db0cbf8d219d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -3234,8 +3234,10 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	}
 	rbtree_postorder_for_each_entry_safe(mapping, tmp,
 					     &vm->va.rb_root, rb) {
+		/* Don't remove the mapping here, we don't want to trigger a
+		 * rebalance and the tree is about to be destroyed anyway.
+		 */
 		list_del(&mapping->list);
-		amdgpu_vm_it_remove(mapping, &vm->va);
 		kfree(mapping);
 	}
 	list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
-- 
GitLab


From ef5febae1543f35a45f01614123e829d77326d0f Mon Sep 17 00:00:00 2001
From: Vasily Gorbik <gor@linux.ibm.com>
Date: Fri, 19 Oct 2018 13:37:46 +0200
Subject: [PATCH 0212/1890] s390/decompressor: add missing FORCE to build
 targets

According to Documentation/kbuild/makefiles.txt all build targets
using if_changed should use FORCE as well. Add missing FORCE to make
sure vmlinux decompressor targets are rebuild properly when not just
immediate prerequisites have changed but also when build command differs.

Reviewed-by: Philipp Rudo <prudo@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/boot/compressed/Makefile | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile
index 593039620487a..92b5487b0556d 100644
--- a/arch/s390/boot/compressed/Makefile
+++ b/arch/s390/boot/compressed/Makefile
@@ -22,7 +22,7 @@ OBJCOPYFLAGS :=
 OBJECTS := $(addprefix $(obj)/,$(obj-y))
 
 LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup -T
-$(obj)/vmlinux: $(obj)/vmlinux.lds $(objtree)/arch/s390/boot/startup.a $(OBJECTS)
+$(obj)/vmlinux: $(obj)/vmlinux.lds $(objtree)/arch/s390/boot/startup.a $(OBJECTS) FORCE
 	$(call if_changed,ld)
 
 OBJCOPYFLAGS_info.bin := -O binary --only-section=.vmlinux.info
@@ -46,17 +46,17 @@ suffix-$(CONFIG_KERNEL_LZMA)  := .lzma
 suffix-$(CONFIG_KERNEL_LZO)  := .lzo
 suffix-$(CONFIG_KERNEL_XZ)  := .xz
 
-$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y)
+$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) FORCE
 	$(call if_changed,gzip)
-$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y)
+$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) FORCE
 	$(call if_changed,bzip2)
-$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y)
+$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y) FORCE
 	$(call if_changed,lz4)
-$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y)
+$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) FORCE
 	$(call if_changed,lzma)
-$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y)
+$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) FORCE
 	$(call if_changed,lzo)
-$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y)
+$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) FORCE
 	$(call if_changed,xzkern)
 
 OBJCOPYFLAGS_piggy.o := -I binary -O elf64-s390 -B s390:64-bit --rename-section .data=.vmlinux.bin.compressed
-- 
GitLab


From b44b136a3773d8a9c7853f8df716bd1483613cbb Mon Sep 17 00:00:00 2001
From: Vasily Gorbik <gor@linux.ibm.com>
Date: Fri, 19 Oct 2018 15:37:01 +0200
Subject: [PATCH 0213/1890] s390/vdso: add missing FORCE to build targets

According to Documentation/kbuild/makefiles.txt all build targets using
if_changed should use FORCE as well. Add missing FORCE to make sure
vdso targets are rebuild properly when not just immediate prerequisites
have changed but also when build command differs.

Reviewed-by: Philipp Rudo <prudo@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/vdso32/Makefile | 6 +++---
 arch/s390/kernel/vdso64/Makefile | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile
index eb8aebea3ea7b..e76309fbbcb3b 100644
--- a/arch/s390/kernel/vdso32/Makefile
+++ b/arch/s390/kernel/vdso32/Makefile
@@ -37,7 +37,7 @@ KASAN_SANITIZE := n
 $(obj)/vdso32_wrapper.o : $(obj)/vdso32.so
 
 # link rule for the .so file, .lds has to be first
-$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32)
+$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) FORCE
 	$(call if_changed,vdso32ld)
 
 # strip rule for the .so file
@@ -46,12 +46,12 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE
 	$(call if_changed,objcopy)
 
 # assembly rules for the .S files
-$(obj-vdso32): %.o: %.S
+$(obj-vdso32): %.o: %.S FORCE
 	$(call if_changed_dep,vdso32as)
 
 # actual build commands
 quiet_cmd_vdso32ld = VDSO32L $@
-      cmd_vdso32ld = $(CC) $(c_flags) -Wl,-T $^ -o $@
+      cmd_vdso32ld = $(CC) $(c_flags) -Wl,-T $(filter %.lds %.o,$^) -o $@
 quiet_cmd_vdso32as = VDSO32A $@
       cmd_vdso32as = $(CC) $(a_flags) -c -o $@ $<
 
diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile
index a22b2cf86eec9..f849ac61c5da0 100644
--- a/arch/s390/kernel/vdso64/Makefile
+++ b/arch/s390/kernel/vdso64/Makefile
@@ -37,7 +37,7 @@ KASAN_SANITIZE := n
 $(obj)/vdso64_wrapper.o : $(obj)/vdso64.so
 
 # link rule for the .so file, .lds has to be first
-$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64)
+$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) FORCE
 	$(call if_changed,vdso64ld)
 
 # strip rule for the .so file
@@ -46,12 +46,12 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE
 	$(call if_changed,objcopy)
 
 # assembly rules for the .S files
-$(obj-vdso64): %.o: %.S
+$(obj-vdso64): %.o: %.S FORCE
 	$(call if_changed_dep,vdso64as)
 
 # actual build commands
 quiet_cmd_vdso64ld = VDSO64L $@
-      cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $^ -o $@
+      cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $(filter %.lds %.o,$^) -o $@
 quiet_cmd_vdso64as = VDSO64A $@
       cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $<
 
-- 
GitLab


From 5a2e1853d68904c4b26706dba2884cbeb77bc3ee Mon Sep 17 00:00:00 2001
From: Vasily Gorbik <gor@linux.ibm.com>
Date: Wed, 17 Oct 2018 13:59:46 +0200
Subject: [PATCH 0214/1890] s390: avoid vmlinux segments overlap

Currently .vmlinux.info section of uncompressed vmlinux elf image is
included into the data segment and load address specified as 0. That
extends data segment to address 0 and makes "text" and "data" segments
overlap.
Program Headers:
  Type           Offset             VirtAddr           PhysAddr
                 FileSiz            MemSiz              Flags  Align
  LOAD           0x0000000000001000 0x0000000000100000 0x0000000000100000
                 0x0000000000ead03c 0x0000000000ead03c  R E    0x1000
  LOAD           0x0000000000eaf000 0x0000000000000000 0x0000000000000000
                 0x0000000001a13400 0x000000000233b520  RWE    0x1000
  NOTE           0x0000000000eae000 0x0000000000fad000 0x0000000000fad000
                 0x000000000000003c 0x000000000000003c         0x4

 Section to Segment mapping:
  Segment Sections...
   00     .text .notes
   01     .rodata __ksymtab __ksymtab_gpl __ksymtab_strings __param
   __modver .data..ro_after_init __ex_table .data __bug_table .init.text
   .exit.text .exit.data .altinstructions .altinstr_replacement
   .nospec_call_table .nospec_return_table .boot.data .init.data
   .data..percpu .bss .vmlinux.info
   02     .notes

Later when vmlinux.bin is produced from vmlinux, .vmlinux.info section
is removed. But elf vmlinux file, even though it is not bootable anymore,
used for debugging and loadable segments overlap should be avoided.

Utilize special ":NONE" phdr specification to avoid adding .vmlinux.info
into loadable data segment. Also set .vmlinux.info section type to INFO,
which allows to get a not-loadable info CONTENTS section.

Since minimal supported version of binutils 2.20 does not have
--dump-section objcopy option, make .vmlinux.info section loadable during
info.bin creation to get actual section contents.

Reported-by: Philipp Rudo <prudo@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/boot/compressed/Makefile | 2 +-
 arch/s390/kernel/vmlinux.lds.S     | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile
index 92b5487b0556d..b1bdd15e3429f 100644
--- a/arch/s390/boot/compressed/Makefile
+++ b/arch/s390/boot/compressed/Makefile
@@ -25,7 +25,7 @@ LDFLAGS_vmlinux := --oformat $(LD_BFD) -e startup -T
 $(obj)/vmlinux: $(obj)/vmlinux.lds $(objtree)/arch/s390/boot/startup.a $(OBJECTS) FORCE
 	$(call if_changed,ld)
 
-OBJCOPYFLAGS_info.bin := -O binary --only-section=.vmlinux.info
+OBJCOPYFLAGS_info.bin := -O binary --only-section=.vmlinux.info --set-section-flags .vmlinux.info=load
 $(obj)/info.bin: vmlinux FORCE
 	$(call if_changed,objcopy)
 
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 21eb7407d51ba..8429ab0797157 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -154,14 +154,14 @@ SECTIONS
 	 * uncompressed image info used by the decompressor
 	 * it should match struct vmlinux_info
 	 */
-	.vmlinux.info 0 : {
+	.vmlinux.info 0 (INFO) : {
 		QUAD(_stext)					/* default_lma */
 		QUAD(startup_continue)				/* entry */
 		QUAD(__bss_start - _stext)			/* image_size */
 		QUAD(__bss_stop - __bss_start)			/* bss_size */
 		QUAD(__boot_data_start)				/* bootdata_off */
 		QUAD(__boot_data_end - __boot_data_start)	/* bootdata_size */
-	}
+	} :NONE
 
 	/* Debugging sections.	*/
 	STABS_DEBUG
-- 
GitLab


From 70025f84e5b79627a6739533c4fe7cef5b605886 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 9 Oct 2018 17:46:51 +0100
Subject: [PATCH 0215/1890] KEYS: Provide key type operations for asymmetric
 key ops [ver #2]

Provide five new operations in the key_type struct that can be used to
provide access to asymmetric key operations.  These will be implemented for
the asymmetric key type in a later patch and may refer to a key retained in
RAM by the kernel or a key retained in crypto hardware.

     int (*asym_query)(const struct kernel_pkey_params *params,
		       struct kernel_pkey_query *info);
     int (*asym_eds_op)(struct kernel_pkey_params *params,
			const void *in, void *out);
     int (*asym_verify_signature)(struct kernel_pkey_params *params,
			          const void *in, const void *in2);

Since encrypt, decrypt and sign are identical in their interfaces, they're
rolled together in the asym_eds_op() operation and there's an operation ID
in the params argument to distinguish them.

Verify is different in that we supply the data and the signature instead
and get an error value (or 0) as the only result on the expectation that
this may well be how a hardware crypto device may work.

Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Marcel Holtmann <marcel@holtmann.org>
Reviewed-by: Marcel Holtmann <marcel@holtmann.org>
Reviewed-by: Denis Kenzior <denkenz@gmail.com>
Tested-by: Denis Kenzior <denkenz@gmail.com>
Signed-off-by: James Morris <james.morris@microsoft.com>
---
 Documentation/security/keys/core.rst | 106 +++++++++++++++++++++++++++
 include/linux/key-type.h             |  11 +++
 include/linux/keyctl.h               |  46 ++++++++++++
 include/uapi/linux/keyctl.h          |   5 ++
 4 files changed, 168 insertions(+)
 create mode 100644 include/linux/keyctl.h

diff --git a/Documentation/security/keys/core.rst b/Documentation/security/keys/core.rst
index 9ce7256c6edba..c144978479d58 100644
--- a/Documentation/security/keys/core.rst
+++ b/Documentation/security/keys/core.rst
@@ -1483,6 +1483,112 @@ The structure has a number of fields, some of which are mandatory:
      attempted key link operation. If there is no match, -EINVAL is returned.
 
 
+  *  ``int (*asym_eds_op)(struct kernel_pkey_params *params,
+			  const void *in, void *out);``
+     ``int (*asym_verify_signature)(struct kernel_pkey_params *params,
+				    const void *in, const void *in2);``
+
+     These methods are optional.  If provided the first allows a key to be
+     used to encrypt, decrypt or sign a blob of data, and the second allows a
+     key to verify a signature.
+
+     In all cases, the following information is provided in the params block::
+
+	struct kernel_pkey_params {
+		struct key	*key;
+		const char	*encoding;
+		const char	*hash_algo;
+		char		*info;
+		__u32		in_len;
+		union {
+			__u32	out_len;
+			__u32	in2_len;
+		};
+		enum kernel_pkey_operation op : 8;
+	};
+
+     This includes the key to be used; a string indicating the encoding to use
+     (for instance, "pkcs1" may be used with an RSA key to indicate
+     RSASSA-PKCS1-v1.5 or RSAES-PKCS1-v1.5 encoding or "raw" if no encoding);
+     the name of the hash algorithm used to generate the data for a signature
+     (if appropriate); the sizes of the input and output (or second input)
+     buffers; and the ID of the operation to be performed.
+
+     For a given operation ID, the input and output buffers are used as
+     follows::
+
+	Operation ID		in,in_len	out,out_len	in2,in2_len
+	=======================	===============	===============	===============
+	kernel_pkey_encrypt	Raw data	Encrypted data	-
+	kernel_pkey_decrypt	Encrypted data	Raw data	-
+	kernel_pkey_sign	Raw data	Signature	-
+	kernel_pkey_verify	Raw data	-		Signature
+
+     asym_eds_op() deals with encryption, decryption and signature creation as
+     specified by params->op.  Note that params->op is also set for
+     asym_verify_signature().
+
+     Encrypting and signature creation both take raw data in the input buffer
+     and return the encrypted result in the output buffer.  Padding may have
+     been added if an encoding was set.  In the case of signature creation,
+     depending on the encoding, the padding created may need to indicate the
+     digest algorithm - the name of which should be supplied in hash_algo.
+
+     Decryption takes encrypted data in the input buffer and returns the raw
+     data in the output buffer.  Padding will get checked and stripped off if
+     an encoding was set.
+
+     Verification takes raw data in the input buffer and the signature in the
+     second input buffer and checks that the one matches the other.  Padding
+     will be validated.  Depending on the encoding, the digest algorithm used
+     to generate the raw data may need to be indicated in hash_algo.
+
+     If successful, asym_eds_op() should return the number of bytes written
+     into the output buffer.  asym_verify_signature() should return 0.
+
+     A variety of errors may be returned, including EOPNOTSUPP if the operation
+     is not supported; EKEYREJECTED if verification fails; ENOPKG if the
+     required crypto isn't available.
+
+
+  *  ``int (*asym_query)(const struct kernel_pkey_params *params,
+			 struct kernel_pkey_query *info);``
+
+     This method is optional.  If provided it allows information about the
+     public or asymmetric key held in the key to be determined.
+
+     The parameter block is as for asym_eds_op() and co. but in_len and out_len
+     are unused.  The encoding and hash_algo fields should be used to reduce
+     the returned buffer/data sizes as appropriate.
+
+     If successful, the following information is filled in::
+
+	struct kernel_pkey_query {
+		__u32		supported_ops;
+		__u32		key_size;
+		__u16		max_data_size;
+		__u16		max_sig_size;
+		__u16		max_enc_size;
+		__u16		max_dec_size;
+	};
+
+     The supported_ops field will contain a bitmask indicating what operations
+     are supported by the key, including encryption of a blob, decryption of a
+     blob, signing a blob and verifying the signature on a blob.  The following
+     constants are defined for this::
+
+	KEYCTL_SUPPORTS_{ENCRYPT,DECRYPT,SIGN,VERIFY}
+
+     The key_size field is the size of the key in bits.  max_data_size and
+     max_sig_size are the maximum raw data and signature sizes for creation and
+     verification of a signature; max_enc_size and max_dec_size are the maximum
+     raw data and signature sizes for encryption and decryption.  The
+     max_*_size fields are measured in bytes.
+
+     If successful, 0 will be returned.  If the key doesn't support this,
+     EOPNOTSUPP will be returned.
+
+
 Request-Key Callback Service
 ============================
 
diff --git a/include/linux/key-type.h b/include/linux/key-type.h
index 05d8fb5a06c49..bc9af551fc838 100644
--- a/include/linux/key-type.h
+++ b/include/linux/key-type.h
@@ -17,6 +17,9 @@
 
 #ifdef CONFIG_KEYS
 
+struct kernel_pkey_query;
+struct kernel_pkey_params;
+
 /*
  * key under-construction record
  * - passed to the request_key actor if supplied
@@ -155,6 +158,14 @@ struct key_type {
 	 */
 	struct key_restriction *(*lookup_restriction)(const char *params);
 
+	/* Asymmetric key accessor functions. */
+	int (*asym_query)(const struct kernel_pkey_params *params,
+			  struct kernel_pkey_query *info);
+	int (*asym_eds_op)(struct kernel_pkey_params *params,
+			   const void *in, void *out);
+	int (*asym_verify_signature)(struct kernel_pkey_params *params,
+				     const void *in, const void *in2);
+
 	/* internal fields */
 	struct list_head	link;		/* link in types list */
 	struct lock_class_key	lock_class;	/* key->sem lock class */
diff --git a/include/linux/keyctl.h b/include/linux/keyctl.h
new file mode 100644
index 0000000000000..c7c48c79ce0e9
--- /dev/null
+++ b/include/linux/keyctl.h
@@ -0,0 +1,46 @@
+/* keyctl kernel bits
+ *
+ * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef __LINUX_KEYCTL_H
+#define __LINUX_KEYCTL_H
+
+#include <uapi/linux/keyctl.h>
+
+struct kernel_pkey_query {
+	__u32		supported_ops;	/* Which ops are supported */
+	__u32		key_size;	/* Size of the key in bits */
+	__u16		max_data_size;	/* Maximum size of raw data to sign in bytes */
+	__u16		max_sig_size;	/* Maximum size of signature in bytes */
+	__u16		max_enc_size;	/* Maximum size of encrypted blob in bytes */
+	__u16		max_dec_size;	/* Maximum size of decrypted blob in bytes */
+};
+
+enum kernel_pkey_operation {
+	kernel_pkey_encrypt,
+	kernel_pkey_decrypt,
+	kernel_pkey_sign,
+	kernel_pkey_verify,
+};
+
+struct kernel_pkey_params {
+	struct key	*key;
+	const char	*encoding;	/* Encoding (eg. "oaep" or "raw" for none) */
+	const char	*hash_algo;	/* Digest algorithm used (eg. "sha1") or NULL if N/A */
+	char		*info;		/* Modified info string to be released later */
+	__u32		in_len;		/* Input data size */
+	union {
+		__u32	out_len;	/* Output buffer size (enc/dec/sign) */
+		__u32	in2_len;	/* 2nd input data size (verify) */
+	};
+	enum kernel_pkey_operation op : 8;
+};
+
+#endif /* __LINUX_KEYCTL_H */
diff --git a/include/uapi/linux/keyctl.h b/include/uapi/linux/keyctl.h
index 0f3cb13db8e93..1d1e9f2877afb 100644
--- a/include/uapi/linux/keyctl.h
+++ b/include/uapi/linux/keyctl.h
@@ -82,4 +82,9 @@ struct keyctl_kdf_params {
 	__u32 __spare[8];
 };
 
+#define KEYCTL_SUPPORTS_ENCRYPT		0x01
+#define KEYCTL_SUPPORTS_DECRYPT		0x02
+#define KEYCTL_SUPPORTS_SIGN		0x04
+#define KEYCTL_SUPPORTS_VERIFY		0x08
+
 #endif /*  _LINUX_KEYCTL_H */
-- 
GitLab


From 00d60fd3b93219ea854220f0fd264b86398cbc53 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 9 Oct 2018 17:46:59 +0100
Subject: [PATCH 0216/1890] KEYS: Provide keyctls to drive the new key type ops
 for asymmetric keys [ver #2]

Provide five keyctl functions that permit userspace to make use of the new
key type ops for accessing and driving asymmetric keys.

 (*) Query an asymmetric key.

	long keyctl(KEYCTL_PKEY_QUERY,
		    key_serial_t key, unsigned long reserved,
		    struct keyctl_pkey_query *info);

     Get information about an asymmetric key.  The information is returned
     in the keyctl_pkey_query struct:

	__u32	supported_ops;

     A bit mask of flags indicating which ops are supported.  This is
     constructed from a bitwise-OR of:

	KEYCTL_SUPPORTS_{ENCRYPT,DECRYPT,SIGN,VERIFY}

	__u32	key_size;

     The size in bits of the key.

	__u16	max_data_size;
	__u16	max_sig_size;
	__u16	max_enc_size;
	__u16	max_dec_size;

     The maximum sizes in bytes of a blob of data to be signed, a signature
     blob, a blob to be encrypted and a blob to be decrypted.

     reserved must be set to 0.  This is intended for future use to hand
     over one or more passphrases needed unlock a key.

     If successful, 0 is returned.  If the key is not an asymmetric key,
     EOPNOTSUPP is returned.

 (*) Encrypt, decrypt, sign or verify a blob using an asymmetric key.

	long keyctl(KEYCTL_PKEY_ENCRYPT,
		    const struct keyctl_pkey_params *params,
		    const char *info,
		    const void *in,
		    void *out);

	long keyctl(KEYCTL_PKEY_DECRYPT,
		    const struct keyctl_pkey_params *params,
		    const char *info,
		    const void *in,
		    void *out);

	long keyctl(KEYCTL_PKEY_SIGN,
		    const struct keyctl_pkey_params *params,
		    const char *info,
		    const void *in,
		    void *out);

	long keyctl(KEYCTL_PKEY_VERIFY,
		    const struct keyctl_pkey_params *params,
		    const char *info,
		    const void *in,
		    const void *in2);

     Use an asymmetric key to perform a public-key cryptographic operation
     a blob of data.

     The parameter block pointed to by params contains a number of integer
     values:

	__s32		key_id;
	__u32		in_len;
	__u32		out_len;
	__u32		in2_len;

     For a given operation, the in and out buffers are used as follows:

	Operation ID		in,in_len	out,out_len	in2,in2_len
	=======================	===============	===============	===========
	KEYCTL_PKEY_ENCRYPT	Raw data	Encrypted data	-
	KEYCTL_PKEY_DECRYPT	Encrypted data	Raw data	-
	KEYCTL_PKEY_SIGN	Raw data	Signature	-
	KEYCTL_PKEY_VERIFY	Raw data	-		Signature

     info is a string of key=value pairs that supply supplementary
     information.

     The __spare space in the parameter block must be set to 0.  This is
     intended, amongst other things, to allow the passing of passphrases
     required to unlock a key.

     If successful, encrypt, decrypt and sign all return the amount of data
     written into the output buffer.  Verification returns 0 on success.

Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Marcel Holtmann <marcel@holtmann.org>
Reviewed-by: Marcel Holtmann <marcel@holtmann.org>
Reviewed-by: Denis Kenzior <denkenz@gmail.com>
Tested-by: Denis Kenzior <denkenz@gmail.com>
Signed-off-by: James Morris <james.morris@microsoft.com>
---
 Documentation/security/keys/core.rst | 111 +++++++++
 include/uapi/linux/keyctl.h          |  25 +++
 security/keys/Makefile               |   1 +
 security/keys/compat.c               |  18 ++
 security/keys/internal.h             |  39 ++++
 security/keys/keyctl.c               |  24 ++
 security/keys/keyctl_pkey.c          | 323 +++++++++++++++++++++++++++
 7 files changed, 541 insertions(+)
 create mode 100644 security/keys/keyctl_pkey.c

diff --git a/Documentation/security/keys/core.rst b/Documentation/security/keys/core.rst
index c144978479d58..9521c4207f014 100644
--- a/Documentation/security/keys/core.rst
+++ b/Documentation/security/keys/core.rst
@@ -859,6 +859,7 @@ The keyctl syscall functions are:
      and either the buffer length or the OtherInfo length exceeds the
      allowed length.
 
+
   *  Restrict keyring linkage::
 
 	long keyctl(KEYCTL_RESTRICT_KEYRING, key_serial_t keyring,
@@ -890,6 +891,116 @@ The keyctl syscall functions are:
      applicable to the asymmetric key type.
 
 
+  *  Query an asymmetric key::
+
+	long keyctl(KEYCTL_PKEY_QUERY,
+		    key_serial_t key_id, unsigned long reserved,
+		    struct keyctl_pkey_query *info);
+
+     Get information about an asymmetric key.  The information is returned in
+     the keyctl_pkey_query struct::
+
+	__u32	supported_ops;
+	__u32	key_size;
+	__u16	max_data_size;
+	__u16	max_sig_size;
+	__u16	max_enc_size;
+	__u16	max_dec_size;
+	__u32	__spare[10];
+
+     ``supported_ops`` contains a bit mask of flags indicating which ops are
+     supported.  This is constructed from a bitwise-OR of::
+
+	KEYCTL_SUPPORTS_{ENCRYPT,DECRYPT,SIGN,VERIFY}
+
+     ``key_size`` indicated the size of the key in bits.
+
+     ``max_*_size`` indicate the maximum sizes in bytes of a blob of data to be
+     signed, a signature blob, a blob to be encrypted and a blob to be
+     decrypted.
+
+     ``__spare[]`` must be set to 0.  This is intended for future use to hand
+     over one or more passphrases needed unlock a key.
+
+     If successful, 0 is returned.  If the key is not an asymmetric key,
+     EOPNOTSUPP is returned.
+
+
+  *  Encrypt, decrypt, sign or verify a blob using an asymmetric key::
+
+	long keyctl(KEYCTL_PKEY_ENCRYPT,
+		    const struct keyctl_pkey_params *params,
+		    const char *info,
+		    const void *in,
+		    void *out);
+
+	long keyctl(KEYCTL_PKEY_DECRYPT,
+		    const struct keyctl_pkey_params *params,
+		    const char *info,
+		    const void *in,
+		    void *out);
+
+	long keyctl(KEYCTL_PKEY_SIGN,
+		    const struct keyctl_pkey_params *params,
+		    const char *info,
+		    const void *in,
+		    void *out);
+
+	long keyctl(KEYCTL_PKEY_VERIFY,
+		    const struct keyctl_pkey_params *params,
+		    const char *info,
+		    const void *in,
+		    const void *in2);
+
+     Use an asymmetric key to perform a public-key cryptographic operation a
+     blob of data.  For encryption and verification, the asymmetric key may
+     only need the public parts to be available, but for decryption and signing
+     the private parts are required also.
+
+     The parameter block pointed to by params contains a number of integer
+     values::
+
+	__s32		key_id;
+	__u32		in_len;
+	__u32		out_len;
+	__u32		in2_len;
+
+     ``key_id`` is the ID of the asymmetric key to be used.  ``in_len`` and
+     ``in2_len`` indicate the amount of data in the in and in2 buffers and
+     ``out_len`` indicates the size of the out buffer as appropriate for the
+     above operations.
+
+     For a given operation, the in and out buffers are used as follows::
+
+	Operation ID		in,in_len	out,out_len	in2,in2_len
+	=======================	===============	===============	===============
+	KEYCTL_PKEY_ENCRYPT	Raw data	Encrypted data	-
+	KEYCTL_PKEY_DECRYPT	Encrypted data	Raw data	-
+	KEYCTL_PKEY_SIGN	Raw data	Signature	-
+	KEYCTL_PKEY_VERIFY	Raw data	-		Signature
+
+     ``info`` is a string of key=value pairs that supply supplementary
+     information.  These include:
+
+	``enc=<encoding>`` The encoding of the encrypted/signature blob.  This
+			can be "pkcs1" for RSASSA-PKCS1-v1.5 or
+			RSAES-PKCS1-v1.5; "pss" for "RSASSA-PSS"; "oaep" for
+			"RSAES-OAEP".  If omitted or is "raw", the raw output
+			of the encryption function is specified.
+
+	``hash=<algo>``	If the data buffer contains the output of a hash
+			function and the encoding includes some indication of
+			which hash function was used, the hash function can be
+			specified with this, eg. "hash=sha256".
+
+     The ``__spare[]`` space in the parameter block must be set to 0.  This is
+     intended, amongst other things, to allow the passing of passphrases
+     required to unlock a key.
+
+     If successful, encrypt, decrypt and sign all return the amount of data
+     written into the output buffer.  Verification returns 0 on success.
+
+
 Kernel Services
 ===============
 
diff --git a/include/uapi/linux/keyctl.h b/include/uapi/linux/keyctl.h
index 1d1e9f2877afb..f45ee0f69c0c2 100644
--- a/include/uapi/linux/keyctl.h
+++ b/include/uapi/linux/keyctl.h
@@ -61,6 +61,11 @@
 #define KEYCTL_INVALIDATE		21	/* invalidate a key */
 #define KEYCTL_GET_PERSISTENT		22	/* get a user's persistent keyring */
 #define KEYCTL_DH_COMPUTE		23	/* Compute Diffie-Hellman values */
+#define KEYCTL_PKEY_QUERY		24	/* Query public key parameters */
+#define KEYCTL_PKEY_ENCRYPT		25	/* Encrypt a blob using a public key */
+#define KEYCTL_PKEY_DECRYPT		26	/* Decrypt a blob using a public key */
+#define KEYCTL_PKEY_SIGN		27	/* Create a public key signature */
+#define KEYCTL_PKEY_VERIFY		28	/* Verify a public key signature */
 #define KEYCTL_RESTRICT_KEYRING		29	/* Restrict keys allowed to link to a keyring */
 
 /* keyctl structures */
@@ -87,4 +92,24 @@ struct keyctl_kdf_params {
 #define KEYCTL_SUPPORTS_SIGN		0x04
 #define KEYCTL_SUPPORTS_VERIFY		0x08
 
+struct keyctl_pkey_query {
+	__u32		supported_ops;	/* Which ops are supported */
+	__u32		key_size;	/* Size of the key in bits */
+	__u16		max_data_size;	/* Maximum size of raw data to sign in bytes */
+	__u16		max_sig_size;	/* Maximum size of signature in bytes */
+	__u16		max_enc_size;	/* Maximum size of encrypted blob in bytes */
+	__u16		max_dec_size;	/* Maximum size of decrypted blob in bytes */
+	__u32		__spare[10];
+};
+
+struct keyctl_pkey_params {
+	__s32		key_id;		/* Serial no. of public key to use */
+	__u32		in_len;		/* Input data size */
+	union {
+		__u32		out_len;	/* Output buffer size (encrypt/decrypt/sign) */
+		__u32		in2_len;	/* 2nd input data size (verify) */
+	};
+	__u32		__spare[7];
+};
+
 #endif /*  _LINUX_KEYCTL_H */
diff --git a/security/keys/Makefile b/security/keys/Makefile
index ef1581b337a3d..9cef54064f608 100644
--- a/security/keys/Makefile
+++ b/security/keys/Makefile
@@ -22,6 +22,7 @@ obj-$(CONFIG_PROC_FS) += proc.o
 obj-$(CONFIG_SYSCTL) += sysctl.o
 obj-$(CONFIG_PERSISTENT_KEYRINGS) += persistent.o
 obj-$(CONFIG_KEY_DH_OPERATIONS) += dh.o
+obj-$(CONFIG_ASYMMETRIC_KEY_TYPE) += keyctl_pkey.o
 
 #
 # Key types
diff --git a/security/keys/compat.c b/security/keys/compat.c
index e87c89c0177c1..9482df601dc33 100644
--- a/security/keys/compat.c
+++ b/security/keys/compat.c
@@ -141,6 +141,24 @@ COMPAT_SYSCALL_DEFINE5(keyctl, u32, option,
 		return keyctl_restrict_keyring(arg2, compat_ptr(arg3),
 					       compat_ptr(arg4));
 
+	case KEYCTL_PKEY_QUERY:
+		if (arg3 != 0)
+			return -EINVAL;
+		return keyctl_pkey_query(arg2,
+					 compat_ptr(arg4),
+					 compat_ptr(arg5));
+
+	case KEYCTL_PKEY_ENCRYPT:
+	case KEYCTL_PKEY_DECRYPT:
+	case KEYCTL_PKEY_SIGN:
+		return keyctl_pkey_e_d_s(option,
+					 compat_ptr(arg2), compat_ptr(arg3),
+					 compat_ptr(arg4), compat_ptr(arg5));
+
+	case KEYCTL_PKEY_VERIFY:
+		return keyctl_pkey_verify(compat_ptr(arg2), compat_ptr(arg3),
+					  compat_ptr(arg4), compat_ptr(arg5));
+
 	default:
 		return -EOPNOTSUPP;
 	}
diff --git a/security/keys/internal.h b/security/keys/internal.h
index 9f8208dc0e558..74cb0ff42fedb 100644
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@@ -298,6 +298,45 @@ static inline long compat_keyctl_dh_compute(
 #endif
 #endif
 
+#ifdef CONFIG_ASYMMETRIC_KEY_TYPE
+extern long keyctl_pkey_query(key_serial_t,
+			      const char __user *,
+			      struct keyctl_pkey_query __user *);
+
+extern long keyctl_pkey_verify(const struct keyctl_pkey_params __user *,
+			       const char __user *,
+			       const void __user *, const void __user *);
+
+extern long keyctl_pkey_e_d_s(int,
+			      const struct keyctl_pkey_params __user *,
+			      const char __user *,
+			      const void __user *, void __user *);
+#else
+static inline long keyctl_pkey_query(key_serial_t id,
+				     const char __user *_info,
+				     struct keyctl_pkey_query __user *_res)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline long keyctl_pkey_verify(const struct keyctl_pkey_params __user *params,
+				      const char __user *_info,
+				      const void __user *_in,
+				      const void __user *_in2)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline long keyctl_pkey_e_d_s(int op,
+				     const struct keyctl_pkey_params __user *params,
+				     const char __user *_info,
+				     const void __user *_in,
+				     void __user *_out)
+{
+	return -EOPNOTSUPP;
+}
+#endif
+
 /*
  * Debugging key validation
  */
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index 1ffe60bb2845f..18619690ce77a 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -1747,6 +1747,30 @@ SYSCALL_DEFINE5(keyctl, int, option, unsigned long, arg2, unsigned long, arg3,
 					       (const char __user *) arg3,
 					       (const char __user *) arg4);
 
+	case KEYCTL_PKEY_QUERY:
+		if (arg3 != 0)
+			return -EINVAL;
+		return keyctl_pkey_query((key_serial_t)arg2,
+					 (const char __user *)arg4,
+					 (struct keyctl_pkey_query *)arg5);
+
+	case KEYCTL_PKEY_ENCRYPT:
+	case KEYCTL_PKEY_DECRYPT:
+	case KEYCTL_PKEY_SIGN:
+		return keyctl_pkey_e_d_s(
+			option,
+			(const struct keyctl_pkey_params __user *)arg2,
+			(const char __user *)arg3,
+			(const void __user *)arg4,
+			(void __user *)arg5);
+
+	case KEYCTL_PKEY_VERIFY:
+		return keyctl_pkey_verify(
+			(const struct keyctl_pkey_params __user *)arg2,
+			(const char __user *)arg3,
+			(const void __user *)arg4,
+			(const void __user *)arg5);
+
 	default:
 		return -EOPNOTSUPP;
 	}
diff --git a/security/keys/keyctl_pkey.c b/security/keys/keyctl_pkey.c
new file mode 100644
index 0000000000000..783978842f13a
--- /dev/null
+++ b/security/keys/keyctl_pkey.c
@@ -0,0 +1,323 @@
+/* Public-key operation keyctls
+ *
+ * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/key.h>
+#include <linux/keyctl.h>
+#include <linux/parser.h>
+#include <linux/uaccess.h>
+#include <keys/user-type.h>
+#include "internal.h"
+
+static void keyctl_pkey_params_free(struct kernel_pkey_params *params)
+{
+	kfree(params->info);
+	key_put(params->key);
+}
+
+enum {
+	Opt_err = -1,
+	Opt_enc,		/* "enc=<encoding>" eg. "enc=oaep" */
+	Opt_hash,		/* "hash=<digest-name>" eg. "hash=sha1" */
+};
+
+static const match_table_t param_keys = {
+	{ Opt_enc,	"enc=%s" },
+	{ Opt_hash,	"hash=%s" },
+	{ Opt_err,	NULL }
+};
+
+/*
+ * Parse the information string which consists of key=val pairs.
+ */
+static int keyctl_pkey_params_parse(struct kernel_pkey_params *params)
+{
+	unsigned long token_mask = 0;
+	substring_t args[MAX_OPT_ARGS];
+	char *c = params->info, *p, *q;
+	int token;
+
+	while ((p = strsep(&c, " \t"))) {
+		if (*p == '\0' || *p == ' ' || *p == '\t')
+			continue;
+		token = match_token(p, param_keys, args);
+		if (__test_and_set_bit(token, &token_mask))
+			return -EINVAL;
+		q = args[0].from;
+		if (!q[0])
+			return -EINVAL;
+
+		switch (token) {
+		case Opt_enc:
+			params->encoding = q;
+			break;
+
+		case Opt_hash:
+			params->hash_algo = q;
+			break;
+
+		default:
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Interpret parameters.  Callers must always call the free function
+ * on params, even if an error is returned.
+ */
+static int keyctl_pkey_params_get(key_serial_t id,
+				  const char __user *_info,
+				  struct kernel_pkey_params *params)
+{
+	key_ref_t key_ref;
+	void *p;
+	int ret;
+
+	memset(params, 0, sizeof(*params));
+	params->encoding = "raw";
+
+	p = strndup_user(_info, PAGE_SIZE);
+	if (IS_ERR(p))
+		return PTR_ERR(p);
+	params->info = p;
+
+	ret = keyctl_pkey_params_parse(params);
+	if (ret < 0)
+		return ret;
+
+	key_ref = lookup_user_key(id, 0, KEY_NEED_SEARCH);
+	if (IS_ERR(key_ref))
+		return PTR_ERR(key_ref);
+	params->key = key_ref_to_ptr(key_ref);
+
+	if (!params->key->type->asym_query)
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
+/*
+ * Get parameters from userspace.  Callers must always call the free function
+ * on params, even if an error is returned.
+ */
+static int keyctl_pkey_params_get_2(const struct keyctl_pkey_params __user *_params,
+				    const char __user *_info,
+				    int op,
+				    struct kernel_pkey_params *params)
+{
+	struct keyctl_pkey_params uparams;
+	struct kernel_pkey_query info;
+	int ret;
+
+	memset(params, 0, sizeof(*params));
+	params->encoding = "raw";
+
+	if (copy_from_user(&uparams, _params, sizeof(uparams)) != 0)
+		return -EFAULT;
+
+	ret = keyctl_pkey_params_get(uparams.key_id, _info, params);
+	if (ret < 0)
+		return ret;
+
+	ret = params->key->type->asym_query(params, &info);
+	if (ret < 0)
+		return ret;
+
+	switch (op) {
+	case KEYCTL_PKEY_ENCRYPT:
+	case KEYCTL_PKEY_DECRYPT:
+		if (uparams.in_len  > info.max_enc_size ||
+		    uparams.out_len > info.max_dec_size)
+			return -EINVAL;
+		break;
+	case KEYCTL_PKEY_SIGN:
+	case KEYCTL_PKEY_VERIFY:
+		if (uparams.in_len  > info.max_sig_size ||
+		    uparams.out_len > info.max_data_size)
+			return -EINVAL;
+		break;
+	default:
+		BUG();
+	}
+
+	params->in_len  = uparams.in_len;
+	params->out_len = uparams.out_len;
+	return 0;
+}
+
+/*
+ * Query information about an asymmetric key.
+ */
+long keyctl_pkey_query(key_serial_t id,
+		       const char __user *_info,
+		       struct keyctl_pkey_query __user *_res)
+{
+	struct kernel_pkey_params params;
+	struct kernel_pkey_query res;
+	long ret;
+
+	memset(&params, 0, sizeof(params));
+
+	ret = keyctl_pkey_params_get(id, _info, &params);
+	if (ret < 0)
+		goto error;
+
+	ret = params.key->type->asym_query(&params, &res);
+	if (ret < 0)
+		goto error;
+
+	ret = -EFAULT;
+	if (copy_to_user(_res, &res, sizeof(res)) == 0 &&
+	    clear_user(_res->__spare, sizeof(_res->__spare)) == 0)
+		ret = 0;
+
+error:
+	keyctl_pkey_params_free(&params);
+	return ret;
+}
+
+/*
+ * Encrypt/decrypt/sign
+ *
+ * Encrypt data, decrypt data or sign data using a public key.
+ *
+ * _info is a string of supplementary information in key=val format.  For
+ * instance, it might contain:
+ *
+ *	"enc=pkcs1 hash=sha256"
+ *
+ * where enc= specifies the encoding and hash= selects the OID to go in that
+ * particular encoding if required.  If enc= isn't supplied, it's assumed that
+ * the caller is supplying raw values.
+ *
+ * If successful, the amount of data written into the output buffer is
+ * returned.
+ */
+long keyctl_pkey_e_d_s(int op,
+		       const struct keyctl_pkey_params __user *_params,
+		       const char __user *_info,
+		       const void __user *_in,
+		       void __user *_out)
+{
+	struct kernel_pkey_params params;
+	void *in, *out;
+	long ret;
+
+	ret = keyctl_pkey_params_get_2(_params, _info, op, &params);
+	if (ret < 0)
+		goto error_params;
+
+	ret = -EOPNOTSUPP;
+	if (!params.key->type->asym_eds_op)
+		goto error_params;
+
+	switch (op) {
+	case KEYCTL_PKEY_ENCRYPT:
+		params.op = kernel_pkey_encrypt;
+		break;
+	case KEYCTL_PKEY_DECRYPT:
+		params.op = kernel_pkey_decrypt;
+		break;
+	case KEYCTL_PKEY_SIGN:
+		params.op = kernel_pkey_sign;
+		break;
+	default:
+		BUG();
+	}
+
+	in = memdup_user(_in, params.in_len);
+	if (IS_ERR(in)) {
+		ret = PTR_ERR(in);
+		goto error_params;
+	}
+
+	ret = -ENOMEM;
+	out = kmalloc(params.out_len, GFP_KERNEL);
+	if (!out)
+		goto error_in;
+
+	ret = params.key->type->asym_eds_op(&params, in, out);
+	if (ret < 0)
+		goto error_out;
+
+	if (copy_to_user(_out, out, ret) != 0)
+		ret = -EFAULT;
+
+error_out:
+	kfree(out);
+error_in:
+	kfree(in);
+error_params:
+	keyctl_pkey_params_free(&params);
+	return ret;
+}
+
+/*
+ * Verify a signature.
+ *
+ * Verify a public key signature using the given key, or if not given, search
+ * for a matching key.
+ *
+ * _info is a string of supplementary information in key=val format.  For
+ * instance, it might contain:
+ *
+ *	"enc=pkcs1 hash=sha256"
+ *
+ * where enc= specifies the signature blob encoding and hash= selects the OID
+ * to go in that particular encoding.  If enc= isn't supplied, it's assumed
+ * that the caller is supplying raw values.
+ *
+ * If successful, 0 is returned.
+ */
+long keyctl_pkey_verify(const struct keyctl_pkey_params __user *_params,
+			const char __user *_info,
+			const void __user *_in,
+			const void __user *_in2)
+{
+	struct kernel_pkey_params params;
+	void *in, *in2;
+	long ret;
+
+	ret = keyctl_pkey_params_get_2(_params, _info, KEYCTL_PKEY_VERIFY,
+				       &params);
+	if (ret < 0)
+		goto error_params;
+
+	ret = -EOPNOTSUPP;
+	if (!params.key->type->asym_verify_signature)
+		goto error_params;
+
+	in = memdup_user(_in, params.in_len);
+	if (IS_ERR(in)) {
+		ret = PTR_ERR(in);
+		goto error_params;
+	}
+
+	in2 = memdup_user(_in2, params.in2_len);
+	if (IS_ERR(in2)) {
+		ret = PTR_ERR(in2);
+		goto error_in;
+	}
+
+	params.op = kernel_pkey_verify;
+	ret = params.key->type->asym_verify_signature(&params, in, in2);
+
+	kfree(in2);
+error_in:
+	kfree(in);
+error_params:
+	keyctl_pkey_params_free(&params);
+	return ret;
+}
-- 
GitLab


From 5a30771832aab228e0863e414f9182f86797429e Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 9 Oct 2018 17:47:07 +0100
Subject: [PATCH 0217/1890] KEYS: Provide missing asymmetric key subops for new
 key type ops [ver #2]

Provide the missing asymmetric key subops for new key type ops.  This
include query, encrypt, decrypt and create signature.  Verify signature
already exists.  Also provided are accessor functions for this:

	int query_asymmetric_key(const struct key *key,
				 struct kernel_pkey_query *info);

	int encrypt_blob(struct kernel_pkey_params *params,
			 const void *data, void *enc);
	int decrypt_blob(struct kernel_pkey_params *params,
			 const void *enc, void *data);
	int create_signature(struct kernel_pkey_params *params,
			     const void *data, void *enc);

The public_key_signature struct gains an encoding field to carry the
encoding for verify_signature().

Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Marcel Holtmann <marcel@holtmann.org>
Reviewed-by: Marcel Holtmann <marcel@holtmann.org>
Reviewed-by: Denis Kenzior <denkenz@gmail.com>
Tested-by: Denis Kenzior <denkenz@gmail.com>
Signed-off-by: James Morris <james.morris@microsoft.com>
---
 Documentation/crypto/asymmetric-keys.txt | 24 ++++--
 crypto/asymmetric_keys/asymmetric_keys.h |  3 +
 crypto/asymmetric_keys/asymmetric_type.c | 43 +++++++++++
 crypto/asymmetric_keys/signature.c       | 95 ++++++++++++++++++++++++
 include/crypto/public_key.h              | 13 +++-
 include/keys/asymmetric-subtype.h        |  9 +++
 6 files changed, 180 insertions(+), 7 deletions(-)

diff --git a/Documentation/crypto/asymmetric-keys.txt b/Documentation/crypto/asymmetric-keys.txt
index 5969bf42562a8..deb656ef008b2 100644
--- a/Documentation/crypto/asymmetric-keys.txt
+++ b/Documentation/crypto/asymmetric-keys.txt
@@ -183,6 +183,10 @@ and looks like the following:
 
 		void (*describe)(const struct key *key, struct seq_file *m);
 		void (*destroy)(void *payload);
+		int (*query)(const struct kernel_pkey_params *params,
+			     struct kernel_pkey_query *info);
+		int (*eds_op)(struct kernel_pkey_params *params,
+			      const void *in, void *out);
 		int (*verify_signature)(const struct key *key,
 					const struct public_key_signature *sig);
 	};
@@ -207,12 +211,22 @@ There are a number of operations defined by the subtype:
      asymmetric key will look after freeing the fingerprint and releasing the
      reference on the subtype module.
 
- (3) verify_signature().
+ (3) query().
 
-     Optional.  These are the entry points for the key usage operations.
-     Currently there is only the one defined.  If not set, the caller will be
-     given -ENOTSUPP.  The subtype may do anything it likes to implement an
-     operation, including offloading to hardware.
+     Mandatory.  This is a function for querying the capabilities of a key.
+
+ (4) eds_op().
+
+     Optional.  This is the entry point for the encryption, decryption and
+     signature creation operations (which are distinguished by the operation ID
+     in the parameter struct).  The subtype may do anything it likes to
+     implement an operation, including offloading to hardware.
+
+ (5) verify_signature().
+
+     Optional.  This is the entry point for signature verification.  The
+     subtype may do anything it likes to implement an operation, including
+     offloading to hardware.
 
 
 ==========================
diff --git a/crypto/asymmetric_keys/asymmetric_keys.h b/crypto/asymmetric_keys/asymmetric_keys.h
index ca8e9ac34ce62..7be1ccf4fa9f2 100644
--- a/crypto/asymmetric_keys/asymmetric_keys.h
+++ b/crypto/asymmetric_keys/asymmetric_keys.h
@@ -16,3 +16,6 @@ extern struct asymmetric_key_id *asymmetric_key_hex_to_key_id(const char *id);
 extern int __asymmetric_key_hex_to_key_id(const char *id,
 					  struct asymmetric_key_id *match_id,
 					  size_t hexlen);
+
+extern int asymmetric_key_eds_op(struct kernel_pkey_params *params,
+				 const void *in, void *out);
diff --git a/crypto/asymmetric_keys/asymmetric_type.c b/crypto/asymmetric_keys/asymmetric_type.c
index 26539e9a8bda4..69a0788a7de5d 100644
--- a/crypto/asymmetric_keys/asymmetric_type.c
+++ b/crypto/asymmetric_keys/asymmetric_type.c
@@ -18,6 +18,7 @@
 #include <linux/slab.h>
 #include <linux/ctype.h>
 #include <keys/system_keyring.h>
+#include <keys/user-type.h>
 #include "asymmetric_keys.h"
 
 MODULE_LICENSE("GPL");
@@ -538,6 +539,45 @@ static struct key_restriction *asymmetric_lookup_restriction(
 	return ret;
 }
 
+int asymmetric_key_eds_op(struct kernel_pkey_params *params,
+			  const void *in, void *out)
+{
+	const struct asymmetric_key_subtype *subtype;
+	struct key *key = params->key;
+	int ret;
+
+	pr_devel("==>%s()\n", __func__);
+
+	if (key->type != &key_type_asymmetric)
+		return -EINVAL;
+	subtype = asymmetric_key_subtype(key);
+	if (!subtype ||
+	    !key->payload.data[0])
+		return -EINVAL;
+	if (!subtype->eds_op)
+		return -ENOTSUPP;
+
+	ret = subtype->eds_op(params, in, out);
+
+	pr_devel("<==%s() = %d\n", __func__, ret);
+	return ret;
+}
+
+static int asymmetric_key_verify_signature(struct kernel_pkey_params *params,
+					   const void *in, const void *in2)
+{
+	struct public_key_signature sig = {
+		.s_size		= params->in2_len,
+		.digest_size	= params->in_len,
+		.encoding	= params->encoding,
+		.hash_algo	= params->hash_algo,
+		.digest		= (void *)in,
+		.s		= (void *)in2,
+	};
+
+	return verify_signature(params->key, &sig);
+}
+
 struct key_type key_type_asymmetric = {
 	.name			= "asymmetric",
 	.preparse		= asymmetric_key_preparse,
@@ -548,6 +588,9 @@ struct key_type key_type_asymmetric = {
 	.destroy		= asymmetric_key_destroy,
 	.describe		= asymmetric_key_describe,
 	.lookup_restriction	= asymmetric_lookup_restriction,
+	.asym_query		= query_asymmetric_key,
+	.asym_eds_op		= asymmetric_key_eds_op,
+	.asym_verify_signature	= asymmetric_key_verify_signature,
 };
 EXPORT_SYMBOL_GPL(key_type_asymmetric);
 
diff --git a/crypto/asymmetric_keys/signature.c b/crypto/asymmetric_keys/signature.c
index 28198314bc39f..ad95a58c66427 100644
--- a/crypto/asymmetric_keys/signature.c
+++ b/crypto/asymmetric_keys/signature.c
@@ -16,7 +16,9 @@
 #include <linux/export.h>
 #include <linux/err.h>
 #include <linux/slab.h>
+#include <linux/keyctl.h>
 #include <crypto/public_key.h>
+#include <keys/user-type.h>
 #include "asymmetric_keys.h"
 
 /*
@@ -36,6 +38,99 @@ void public_key_signature_free(struct public_key_signature *sig)
 }
 EXPORT_SYMBOL_GPL(public_key_signature_free);
 
+/**
+ * query_asymmetric_key - Get information about an aymmetric key.
+ * @params: Various parameters.
+ * @info: Where to put the information.
+ */
+int query_asymmetric_key(const struct kernel_pkey_params *params,
+			 struct kernel_pkey_query *info)
+{
+	const struct asymmetric_key_subtype *subtype;
+	struct key *key = params->key;
+	int ret;
+
+	pr_devel("==>%s()\n", __func__);
+
+	if (key->type != &key_type_asymmetric)
+		return -EINVAL;
+	subtype = asymmetric_key_subtype(key);
+	if (!subtype ||
+	    !key->payload.data[0])
+		return -EINVAL;
+	if (!subtype->query)
+		return -ENOTSUPP;
+
+	ret = subtype->query(params, info);
+
+	pr_devel("<==%s() = %d\n", __func__, ret);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(query_asymmetric_key);
+
+/**
+ * encrypt_blob - Encrypt data using an asymmetric key
+ * @params: Various parameters
+ * @data: Data blob to be encrypted, length params->data_len
+ * @enc: Encrypted data buffer, length params->enc_len
+ *
+ * Encrypt the specified data blob using the private key specified by
+ * params->key.  The encrypted data is wrapped in an encoding if
+ * params->encoding is specified (eg. "pkcs1").
+ *
+ * Returns the length of the data placed in the encrypted data buffer or an
+ * error.
+ */
+int encrypt_blob(struct kernel_pkey_params *params,
+		 const void *data, void *enc)
+{
+	params->op = kernel_pkey_encrypt;
+	return asymmetric_key_eds_op(params, data, enc);
+}
+EXPORT_SYMBOL_GPL(encrypt_blob);
+
+/**
+ * decrypt_blob - Decrypt data using an asymmetric key
+ * @params: Various parameters
+ * @enc: Encrypted data to be decrypted, length params->enc_len
+ * @data: Decrypted data buffer, length params->data_len
+ *
+ * Decrypt the specified data blob using the private key specified by
+ * params->key.  The decrypted data is wrapped in an encoding if
+ * params->encoding is specified (eg. "pkcs1").
+ *
+ * Returns the length of the data placed in the decrypted data buffer or an
+ * error.
+ */
+int decrypt_blob(struct kernel_pkey_params *params,
+		 const void *enc, void *data)
+{
+	params->op = kernel_pkey_decrypt;
+	return asymmetric_key_eds_op(params, enc, data);
+}
+EXPORT_SYMBOL_GPL(decrypt_blob);
+
+/**
+ * create_signature - Sign some data using an asymmetric key
+ * @params: Various parameters
+ * @data: Data blob to be signed, length params->data_len
+ * @enc: Signature buffer, length params->enc_len
+ *
+ * Sign the specified data blob using the private key specified by params->key.
+ * The signature is wrapped in an encoding if params->encoding is specified
+ * (eg. "pkcs1").  If the encoding needs to know the digest type, this can be
+ * passed through params->hash_algo (eg. "sha1").
+ *
+ * Returns the length of the data placed in the signature buffer or an error.
+ */
+int create_signature(struct kernel_pkey_params *params,
+		     const void *data, void *enc)
+{
+	params->op = kernel_pkey_sign;
+	return asymmetric_key_eds_op(params, data, enc);
+}
+EXPORT_SYMBOL_GPL(create_signature);
+
 /**
  * verify_signature - Initiate the use of an asymmetric key to verify a signature
  * @key: The asymmetric key to verify against
diff --git a/include/crypto/public_key.h b/include/crypto/public_key.h
index e0b681a717bac..3a1047a0195c3 100644
--- a/include/crypto/public_key.h
+++ b/include/crypto/public_key.h
@@ -14,6 +14,8 @@
 #ifndef _LINUX_PUBLIC_KEY_H
 #define _LINUX_PUBLIC_KEY_H
 
+#include <linux/keyctl.h>
+
 /*
  * Cryptographic data for the public-key subtype of the asymmetric key type.
  *
@@ -40,6 +42,7 @@ struct public_key_signature {
 	u8 digest_size;		/* Number of bytes in digest */
 	const char *pkey_algo;
 	const char *hash_algo;
+	const char *encoding;
 };
 
 extern void public_key_signature_free(struct public_key_signature *sig);
@@ -65,8 +68,14 @@ extern int restrict_link_by_key_or_keyring_chain(struct key *trust_keyring,
 						 const union key_payload *payload,
 						 struct key *trusted);
 
-extern int verify_signature(const struct key *key,
-			    const struct public_key_signature *sig);
+extern int query_asymmetric_key(const struct kernel_pkey_params *,
+				struct kernel_pkey_query *);
+
+extern int encrypt_blob(struct kernel_pkey_params *, const void *, void *);
+extern int decrypt_blob(struct kernel_pkey_params *, const void *, void *);
+extern int create_signature(struct kernel_pkey_params *, const void *, void *);
+extern int verify_signature(const struct key *,
+			    const struct public_key_signature *);
 
 int public_key_verify_signature(const struct public_key *pkey,
 				const struct public_key_signature *sig);
diff --git a/include/keys/asymmetric-subtype.h b/include/keys/asymmetric-subtype.h
index e0a9c23688728..9ce2f0fae57e3 100644
--- a/include/keys/asymmetric-subtype.h
+++ b/include/keys/asymmetric-subtype.h
@@ -17,6 +17,8 @@
 #include <linux/seq_file.h>
 #include <keys/asymmetric-type.h>
 
+struct kernel_pkey_query;
+struct kernel_pkey_params;
 struct public_key_signature;
 
 /*
@@ -34,6 +36,13 @@ struct asymmetric_key_subtype {
 	/* Destroy a key of this subtype */
 	void (*destroy)(void *payload_crypto, void *payload_auth);
 
+	int (*query)(const struct kernel_pkey_params *params,
+		     struct kernel_pkey_query *info);
+
+	/* Encrypt/decrypt/sign data */
+	int (*eds_op)(struct kernel_pkey_params *params,
+		      const void *in, void *out);
+
 	/* Verify the signature on a key of this subtype (optional) */
 	int (*verify_signature)(const struct key *key,
 				const struct public_key_signature *sig);
-- 
GitLab


From 039884907787e55166e84e0b654a5342cc8a04ab Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 9 Oct 2018 17:47:15 +0100
Subject: [PATCH 0218/1890] KEYS: Make the X.509 and PKCS7 parsers supply the
 sig encoding type [ver #2]

Make the X.509 and PKCS7 parsers fill in the signature encoding type field
recently added to the public_key_signature struct.

Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Marcel Holtmann <marcel@holtmann.org>
Reviewed-by: Marcel Holtmann <marcel@holtmann.org>
Reviewed-by: Denis Kenzior <denkenz@gmail.com>
Tested-by: Denis Kenzior <denkenz@gmail.com>
Signed-off-by: James Morris <james.morris@microsoft.com>
---
 crypto/asymmetric_keys/pkcs7_parser.c     |  1 +
 crypto/asymmetric_keys/x509_cert_parser.c | 21 +++++++++------------
 2 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/crypto/asymmetric_keys/pkcs7_parser.c b/crypto/asymmetric_keys/pkcs7_parser.c
index 0f134162cef4b..f0d56e1a8b7e2 100644
--- a/crypto/asymmetric_keys/pkcs7_parser.c
+++ b/crypto/asymmetric_keys/pkcs7_parser.c
@@ -271,6 +271,7 @@ int pkcs7_sig_note_pkey_algo(void *context, size_t hdrlen,
 	switch (ctx->last_oid) {
 	case OID_rsaEncryption:
 		ctx->sinfo->sig->pkey_algo = "rsa";
+		ctx->sinfo->sig->encoding = "pkcs1";
 		break;
 	default:
 		printk("Unsupported pkey algo: %u\n", ctx->last_oid);
diff --git a/crypto/asymmetric_keys/x509_cert_parser.c b/crypto/asymmetric_keys/x509_cert_parser.c
index b6cabac4b62ba..991f4d735a4ef 100644
--- a/crypto/asymmetric_keys/x509_cert_parser.c
+++ b/crypto/asymmetric_keys/x509_cert_parser.c
@@ -199,35 +199,32 @@ int x509_note_pkey_algo(void *context, size_t hdrlen,
 
 	case OID_md4WithRSAEncryption:
 		ctx->cert->sig->hash_algo = "md4";
-		ctx->cert->sig->pkey_algo = "rsa";
-		break;
+		goto rsa_pkcs1;
 
 	case OID_sha1WithRSAEncryption:
 		ctx->cert->sig->hash_algo = "sha1";
-		ctx->cert->sig->pkey_algo = "rsa";
-		break;
+		goto rsa_pkcs1;
 
 	case OID_sha256WithRSAEncryption:
 		ctx->cert->sig->hash_algo = "sha256";
-		ctx->cert->sig->pkey_algo = "rsa";
-		break;
+		goto rsa_pkcs1;
 
 	case OID_sha384WithRSAEncryption:
 		ctx->cert->sig->hash_algo = "sha384";
-		ctx->cert->sig->pkey_algo = "rsa";
-		break;
+		goto rsa_pkcs1;
 
 	case OID_sha512WithRSAEncryption:
 		ctx->cert->sig->hash_algo = "sha512";
-		ctx->cert->sig->pkey_algo = "rsa";
-		break;
+		goto rsa_pkcs1;
 
 	case OID_sha224WithRSAEncryption:
 		ctx->cert->sig->hash_algo = "sha224";
-		ctx->cert->sig->pkey_algo = "rsa";
-		break;
+		goto rsa_pkcs1;
 	}
 
+rsa_pkcs1:
+	ctx->cert->sig->pkey_algo = "rsa";
+	ctx->cert->sig->encoding = "pkcs1";
 	ctx->algo_oid = ctx->last_oid;
 	return 0;
 }
-- 
GitLab


From 82f94f24475c89c6d5cb673e1277b5b4394499c8 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 9 Oct 2018 17:47:23 +0100
Subject: [PATCH 0219/1890] KEYS: Provide software public key query function
 [ver #2]

Provide a query function for the software public key implementation.  This
permits information about such a key to be obtained using
query_asymmetric_key() or KEYCTL_PKEY_QUERY.

Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Marcel Holtmann <marcel@holtmann.org>
Reviewed-by: Marcel Holtmann <marcel@holtmann.org>
Reviewed-by: Denis Kenzior <denkenz@gmail.com>
Tested-by: Denis Kenzior <denkenz@gmail.com>
Signed-off-by: James Morris <james.morris@microsoft.com>
---
 crypto/asymmetric_keys/public_key.c | 99 ++++++++++++++++++++++++-----
 1 file changed, 82 insertions(+), 17 deletions(-)

diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c
index e929fe1e4106c..761bfab352dc6 100644
--- a/crypto/asymmetric_keys/public_key.c
+++ b/crypto/asymmetric_keys/public_key.c
@@ -59,6 +59,81 @@ static void public_key_destroy(void *payload0, void *payload3)
 	public_key_signature_free(payload3);
 }
 
+/*
+ * Determine the crypto algorithm name.
+ */
+static
+int software_key_determine_akcipher(const char *encoding,
+				    const char *hash_algo,
+				    const struct public_key *pkey,
+				    char alg_name[CRYPTO_MAX_ALG_NAME])
+{
+	int n;
+
+	if (strcmp(encoding, "pkcs1") == 0) {
+		/* The data wangled by the RSA algorithm is typically padded
+		 * and encoded in some manner, such as EMSA-PKCS1-1_5 [RFC3447
+		 * sec 8.2].
+		 */
+		if (!hash_algo)
+			n = snprintf(alg_name, CRYPTO_MAX_ALG_NAME,
+				     "pkcs1pad(%s)",
+				     pkey->pkey_algo);
+		else
+			n = snprintf(alg_name, CRYPTO_MAX_ALG_NAME,
+				     "pkcs1pad(%s,%s)",
+				     pkey->pkey_algo, hash_algo);
+		return n >= CRYPTO_MAX_ALG_NAME ? -EINVAL : 0;
+	}
+
+	if (strcmp(encoding, "raw") == 0) {
+		strcpy(alg_name, pkey->pkey_algo);
+		return 0;
+	}
+
+	return -ENOPKG;
+}
+
+/*
+ * Query information about a key.
+ */
+static int software_key_query(const struct kernel_pkey_params *params,
+			      struct kernel_pkey_query *info)
+{
+	struct crypto_akcipher *tfm;
+	struct public_key *pkey = params->key->payload.data[asym_crypto];
+	char alg_name[CRYPTO_MAX_ALG_NAME];
+	int ret, len;
+
+	ret = software_key_determine_akcipher(params->encoding,
+					      params->hash_algo,
+					      pkey, alg_name);
+	if (ret < 0)
+		return ret;
+
+	tfm = crypto_alloc_akcipher(alg_name, 0, 0);
+	if (IS_ERR(tfm))
+		return PTR_ERR(tfm);
+
+	ret = crypto_akcipher_set_pub_key(tfm, pkey->key, pkey->keylen);
+	if (ret < 0)
+		goto error_free_tfm;
+
+	len = crypto_akcipher_maxsize(tfm);
+	info->key_size = len * 8;
+	info->max_data_size = len;
+	info->max_sig_size = len;
+	info->max_enc_size = len;
+	info->max_dec_size = len;
+	info->supported_ops = KEYCTL_SUPPORTS_VERIFY;
+	ret = 0;
+
+error_free_tfm:
+	crypto_free_akcipher(tfm);
+	pr_devel("<==%s() = %d\n", __func__, ret);
+	return ret;
+}
+
 /*
  * Verify a signature using a public key.
  */
@@ -69,8 +144,7 @@ int public_key_verify_signature(const struct public_key *pkey,
 	struct crypto_akcipher *tfm;
 	struct akcipher_request *req;
 	struct scatterlist sig_sg, digest_sg;
-	const char *alg_name;
-	char alg_name_buf[CRYPTO_MAX_ALG_NAME];
+	char alg_name[CRYPTO_MAX_ALG_NAME];
 	void *output;
 	unsigned int outlen;
 	int ret;
@@ -81,21 +155,11 @@ int public_key_verify_signature(const struct public_key *pkey,
 	BUG_ON(!sig);
 	BUG_ON(!sig->s);
 
-	if (!sig->digest)
-		return -ENOPKG;
-
-	alg_name = sig->pkey_algo;
-	if (strcmp(sig->pkey_algo, "rsa") == 0) {
-		/* The data wangled by the RSA algorithm is typically padded
-		 * and encoded in some manner, such as EMSA-PKCS1-1_5 [RFC3447
-		 * sec 8.2].
-		 */
-		if (snprintf(alg_name_buf, CRYPTO_MAX_ALG_NAME,
-			     "pkcs1pad(rsa,%s)", sig->hash_algo
-			     ) >= CRYPTO_MAX_ALG_NAME)
-			return -EINVAL;
-		alg_name = alg_name_buf;
-	}
+	ret = software_key_determine_akcipher(sig->encoding,
+					      sig->hash_algo,
+					      pkey, alg_name);
+	if (ret < 0)
+		return ret;
 
 	tfm = crypto_alloc_akcipher(alg_name, 0, 0);
 	if (IS_ERR(tfm))
@@ -167,6 +231,7 @@ struct asymmetric_key_subtype public_key_subtype = {
 	.name_len		= sizeof("public_key") - 1,
 	.describe		= public_key_describe,
 	.destroy		= public_key_destroy,
+	.query			= software_key_query,
 	.verify_signature	= public_key_verify_signature_2,
 };
 EXPORT_SYMBOL_GPL(public_key_subtype);
-- 
GitLab


From f7c4e06e066c3df282e6e3d4e7d8c498be9e1e46 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 9 Oct 2018 17:47:31 +0100
Subject: [PATCH 0220/1890] KEYS: Allow the public_key struct to hold a private
 key [ver #2]

Put a flag in the public_key struct to indicate if the structure is holding
a private key.  The private key must be held ASN.1 encoded in the format
specified in RFC 3447 A.1.2.  This is the form required by crypto/rsa.c.

The software encryption subtype's verification and query functions then
need to select the appropriate crypto function to set the key.

Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Marcel Holtmann <marcel@holtmann.org>
Reviewed-by: Marcel Holtmann <marcel@holtmann.org>
Reviewed-by: Denis Kenzior <denkenz@gmail.com>
Tested-by: Denis Kenzior <denkenz@gmail.com>
Signed-off-by: James Morris <james.morris@microsoft.com>
---
 crypto/asymmetric_keys/public_key.c | 14 ++++++++++++--
 include/crypto/public_key.h         |  1 +
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c
index 761bfab352dc6..f2dc278973197 100644
--- a/crypto/asymmetric_keys/public_key.c
+++ b/crypto/asymmetric_keys/public_key.c
@@ -115,7 +115,12 @@ static int software_key_query(const struct kernel_pkey_params *params,
 	if (IS_ERR(tfm))
 		return PTR_ERR(tfm);
 
-	ret = crypto_akcipher_set_pub_key(tfm, pkey->key, pkey->keylen);
+	if (pkey->key_is_private)
+		ret = crypto_akcipher_set_priv_key(tfm,
+						   pkey->key, pkey->keylen);
+	else
+		ret = crypto_akcipher_set_pub_key(tfm,
+						  pkey->key, pkey->keylen);
 	if (ret < 0)
 		goto error_free_tfm;
 
@@ -170,7 +175,12 @@ int public_key_verify_signature(const struct public_key *pkey,
 	if (!req)
 		goto error_free_tfm;
 
-	ret = crypto_akcipher_set_pub_key(tfm, pkey->key, pkey->keylen);
+	if (pkey->key_is_private)
+		ret = crypto_akcipher_set_priv_key(tfm,
+						   pkey->key, pkey->keylen);
+	else
+		ret = crypto_akcipher_set_pub_key(tfm,
+						  pkey->key, pkey->keylen);
 	if (ret)
 		goto error_free_req;
 
diff --git a/include/crypto/public_key.h b/include/crypto/public_key.h
index 3a1047a0195c3..be626eac91133 100644
--- a/include/crypto/public_key.h
+++ b/include/crypto/public_key.h
@@ -25,6 +25,7 @@
 struct public_key {
 	void *key;
 	u32 keylen;
+	bool key_is_private;
 	const char *id_type;
 	const char *pkey_algo;
 };
-- 
GitLab


From c08fed73712620eb0a19244dbbbbdf00edbe5e47 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 9 Oct 2018 17:47:38 +0100
Subject: [PATCH 0221/1890] KEYS: Implement encrypt, decrypt and sign for
 software asymmetric key [ver #2]

Implement the encrypt, decrypt and sign operations for the software
asymmetric key subtype.  This mostly involves offloading the call to the
crypto layer.

Note that the decrypt and sign operations require a private key to be
supplied.  Encrypt (and also verify) will work with either a public or a
private key.  A public key can be supplied with an X.509 certificate and a
private key can be supplied using a PKCS#8 blob:

	# j=`openssl pkcs8 -in ~/pkcs7/firmwarekey2.priv -topk8 -nocrypt -outform DER | keyctl padd asymmetric foo @s`
	# keyctl pkey_query $j - enc=pkcs1
	key_size=4096
	max_data_size=512
	max_sig_size=512
	max_enc_size=512
	max_dec_size=512
	encrypt=y
	decrypt=y
	sign=y
	verify=y
	# keyctl pkey_encrypt $j 0 data enc=pkcs1 >/tmp/enc
	# keyctl pkey_decrypt $j 0 /tmp/enc enc=pkcs1 >/tmp/dec
	# cmp data /tmp/dec
	# keyctl pkey_sign $j 0 data enc=pkcs1 hash=sha1 >/tmp/sig
	# keyctl pkey_verify $j 0 data /tmp/sig enc=pkcs1 hash=sha1
	#

Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Marcel Holtmann <marcel@holtmann.org>
Reviewed-by: Marcel Holtmann <marcel@holtmann.org>
Reviewed-by: Denis Kenzior <denkenz@gmail.com>
Tested-by: Denis Kenzior <denkenz@gmail.com>
Signed-off-by: James Morris <james.morris@microsoft.com>
---
 crypto/asymmetric_keys/public_key.c | 82 ++++++++++++++++++++++++++++-
 1 file changed, 81 insertions(+), 1 deletion(-)

diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c
index f2dc278973197..f5d85b47fcc6d 100644
--- a/crypto/asymmetric_keys/public_key.c
+++ b/crypto/asymmetric_keys/public_key.c
@@ -130,7 +130,11 @@ static int software_key_query(const struct kernel_pkey_params *params,
 	info->max_sig_size = len;
 	info->max_enc_size = len;
 	info->max_dec_size = len;
-	info->supported_ops = KEYCTL_SUPPORTS_VERIFY;
+	info->supported_ops = (KEYCTL_SUPPORTS_ENCRYPT |
+			       KEYCTL_SUPPORTS_VERIFY);
+	if (pkey->key_is_private)
+		info->supported_ops |= (KEYCTL_SUPPORTS_DECRYPT |
+					KEYCTL_SUPPORTS_SIGN);
 	ret = 0;
 
 error_free_tfm:
@@ -139,6 +143,81 @@ static int software_key_query(const struct kernel_pkey_params *params,
 	return ret;
 }
 
+/*
+ * Do encryption, decryption and signing ops.
+ */
+static int software_key_eds_op(struct kernel_pkey_params *params,
+			       const void *in, void *out)
+{
+	const struct public_key *pkey = params->key->payload.data[asym_crypto];
+	struct akcipher_request *req;
+	struct crypto_akcipher *tfm;
+	struct crypto_wait cwait;
+	struct scatterlist in_sg, out_sg;
+	char alg_name[CRYPTO_MAX_ALG_NAME];
+	int ret;
+
+	pr_devel("==>%s()\n", __func__);
+
+	ret = software_key_determine_akcipher(params->encoding,
+					      params->hash_algo,
+					      pkey, alg_name);
+	if (ret < 0)
+		return ret;
+
+	tfm = crypto_alloc_akcipher(alg_name, 0, 0);
+	if (IS_ERR(tfm))
+		return PTR_ERR(tfm);
+
+	req = akcipher_request_alloc(tfm, GFP_KERNEL);
+	if (!req)
+		goto error_free_tfm;
+
+	if (pkey->key_is_private)
+		ret = crypto_akcipher_set_priv_key(tfm,
+						   pkey->key, pkey->keylen);
+	else
+		ret = crypto_akcipher_set_pub_key(tfm,
+						  pkey->key, pkey->keylen);
+	if (ret)
+		goto error_free_req;
+
+	sg_init_one(&in_sg, in, params->in_len);
+	sg_init_one(&out_sg, out, params->out_len);
+	akcipher_request_set_crypt(req, &in_sg, &out_sg, params->in_len,
+				   params->out_len);
+	crypto_init_wait(&cwait);
+	akcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG |
+				      CRYPTO_TFM_REQ_MAY_SLEEP,
+				      crypto_req_done, &cwait);
+
+	/* Perform the encryption calculation. */
+	switch (params->op) {
+	case kernel_pkey_encrypt:
+		ret = crypto_akcipher_encrypt(req);
+		break;
+	case kernel_pkey_decrypt:
+		ret = crypto_akcipher_decrypt(req);
+		break;
+	case kernel_pkey_sign:
+		ret = crypto_akcipher_sign(req);
+		break;
+	default:
+		BUG();
+	}
+
+	ret = crypto_wait_req(ret, &cwait);
+	if (ret == 0)
+		ret = req->dst_len;
+
+error_free_req:
+	akcipher_request_free(req);
+error_free_tfm:
+	crypto_free_akcipher(tfm);
+	pr_devel("<==%s() = %d\n", __func__, ret);
+	return ret;
+}
+
 /*
  * Verify a signature using a public key.
  */
@@ -242,6 +321,7 @@ struct asymmetric_key_subtype public_key_subtype = {
 	.describe		= public_key_describe,
 	.destroy		= public_key_destroy,
 	.query			= software_key_query,
+	.eds_op			= software_key_eds_op,
 	.verify_signature	= public_key_verify_signature_2,
 };
 EXPORT_SYMBOL_GPL(public_key_subtype);
-- 
GitLab


From 3c58b2362ba828ee2970c66c6a6fd7b04fde4413 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 9 Oct 2018 17:47:46 +0100
Subject: [PATCH 0222/1890] KEYS: Implement PKCS#8 RSA Private Key parser [ver
 #2]

Implement PKCS#8 RSA Private Key format [RFC 5208] parser for the
asymmetric key type.  For the moment, this will only support unencrypted
DER blobs.  PEM and decryption can be added later.

PKCS#8 keys can be loaded like this:

	openssl pkcs8 -in private_key.pem -topk8 -nocrypt -outform DER | \
	  keyctl padd asymmetric foo @s

Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Marcel Holtmann <marcel@holtmann.org>
Reviewed-by: Marcel Holtmann <marcel@holtmann.org>
Reviewed-by: Denis Kenzior <denkenz@gmail.com>
Tested-by: Denis Kenzior <denkenz@gmail.com>
Signed-off-by: James Morris <james.morris@microsoft.com>
---
 Documentation/crypto/asymmetric-keys.txt |   2 +
 crypto/asymmetric_keys/Kconfig           |  10 ++
 crypto/asymmetric_keys/Makefile          |  13 ++
 crypto/asymmetric_keys/pkcs8.asn1        |  24 +++
 crypto/asymmetric_keys/pkcs8_parser.c    | 184 +++++++++++++++++++++++
 5 files changed, 233 insertions(+)
 create mode 100644 crypto/asymmetric_keys/pkcs8.asn1
 create mode 100644 crypto/asymmetric_keys/pkcs8_parser.c

diff --git a/Documentation/crypto/asymmetric-keys.txt b/Documentation/crypto/asymmetric-keys.txt
index deb656ef008b2..8763866b11cfd 100644
--- a/Documentation/crypto/asymmetric-keys.txt
+++ b/Documentation/crypto/asymmetric-keys.txt
@@ -248,6 +248,8 @@ Examples of blob formats for which parsers could be implemented include:
  - X.509 ASN.1 stream.
  - Pointer to TPM key.
  - Pointer to UEFI key.
+ - PKCS#8 private key [RFC 5208].
+ - PKCS#5 encrypted private key [RFC 2898].
 
 During key instantiation each parser in the list is tried until one doesn't
 return -EBADMSG.
diff --git a/crypto/asymmetric_keys/Kconfig b/crypto/asymmetric_keys/Kconfig
index f3702e533ff41..66a7dad7ed3d1 100644
--- a/crypto/asymmetric_keys/Kconfig
+++ b/crypto/asymmetric_keys/Kconfig
@@ -31,6 +31,16 @@ config X509_CERTIFICATE_PARSER
 	  data and provides the ability to instantiate a crypto key from a
 	  public key packet found inside the certificate.
 
+config PKCS8_PRIVATE_KEY_PARSER
+	tristate "PKCS#8 private key parser"
+	depends on ASYMMETRIC_PUBLIC_KEY_SUBTYPE
+	select ASN1
+	select OID_REGISTRY
+	help
+	  This option provides support for parsing PKCS#8 format blobs for
+	  private key data and provides the ability to instantiate a crypto key
+	  from that data.
+
 config PKCS7_MESSAGE_PARSER
 	tristate "PKCS#7 message parser"
 	depends on X509_CERTIFICATE_PARSER
diff --git a/crypto/asymmetric_keys/Makefile b/crypto/asymmetric_keys/Makefile
index d4b2e1b2dc650..c38424f55b08d 100644
--- a/crypto/asymmetric_keys/Makefile
+++ b/crypto/asymmetric_keys/Makefile
@@ -29,6 +29,19 @@ $(obj)/x509_cert_parser.o: \
 $(obj)/x509.asn1.o: $(obj)/x509.asn1.c $(obj)/x509.asn1.h
 $(obj)/x509_akid.asn1.o: $(obj)/x509_akid.asn1.c $(obj)/x509_akid.asn1.h
 
+#
+# PKCS#8 private key handling
+#
+obj-$(CONFIG_PKCS8_PRIVATE_KEY_PARSER) += pkcs8_key_parser.o
+pkcs8_key_parser-y := \
+	pkcs8.asn1.o \
+	pkcs8_parser.o
+
+$(obj)/pkcs8_parser.o: $(obj)/pkcs8.asn1.h
+$(obj)/pkcs8-asn1.o: $(obj)/pkcs8.asn1.c $(obj)/pkcs8.asn1.h
+
+clean-files	+= pkcs8.asn1.c pkcs8.asn1.h
+
 #
 # PKCS#7 message handling
 #
diff --git a/crypto/asymmetric_keys/pkcs8.asn1 b/crypto/asymmetric_keys/pkcs8.asn1
new file mode 100644
index 0000000000000..702c41a3c7137
--- /dev/null
+++ b/crypto/asymmetric_keys/pkcs8.asn1
@@ -0,0 +1,24 @@
+--
+-- This is the unencrypted variant
+--
+PrivateKeyInfo ::= SEQUENCE {
+	version			Version,
+	privateKeyAlgorithm	PrivateKeyAlgorithmIdentifier,
+	privateKey		PrivateKey,
+	attributes		[0] IMPLICIT Attributes OPTIONAL
+}
+
+Version ::= INTEGER  ({ pkcs8_note_version })
+
+PrivateKeyAlgorithmIdentifier ::= AlgorithmIdentifier ({ pkcs8_note_algo })
+
+PrivateKey ::= OCTET STRING ({ pkcs8_note_key })
+
+Attributes ::= SET OF Attribute
+
+Attribute ::= ANY
+
+AlgorithmIdentifier ::= SEQUENCE {
+	algorithm   OBJECT IDENTIFIER ({ pkcs8_note_OID }),
+	parameters  ANY OPTIONAL
+}
diff --git a/crypto/asymmetric_keys/pkcs8_parser.c b/crypto/asymmetric_keys/pkcs8_parser.c
new file mode 100644
index 0000000000000..5f6a7ecc9765e
--- /dev/null
+++ b/crypto/asymmetric_keys/pkcs8_parser.c
@@ -0,0 +1,184 @@
+/* PKCS#8 Private Key parser [RFC 5208].
+ *
+ * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) "PKCS8: "fmt
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/oid_registry.h>
+#include <keys/asymmetric-subtype.h>
+#include <keys/asymmetric-parser.h>
+#include <crypto/public_key.h>
+#include "pkcs8.asn1.h"
+
+struct pkcs8_parse_context {
+	struct public_key *pub;
+	unsigned long	data;			/* Start of data */
+	enum OID	last_oid;		/* Last OID encountered */
+	enum OID	algo_oid;		/* Algorithm OID */
+	u32		key_size;
+	const void	*key;
+};
+
+/*
+ * Note an OID when we find one for later processing when we know how to
+ * interpret it.
+ */
+int pkcs8_note_OID(void *context, size_t hdrlen,
+		   unsigned char tag,
+		   const void *value, size_t vlen)
+{
+	struct pkcs8_parse_context *ctx = context;
+
+	ctx->last_oid = look_up_OID(value, vlen);
+	if (ctx->last_oid == OID__NR) {
+		char buffer[50];
+
+		sprint_oid(value, vlen, buffer, sizeof(buffer));
+		pr_info("Unknown OID: [%lu] %s\n",
+			(unsigned long)value - ctx->data, buffer);
+	}
+	return 0;
+}
+
+/*
+ * Note the version number of the ASN.1 blob.
+ */
+int pkcs8_note_version(void *context, size_t hdrlen,
+		       unsigned char tag,
+		       const void *value, size_t vlen)
+{
+	if (vlen != 1 || ((const u8 *)value)[0] != 0) {
+		pr_warn("Unsupported PKCS#8 version\n");
+		return -EBADMSG;
+	}
+	return 0;
+}
+
+/*
+ * Note the public algorithm.
+ */
+int pkcs8_note_algo(void *context, size_t hdrlen,
+		    unsigned char tag,
+		    const void *value, size_t vlen)
+{
+	struct pkcs8_parse_context *ctx = context;
+
+	if (ctx->last_oid != OID_rsaEncryption)
+		return -ENOPKG;
+
+	ctx->pub->pkey_algo = "rsa";
+	return 0;
+}
+
+/*
+ * Note the key data of the ASN.1 blob.
+ */
+int pkcs8_note_key(void *context, size_t hdrlen,
+		   unsigned char tag,
+		   const void *value, size_t vlen)
+{
+	struct pkcs8_parse_context *ctx = context;
+
+	ctx->key = value;
+	ctx->key_size = vlen;
+	return 0;
+}
+
+/*
+ * Parse a PKCS#8 private key blob.
+ */
+static struct public_key *pkcs8_parse(const void *data, size_t datalen)
+{
+	struct pkcs8_parse_context ctx;
+	struct public_key *pub;
+	long ret;
+
+	memset(&ctx, 0, sizeof(ctx));
+
+	ret = -ENOMEM;
+	ctx.pub = kzalloc(sizeof(struct public_key), GFP_KERNEL);
+	if (!ctx.pub)
+		goto error;
+
+	ctx.data = (unsigned long)data;
+
+	/* Attempt to decode the private key */
+	ret = asn1_ber_decoder(&pkcs8_decoder, &ctx, data, datalen);
+	if (ret < 0)
+		goto error_decode;
+
+	ret = -ENOMEM;
+	pub = ctx.pub;
+	pub->key = kmemdup(ctx.key, ctx.key_size, GFP_KERNEL);
+	if (!pub->key)
+		goto error_decode;
+
+	pub->keylen = ctx.key_size;
+	pub->key_is_private = true;
+	return pub;
+
+error_decode:
+	kfree(ctx.pub);
+error:
+	return ERR_PTR(ret);
+}
+
+/*
+ * Attempt to parse a data blob for a key as a PKCS#8 private key.
+ */
+static int pkcs8_key_preparse(struct key_preparsed_payload *prep)
+{
+	struct public_key *pub;
+
+	pub = pkcs8_parse(prep->data, prep->datalen);
+	if (IS_ERR(pub))
+		return PTR_ERR(pub);
+
+	pr_devel("Cert Key Algo: %s\n", pub->pkey_algo);
+	pub->id_type = "PKCS8";
+
+	/* We're pinning the module by being linked against it */
+	__module_get(public_key_subtype.owner);
+	prep->payload.data[asym_subtype] = &public_key_subtype;
+	prep->payload.data[asym_key_ids] = NULL;
+	prep->payload.data[asym_crypto] = pub;
+	prep->payload.data[asym_auth] = NULL;
+	prep->quotalen = 100;
+	return 0;
+}
+
+static struct asymmetric_key_parser pkcs8_key_parser = {
+	.owner	= THIS_MODULE,
+	.name	= "pkcs8",
+	.parse	= pkcs8_key_preparse,
+};
+
+/*
+ * Module stuff
+ */
+static int __init pkcs8_key_init(void)
+{
+	return register_asymmetric_key_parser(&pkcs8_key_parser);
+}
+
+static void __exit pkcs8_key_exit(void)
+{
+	unregister_asymmetric_key_parser(&pkcs8_key_parser);
+}
+
+module_init(pkcs8_key_init);
+module_exit(pkcs8_key_exit);
+
+MODULE_DESCRIPTION("PKCS#8 certificate parser");
+MODULE_LICENSE("GPL");
-- 
GitLab


From b3a8c8a5ebb5b4c3eb7b104364e63c453cc85f14 Mon Sep 17 00:00:00 2001
From: Denis Kenzior <denkenz@gmail.com>
Date: Tue, 9 Oct 2018 17:47:53 +0100
Subject: [PATCH 0223/1890] crypto: rsa-pkcs1pad: Allow hash to be optional
 [ver #2]

The original pkcs1pad implementation allowed to pad/unpad raw RSA
output.  However, this has been taken out in commit:
commit c0d20d22e0ad ("crypto: rsa-pkcs1pad - Require hash to be present")

This patch restored this ability as it is needed by the asymmetric key
implementation.

Signed-off-by: Denis Kenzior <denkenz@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Marcel Holtmann <marcel@holtmann.org>
Reviewed-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: James Morris <james.morris@microsoft.com>
---
 crypto/rsa-pkcs1pad.c | 59 ++++++++++++++++++++++++++++++-------------
 1 file changed, 41 insertions(+), 18 deletions(-)

diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c
index 812476e468213..cfc04e15fd975 100644
--- a/crypto/rsa-pkcs1pad.c
+++ b/crypto/rsa-pkcs1pad.c
@@ -392,7 +392,8 @@ static int pkcs1pad_sign(struct akcipher_request *req)
 	if (!ctx->key_size)
 		return -EINVAL;
 
-	digest_size = digest_info->size;
+	if (digest_info)
+		digest_size = digest_info->size;
 
 	if (req->src_len + digest_size > ctx->key_size - 11)
 		return -EOVERFLOW;
@@ -412,8 +413,9 @@ static int pkcs1pad_sign(struct akcipher_request *req)
 	memset(req_ctx->in_buf + 1, 0xff, ps_end - 1);
 	req_ctx->in_buf[ps_end] = 0x00;
 
-	memcpy(req_ctx->in_buf + ps_end + 1, digest_info->data,
-	       digest_info->size);
+	if (digest_info)
+		memcpy(req_ctx->in_buf + ps_end + 1, digest_info->data,
+		       digest_info->size);
 
 	pkcs1pad_sg_set_buf(req_ctx->in_sg, req_ctx->in_buf,
 			ctx->key_size - 1 - req->src_len, req->src);
@@ -475,10 +477,13 @@ static int pkcs1pad_verify_complete(struct akcipher_request *req, int err)
 		goto done;
 	pos++;
 
-	if (crypto_memneq(out_buf + pos, digest_info->data, digest_info->size))
-		goto done;
+	if (digest_info) {
+		if (crypto_memneq(out_buf + pos, digest_info->data,
+				  digest_info->size))
+			goto done;
 
-	pos += digest_info->size;
+		pos += digest_info->size;
+	}
 
 	err = 0;
 
@@ -608,11 +613,14 @@ static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb)
 
 	hash_name = crypto_attr_alg_name(tb[2]);
 	if (IS_ERR(hash_name))
-		return PTR_ERR(hash_name);
+		hash_name = NULL;
 
-	digest_info = rsa_lookup_asn1(hash_name);
-	if (!digest_info)
-		return -EINVAL;
+	if (hash_name) {
+		digest_info = rsa_lookup_asn1(hash_name);
+		if (!digest_info)
+			return -EINVAL;
+	} else
+		digest_info = NULL;
 
 	inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
 	if (!inst)
@@ -632,14 +640,29 @@ static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb)
 
 	err = -ENAMETOOLONG;
 
-	if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME,
-		     "pkcs1pad(%s,%s)", rsa_alg->base.cra_name, hash_name) >=
-	    CRYPTO_MAX_ALG_NAME ||
-	    snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME,
-		     "pkcs1pad(%s,%s)",
-		     rsa_alg->base.cra_driver_name, hash_name) >=
-	    CRYPTO_MAX_ALG_NAME)
-		goto out_drop_alg;
+	if (!hash_name) {
+		if (snprintf(inst->alg.base.cra_name,
+			     CRYPTO_MAX_ALG_NAME, "pkcs1pad(%s)",
+			     rsa_alg->base.cra_name) >= CRYPTO_MAX_ALG_NAME)
+			goto out_drop_alg;
+
+		if (snprintf(inst->alg.base.cra_driver_name,
+			     CRYPTO_MAX_ALG_NAME, "pkcs1pad(%s)",
+			     rsa_alg->base.cra_driver_name) >=
+			     CRYPTO_MAX_ALG_NAME)
+			goto out_drop_alg;
+	} else {
+		if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME,
+			     "pkcs1pad(%s,%s)", rsa_alg->base.cra_name,
+			     hash_name) >= CRYPTO_MAX_ALG_NAME)
+			goto out_drop_alg;
+
+		if (snprintf(inst->alg.base.cra_driver_name,
+			     CRYPTO_MAX_ALG_NAME, "pkcs1pad(%s,%s)",
+			     rsa_alg->base.cra_driver_name,
+			     hash_name) >= CRYPTO_MAX_ALG_NAME)
+			goto out_drop_alg;
+	}
 
 	inst->alg.base.cra_flags = rsa_alg->base.cra_flags & CRYPTO_ALG_ASYNC;
 	inst->alg.base.cra_priority = rsa_alg->base.cra_priority;
-- 
GitLab


From 903be6bb84c544551150a6f5aab9fda1ed9a6895 Mon Sep 17 00:00:00 2001
From: Denis Kenzior <denkenz@gmail.com>
Date: Tue, 9 Oct 2018 17:48:02 +0100
Subject: [PATCH 0224/1890] KEYS: asym_tpm: add skeleton for asym_tpm [ver #2]

This patch adds the basic skeleton for the asym_tpm asymmetric key
subtype.

Signed-off-by: Denis Kenzior <denkenz@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Marcel Holtmann <marcel@holtmann.org>
Reviewed-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: James Morris <james.morris@microsoft.com>
---
 crypto/asymmetric_keys/Kconfig    | 11 ++++
 crypto/asymmetric_keys/Makefile   |  1 +
 crypto/asymmetric_keys/asym_tpm.c | 90 +++++++++++++++++++++++++++++++
 include/crypto/asym_tpm_subtype.h | 16 ++++++
 4 files changed, 118 insertions(+)
 create mode 100644 crypto/asymmetric_keys/asym_tpm.c
 create mode 100644 include/crypto/asym_tpm_subtype.h

diff --git a/crypto/asymmetric_keys/Kconfig b/crypto/asymmetric_keys/Kconfig
index 66a7dad7ed3d1..b75555c7d8ae4 100644
--- a/crypto/asymmetric_keys/Kconfig
+++ b/crypto/asymmetric_keys/Kconfig
@@ -21,6 +21,17 @@ config ASYMMETRIC_PUBLIC_KEY_SUBTYPE
 	  appropriate hash algorithms (such as SHA-1) must be available.
 	  ENOPKG will be reported if the requisite algorithm is unavailable.
 
+config ASYMMETRIC_TPM_KEY_SUBTYPE
+	tristate "Asymmetric TPM backed private key subtype"
+	depends on TCG_TPM
+	select CRYPTO_HMAC
+	select CRYPTO_SHA1
+	select CRYPTO_HASH_INFO
+	help
+	  This option provides support for TPM backed private key type handling.
+	  Operations such as sign, verify, encrypt, decrypt are performed by
+	  the TPM after the private key is loaded.
+
 config X509_CERTIFICATE_PARSER
 	tristate "X.509 certificate parser"
 	depends on ASYMMETRIC_PUBLIC_KEY_SUBTYPE
diff --git a/crypto/asymmetric_keys/Makefile b/crypto/asymmetric_keys/Makefile
index c38424f55b08d..73fbe650ff1de 100644
--- a/crypto/asymmetric_keys/Makefile
+++ b/crypto/asymmetric_keys/Makefile
@@ -11,6 +11,7 @@ asymmetric_keys-y := \
 	signature.o
 
 obj-$(CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE) += public_key.o
+obj-$(CONFIG_ASYMMETRIC_TPM_KEY_SUBTYPE) += asym_tpm.o
 
 #
 # X.509 Certificate handling
diff --git a/crypto/asymmetric_keys/asym_tpm.c b/crypto/asymmetric_keys/asym_tpm.c
new file mode 100644
index 0000000000000..d0b2b97e8e543
--- /dev/null
+++ b/crypto/asymmetric_keys/asym_tpm.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0
+#define pr_fmt(fmt) "ASYM-TPM: "fmt
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/seq_file.h>
+#include <linux/scatterlist.h>
+#include <linux/tpm.h>
+#include <keys/asymmetric-subtype.h>
+#include <crypto/asym_tpm_subtype.h>
+
+/*
+ * Provide a part of a description of the key for /proc/keys.
+ */
+static void asym_tpm_describe(const struct key *asymmetric_key,
+			      struct seq_file *m)
+{
+	struct tpm_key *tk = asymmetric_key->payload.data[asym_crypto];
+
+	if (!tk)
+		return;
+
+	seq_printf(m, "TPM1.2/Blob");
+}
+
+static void asym_tpm_destroy(void *payload0, void *payload3)
+{
+	struct tpm_key *tk = payload0;
+
+	if (!tk)
+		return;
+
+	kfree(tk->blob);
+	tk->blob_len = 0;
+
+	kfree(tk);
+}
+
+/* Given the blob, parse it and load it into the TPM */
+struct tpm_key *tpm_key_create(const void *blob, uint32_t blob_len)
+{
+	int r;
+	struct tpm_key *tk;
+
+	r = tpm_is_tpm2(NULL);
+	if (r < 0)
+		goto error;
+
+	/* We don't support TPM2 yet */
+	if (r > 0) {
+		r = -ENODEV;
+		goto error;
+	}
+
+	r = -ENOMEM;
+	tk = kzalloc(sizeof(struct tpm_key), GFP_KERNEL);
+	if (!tk)
+		goto error;
+
+	tk->blob = kmemdup(blob, blob_len, GFP_KERNEL);
+	if (!tk->blob)
+		goto error_memdup;
+
+	tk->blob_len = blob_len;
+
+	return tk;
+
+error_memdup:
+	kfree(tk);
+error:
+	return ERR_PTR(r);
+}
+EXPORT_SYMBOL_GPL(tpm_key_create);
+
+/*
+ * TPM-based asymmetric key subtype
+ */
+struct asymmetric_key_subtype asym_tpm_subtype = {
+	.owner			= THIS_MODULE,
+	.name			= "asym_tpm",
+	.name_len		= sizeof("asym_tpm") - 1,
+	.describe		= asym_tpm_describe,
+	.destroy		= asym_tpm_destroy,
+};
+EXPORT_SYMBOL_GPL(asym_tpm_subtype);
+
+MODULE_DESCRIPTION("TPM based asymmetric key subtype");
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL v2");
diff --git a/include/crypto/asym_tpm_subtype.h b/include/crypto/asym_tpm_subtype.h
new file mode 100644
index 0000000000000..03550b850998b
--- /dev/null
+++ b/include/crypto/asym_tpm_subtype.h
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef _LINUX_ASYM_TPM_SUBTYPE_H
+#define _LINUX_ASYM_TPM_SUBTYPE_H
+
+#include <linux/keyctl.h>
+
+struct tpm_key {
+	void *blob;
+	u32 blob_len;
+};
+
+struct tpm_key *tpm_key_create(const void *blob, uint32_t blob_len);
+
+extern struct asymmetric_key_subtype asym_tpm_subtype;
+
+#endif /* _LINUX_ASYM_TPM_SUBTYPE_H */
-- 
GitLab


From f8c54e1ac4b82933dfcf88c37892da8ae35ccbe4 Mon Sep 17 00:00:00 2001
From: Denis Kenzior <denkenz@gmail.com>
Date: Tue, 9 Oct 2018 17:48:10 +0100
Subject: [PATCH 0225/1890] KEYS: asym_tpm: extract key size & public key [ver
 #2]

The parsed BER/DER blob obtained from user space contains a TPM_Key
structure.  This structure has some information about the key as well as
the public key portion.

This patch extracts this information for future use.

Signed-off-by: Denis Kenzior <denkenz@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Marcel Holtmann <marcel@holtmann.org>
Reviewed-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: James Morris <james.morris@microsoft.com>
---
 crypto/asymmetric_keys/asym_tpm.c | 112 ++++++++++++++++++++++++++++++
 include/crypto/asym_tpm_subtype.h |   3 +
 2 files changed, 115 insertions(+)

diff --git a/crypto/asymmetric_keys/asym_tpm.c b/crypto/asymmetric_keys/asym_tpm.c
index d0b2b97e8e543..308c51e055a48 100644
--- a/crypto/asymmetric_keys/asym_tpm.c
+++ b/crypto/asymmetric_keys/asym_tpm.c
@@ -7,6 +7,7 @@
 #include <linux/seq_file.h>
 #include <linux/scatterlist.h>
 #include <linux/tpm.h>
+#include <asm/unaligned.h>
 #include <keys/asymmetric-subtype.h>
 #include <crypto/asym_tpm_subtype.h>
 
@@ -37,6 +38,110 @@ static void asym_tpm_destroy(void *payload0, void *payload3)
 	kfree(tk);
 }
 
+/*
+ * Parse enough information out of TPM_KEY structure:
+ * TPM_STRUCT_VER -> 4 bytes
+ * TPM_KEY_USAGE -> 2 bytes
+ * TPM_KEY_FLAGS -> 4 bytes
+ * TPM_AUTH_DATA_USAGE -> 1 byte
+ * TPM_KEY_PARMS -> variable
+ * UINT32 PCRInfoSize -> 4 bytes
+ * BYTE* -> PCRInfoSize bytes
+ * TPM_STORE_PUBKEY
+ * UINT32 encDataSize;
+ * BYTE* -> encDataSize;
+ *
+ * TPM_KEY_PARMS:
+ * TPM_ALGORITHM_ID -> 4 bytes
+ * TPM_ENC_SCHEME -> 2 bytes
+ * TPM_SIG_SCHEME -> 2 bytes
+ * UINT32 parmSize -> 4 bytes
+ * BYTE* -> variable
+ */
+static int extract_key_parameters(struct tpm_key *tk)
+{
+	const void *cur = tk->blob;
+	uint32_t len = tk->blob_len;
+	const void *pub_key;
+	uint32_t sz;
+	uint32_t key_len;
+
+	if (len < 11)
+		return -EBADMSG;
+
+	/* Ensure this is a legacy key */
+	if (get_unaligned_be16(cur + 4) != 0x0015)
+		return -EBADMSG;
+
+	/* Skip to TPM_KEY_PARMS */
+	cur += 11;
+	len -= 11;
+
+	if (len < 12)
+		return -EBADMSG;
+
+	/* Make sure this is an RSA key */
+	if (get_unaligned_be32(cur) != 0x00000001)
+		return -EBADMSG;
+
+	/* Make sure this is TPM_ES_RSAESPKCSv15 encoding scheme */
+	if (get_unaligned_be16(cur + 4) != 0x0002)
+		return -EBADMSG;
+
+	/* Make sure this is TPM_SS_RSASSAPKCS1v15_DER signature scheme */
+	if (get_unaligned_be16(cur + 6) != 0x0003)
+		return -EBADMSG;
+
+	sz = get_unaligned_be32(cur + 8);
+	if (len < sz + 12)
+		return -EBADMSG;
+
+	/* Move to TPM_RSA_KEY_PARMS */
+	len -= 12;
+	cur += 12;
+
+	/* Grab the RSA key length */
+	key_len = get_unaligned_be32(cur);
+
+	switch (key_len) {
+	case 512:
+	case 1024:
+	case 1536:
+	case 2048:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* Move just past TPM_KEY_PARMS */
+	cur += sz;
+	len -= sz;
+
+	if (len < 4)
+		return -EBADMSG;
+
+	sz = get_unaligned_be32(cur);
+	if (len < 4 + sz)
+		return -EBADMSG;
+
+	/* Move to TPM_STORE_PUBKEY */
+	cur += 4 + sz;
+	len -= 4 + sz;
+
+	/* Grab the size of the public key, it should jive with the key size */
+	sz = get_unaligned_be32(cur);
+	if (sz > 256)
+		return -EINVAL;
+
+	pub_key = cur + 4;
+
+	tk->key_len = key_len;
+	tk->pub_key = pub_key;
+	tk->pub_key_len = sz;
+
+	return 0;
+}
+
 /* Given the blob, parse it and load it into the TPM */
 struct tpm_key *tpm_key_create(const void *blob, uint32_t blob_len)
 {
@@ -64,8 +169,15 @@ struct tpm_key *tpm_key_create(const void *blob, uint32_t blob_len)
 
 	tk->blob_len = blob_len;
 
+	r = extract_key_parameters(tk);
+	if (r < 0)
+		goto error_extract;
+
 	return tk;
 
+error_extract:
+	kfree(tk->blob);
+	tk->blob_len = 0;
 error_memdup:
 	kfree(tk);
 error:
diff --git a/include/crypto/asym_tpm_subtype.h b/include/crypto/asym_tpm_subtype.h
index 03550b850998b..48198c36d6b9b 100644
--- a/include/crypto/asym_tpm_subtype.h
+++ b/include/crypto/asym_tpm_subtype.h
@@ -7,6 +7,9 @@
 struct tpm_key {
 	void *blob;
 	u32 blob_len;
+	uint16_t key_len; /* Size in bits of the key */
+	const void *pub_key; /* pointer inside blob to the public key bytes */
+	uint16_t pub_key_len; /* length of the public key */
 };
 
 struct tpm_key *tpm_key_create(const void *blob, uint32_t blob_len);
-- 
GitLab


From d5e72745ca121459f68c598dac7b374a76322b94 Mon Sep 17 00:00:00 2001
From: Denis Kenzior <denkenz@gmail.com>
Date: Tue, 9 Oct 2018 17:48:17 +0100
Subject: [PATCH 0226/1890] KEYS: Add parser for TPM-based keys [ver #2]

For TPM based keys, the only standard seems to be described here:
http://david.woodhou.se/draft-woodhouse-cert-best-practice.html#rfc.section.4.4

Quote from the relevant section:
"Rather, a common form of storage for "wrapped" keys is to encode the
binary TCPA_KEY structure in a single ASN.1 OCTET-STRING, and store the
result in PEM format with the tag "-----BEGIN TSS KEY BLOB-----". "

This patch implements the above behavior.  It is assumed that the PEM
encoding is stripped out by userspace and only the raw DER/BER format is
provided.  This is similar to how PKCS7, PKCS8 and X.509 keys are
handled.

Signed-off-by: Denis Kenzior <denkenz@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Marcel Holtmann <marcel@holtmann.org>
Reviewed-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: James Morris <james.morris@microsoft.com>
---
 crypto/asymmetric_keys/Kconfig      |   9 +++
 crypto/asymmetric_keys/Makefile     |  11 +++
 crypto/asymmetric_keys/tpm.asn1     |   5 ++
 crypto/asymmetric_keys/tpm_parser.c | 102 ++++++++++++++++++++++++++++
 4 files changed, 127 insertions(+)
 create mode 100644 crypto/asymmetric_keys/tpm.asn1
 create mode 100644 crypto/asymmetric_keys/tpm_parser.c

diff --git a/crypto/asymmetric_keys/Kconfig b/crypto/asymmetric_keys/Kconfig
index b75555c7d8ae4..88353a9ebc9b4 100644
--- a/crypto/asymmetric_keys/Kconfig
+++ b/crypto/asymmetric_keys/Kconfig
@@ -52,6 +52,15 @@ config PKCS8_PRIVATE_KEY_PARSER
 	  private key data and provides the ability to instantiate a crypto key
 	  from that data.
 
+config TPM_KEY_PARSER
+	tristate "TPM private key parser"
+	depends on ASYMMETRIC_TPM_KEY_SUBTYPE
+	select ASN1
+	help
+	  This option provides support for parsing TPM format blobs for
+	  private key data and provides the ability to instantiate a crypto key
+	  from that data.
+
 config PKCS7_MESSAGE_PARSER
 	tristate "PKCS#7 message parser"
 	depends on X509_CERTIFICATE_PARSER
diff --git a/crypto/asymmetric_keys/Makefile b/crypto/asymmetric_keys/Makefile
index 73fbe650ff1de..28b91adba2aed 100644
--- a/crypto/asymmetric_keys/Makefile
+++ b/crypto/asymmetric_keys/Makefile
@@ -75,3 +75,14 @@ verify_signed_pefile-y := \
 
 $(obj)/mscode_parser.o: $(obj)/mscode.asn1.h $(obj)/mscode.asn1.h
 $(obj)/mscode.asn1.o: $(obj)/mscode.asn1.c $(obj)/mscode.asn1.h
+
+#
+# TPM private key parsing
+#
+obj-$(CONFIG_TPM_KEY_PARSER) += tpm_key_parser.o
+tpm_key_parser-y := \
+	tpm.asn1.o \
+	tpm_parser.o
+
+$(obj)/tpm_parser.o: $(obj)/tpm.asn1.h
+$(obj)/tpm.asn1.o: $(obj)/tpm.asn1.c $(obj)/tpm.asn1.h
diff --git a/crypto/asymmetric_keys/tpm.asn1 b/crypto/asymmetric_keys/tpm.asn1
new file mode 100644
index 0000000000000..d7f194232f30a
--- /dev/null
+++ b/crypto/asymmetric_keys/tpm.asn1
@@ -0,0 +1,5 @@
+--
+-- Unencryted TPM Blob.  For details of the format, see:
+-- http://david.woodhou.se/draft-woodhouse-cert-best-practice.html#I-D.mavrogiannopoulos-tpmuri
+--
+PrivateKeyInfo ::= OCTET STRING ({ tpm_note_key })
diff --git a/crypto/asymmetric_keys/tpm_parser.c b/crypto/asymmetric_keys/tpm_parser.c
new file mode 100644
index 0000000000000..96405d8dcd98d
--- /dev/null
+++ b/crypto/asymmetric_keys/tpm_parser.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0
+#define pr_fmt(fmt) "TPM-PARSER: "fmt
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <keys/asymmetric-subtype.h>
+#include <keys/asymmetric-parser.h>
+#include <crypto/asym_tpm_subtype.h>
+#include "tpm.asn1.h"
+
+struct tpm_parse_context {
+	const void	*blob;
+	u32		blob_len;
+};
+
+/*
+ * Note the key data of the ASN.1 blob.
+ */
+int tpm_note_key(void *context, size_t hdrlen,
+		   unsigned char tag,
+		   const void *value, size_t vlen)
+{
+	struct tpm_parse_context *ctx = context;
+
+	ctx->blob = value;
+	ctx->blob_len = vlen;
+
+	return 0;
+}
+
+/*
+ * Parse a TPM-encrypted private key blob.
+ */
+static struct tpm_key *tpm_parse(const void *data, size_t datalen)
+{
+	struct tpm_parse_context ctx;
+	long ret;
+
+	memset(&ctx, 0, sizeof(ctx));
+
+	/* Attempt to decode the private key */
+	ret = asn1_ber_decoder(&tpm_decoder, &ctx, data, datalen);
+	if (ret < 0)
+		goto error;
+
+	return tpm_key_create(ctx.blob, ctx.blob_len);
+
+error:
+	return ERR_PTR(ret);
+}
+/*
+ * Attempt to parse a data blob for a key as a TPM private key blob.
+ */
+static int tpm_key_preparse(struct key_preparsed_payload *prep)
+{
+	struct tpm_key *tk;
+
+	/*
+	 * TPM 1.2 keys are max 2048 bits long, so assume the blob is no
+	 * more than 4x that
+	 */
+	if (prep->datalen > 256 * 4)
+		return -EMSGSIZE;
+
+	tk = tpm_parse(prep->data, prep->datalen);
+
+	if (IS_ERR(tk))
+		return PTR_ERR(tk);
+
+	/* We're pinning the module by being linked against it */
+	__module_get(asym_tpm_subtype.owner);
+	prep->payload.data[asym_subtype] = &asym_tpm_subtype;
+	prep->payload.data[asym_key_ids] = NULL;
+	prep->payload.data[asym_crypto] = tk;
+	prep->payload.data[asym_auth] = NULL;
+	prep->quotalen = 100;
+	return 0;
+}
+
+static struct asymmetric_key_parser tpm_key_parser = {
+	.owner	= THIS_MODULE,
+	.name	= "tpm_parser",
+	.parse	= tpm_key_preparse,
+};
+
+static int __init tpm_key_init(void)
+{
+	return register_asymmetric_key_parser(&tpm_key_parser);
+}
+
+static void __exit tpm_key_exit(void)
+{
+	unregister_asymmetric_key_parser(&tpm_key_parser);
+}
+
+module_init(tpm_key_init);
+module_exit(tpm_key_exit);
+
+MODULE_DESCRIPTION("TPM private key-blob parser");
+MODULE_LICENSE("GPL v2");
-- 
GitLab


From dff5a61a59614c110d06a5dc1466cdb762b6affd Mon Sep 17 00:00:00 2001
From: Denis Kenzior <denkenz@gmail.com>
Date: Tue, 9 Oct 2018 17:48:25 +0100
Subject: [PATCH 0227/1890] KEYS: asym_tpm: Implement pkey_query [ver #2]

This commit implements the pkey_query operation.  This is accomplished
by utilizing the public key portion to obtain max encryption size
information for the operations that utilize the public key (encrypt,
verify).  The private key size extracted from the TPM_Key data structure
is used to fill the information where the private key is used (decrypt,
sign).

The kernel uses a DER/BER format for public keys and does not support
setting the key via the raw binary form.  To get around this a simple
DER/BER formatter is implemented which stores the DER/BER formatted key
and exponent in a temporary buffer for use by the crypto API.

The only exponent supported currently is 65537.  This holds true for
other Linux TPM tools such as 'create_tpm_key' and
trousers-openssl_tpm_engine.

Signed-off-by: Denis Kenzior <denkenz@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Marcel Holtmann <marcel@holtmann.org>
Reviewed-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: James Morris <james.morris@microsoft.com>
---
 crypto/asymmetric_keys/asym_tpm.c | 135 ++++++++++++++++++++++++++++++
 1 file changed, 135 insertions(+)

diff --git a/crypto/asymmetric_keys/asym_tpm.c b/crypto/asymmetric_keys/asym_tpm.c
index 308c51e055a48..837472d107d5f 100644
--- a/crypto/asymmetric_keys/asym_tpm.c
+++ b/crypto/asymmetric_keys/asym_tpm.c
@@ -7,10 +7,24 @@
 #include <linux/seq_file.h>
 #include <linux/scatterlist.h>
 #include <linux/tpm.h>
+#include <crypto/akcipher.h>
 #include <asm/unaligned.h>
 #include <keys/asymmetric-subtype.h>
 #include <crypto/asym_tpm_subtype.h>
 
+/*
+ * Maximum buffer size for the BER/DER encoded public key.  The public key
+ * is of the form SEQUENCE { INTEGER n, INTEGER e } where n is a maximum 2048
+ * bit key and e is usually 65537
+ * The encoding overhead is:
+ * - max 4 bytes for SEQUENCE
+ *   - max 4 bytes for INTEGER n type/length
+ *     - 257 bytes of n
+ *   - max 2 bytes for INTEGER e type/length
+ *     - 3 bytes of e
+ */
+#define PUB_KEY_BUF_SIZE (4 + 4 + 257 + 2 + 3)
+
 /*
  * Provide a part of a description of the key for /proc/keys.
  */
@@ -38,6 +52,126 @@ static void asym_tpm_destroy(void *payload0, void *payload3)
 	kfree(tk);
 }
 
+/* How many bytes will it take to encode the length */
+static inline uint32_t definite_length(uint32_t len)
+{
+	if (len <= 127)
+		return 1;
+	if (len <= 255)
+		return 2;
+	return 3;
+}
+
+static inline uint8_t *encode_tag_length(uint8_t *buf, uint8_t tag,
+					 uint32_t len)
+{
+	*buf++ = tag;
+
+	if (len <= 127) {
+		buf[0] = len;
+		return buf + 1;
+	}
+
+	if (len <= 255) {
+		buf[0] = 0x81;
+		buf[1] = len;
+		return buf + 2;
+	}
+
+	buf[0] = 0x82;
+	put_unaligned_be16(len, buf + 1);
+	return buf + 3;
+}
+
+static uint32_t derive_pub_key(const void *pub_key, uint32_t len, uint8_t *buf)
+{
+	uint8_t *cur = buf;
+	uint32_t n_len = definite_length(len) + 1 + len + 1;
+	uint32_t e_len = definite_length(3) + 1 + 3;
+	uint8_t e[3] = { 0x01, 0x00, 0x01 };
+
+	/* SEQUENCE */
+	cur = encode_tag_length(cur, 0x30, n_len + e_len);
+	/* INTEGER n */
+	cur = encode_tag_length(cur, 0x02, len + 1);
+	cur[0] = 0x00;
+	memcpy(cur + 1, pub_key, len);
+	cur += len + 1;
+	cur = encode_tag_length(cur, 0x02, sizeof(e));
+	memcpy(cur, e, sizeof(e));
+	cur += sizeof(e);
+
+	return cur - buf;
+}
+
+/*
+ * Determine the crypto algorithm name.
+ */
+static int determine_akcipher(const char *encoding, const char *hash_algo,
+			      char alg_name[CRYPTO_MAX_ALG_NAME])
+{
+	/* TODO: We don't support hashing yet */
+	if (hash_algo)
+		return -ENOPKG;
+
+	if (strcmp(encoding, "pkcs1") == 0) {
+		strcpy(alg_name, "pkcs1pad(rsa)");
+		return 0;
+	}
+
+	if (strcmp(encoding, "raw") == 0) {
+		strcpy(alg_name, "rsa");
+		return 0;
+	}
+
+	return -ENOPKG;
+}
+
+/*
+ * Query information about a key.
+ */
+static int tpm_key_query(const struct kernel_pkey_params *params,
+			 struct kernel_pkey_query *info)
+{
+	struct tpm_key *tk = params->key->payload.data[asym_crypto];
+	int ret;
+	char alg_name[CRYPTO_MAX_ALG_NAME];
+	struct crypto_akcipher *tfm;
+	uint8_t der_pub_key[PUB_KEY_BUF_SIZE];
+	uint32_t der_pub_key_len;
+	int len;
+
+	/* TPM only works on private keys, public keys still done in software */
+	ret = determine_akcipher(params->encoding, params->hash_algo, alg_name);
+	if (ret < 0)
+		return ret;
+
+	tfm = crypto_alloc_akcipher(alg_name, 0, 0);
+	if (IS_ERR(tfm))
+		return PTR_ERR(tfm);
+
+	der_pub_key_len = derive_pub_key(tk->pub_key, tk->pub_key_len,
+					 der_pub_key);
+
+	ret = crypto_akcipher_set_pub_key(tfm, der_pub_key, der_pub_key_len);
+	if (ret < 0)
+		goto error_free_tfm;
+
+	len = crypto_akcipher_maxsize(tfm);
+
+	info->key_size = tk->key_len;
+	info->max_data_size = tk->key_len / 8;
+	info->max_sig_size = len;
+	info->max_enc_size = len;
+	info->max_dec_size = tk->key_len / 8;
+
+	ret = 0;
+error_free_tfm:
+	crypto_free_akcipher(tfm);
+	pr_devel("<==%s() = %d\n", __func__, ret);
+	return ret;
+}
+
 /*
  * Parse enough information out of TPM_KEY structure:
  * TPM_STRUCT_VER -> 4 bytes
@@ -194,6 +328,7 @@ struct asymmetric_key_subtype asym_tpm_subtype = {
 	.name_len		= sizeof("asym_tpm") - 1,
 	.describe		= asym_tpm_describe,
 	.destroy		= asym_tpm_destroy,
+	.query			= tpm_key_query,
 };
 EXPORT_SYMBOL_GPL(asym_tpm_subtype);
 
-- 
GitLab


From ad4b1eb5fb3350c979a4f86eacfe7aac0595f335 Mon Sep 17 00:00:00 2001
From: Denis Kenzior <denkenz@gmail.com>
Date: Tue, 9 Oct 2018 17:48:33 +0100
Subject: [PATCH 0228/1890] KEYS: asym_tpm: Implement encryption operation [ver
 #2]

This patch impelements the pkey_encrypt operation.  The public key
portion extracted from the TPM key blob is used.  The operation is
performed entirely in software using the crypto API.

Signed-off-by: Denis Kenzior <denkenz@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Marcel Holtmann <marcel@holtmann.org>
Reviewed-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: James Morris <james.morris@microsoft.com>
---
 crypto/asymmetric_keys/asym_tpm.c | 84 +++++++++++++++++++++++++++++++
 1 file changed, 84 insertions(+)

diff --git a/crypto/asymmetric_keys/asym_tpm.c b/crypto/asymmetric_keys/asym_tpm.c
index 837472d107d5f..8edca3c4c193c 100644
--- a/crypto/asymmetric_keys/asym_tpm.c
+++ b/crypto/asymmetric_keys/asym_tpm.c
@@ -165,6 +165,8 @@ static int tpm_key_query(const struct kernel_pkey_params *params,
 	info->max_enc_size = len;
 	info->max_dec_size = tk->key_len / 8;
 
+	info->supported_ops = KEYCTL_SUPPORTS_ENCRYPT;
+
 	ret = 0;
 error_free_tfm:
 	crypto_free_akcipher(tfm);
@@ -172,6 +174,87 @@ static int tpm_key_query(const struct kernel_pkey_params *params,
 	return ret;
 }
 
+/*
+ * Encryption operation is performed with the public key.  Hence it is done
+ * in software
+ */
+static int tpm_key_encrypt(struct tpm_key *tk,
+			   struct kernel_pkey_params *params,
+			   const void *in, void *out)
+{
+	char alg_name[CRYPTO_MAX_ALG_NAME];
+	struct crypto_akcipher *tfm;
+	struct akcipher_request *req;
+	struct crypto_wait cwait;
+	struct scatterlist in_sg, out_sg;
+	uint8_t der_pub_key[PUB_KEY_BUF_SIZE];
+	uint32_t der_pub_key_len;
+	int ret;
+
+	pr_devel("==>%s()\n", __func__);
+
+	ret = determine_akcipher(params->encoding, params->hash_algo, alg_name);
+	if (ret < 0)
+		return ret;
+
+	tfm = crypto_alloc_akcipher(alg_name, 0, 0);
+	if (IS_ERR(tfm))
+		return PTR_ERR(tfm);
+
+	der_pub_key_len = derive_pub_key(tk->pub_key, tk->pub_key_len,
+					 der_pub_key);
+
+	ret = crypto_akcipher_set_pub_key(tfm, der_pub_key, der_pub_key_len);
+	if (ret < 0)
+		goto error_free_tfm;
+
+	req = akcipher_request_alloc(tfm, GFP_KERNEL);
+	if (!req)
+		goto error_free_tfm;
+
+	sg_init_one(&in_sg, in, params->in_len);
+	sg_init_one(&out_sg, out, params->out_len);
+	akcipher_request_set_crypt(req, &in_sg, &out_sg, params->in_len,
+				   params->out_len);
+	crypto_init_wait(&cwait);
+	akcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG |
+				      CRYPTO_TFM_REQ_MAY_SLEEP,
+				      crypto_req_done, &cwait);
+
+	ret = crypto_akcipher_encrypt(req);
+	ret = crypto_wait_req(ret, &cwait);
+
+	if (ret == 0)
+		ret = req->dst_len;
+
+	akcipher_request_free(req);
+error_free_tfm:
+	crypto_free_akcipher(tfm);
+	pr_devel("<==%s() = %d\n", __func__, ret);
+	return ret;
+}
+
+/*
+ * Do encryption, decryption and signing ops.
+ */
+static int tpm_key_eds_op(struct kernel_pkey_params *params,
+			  const void *in, void *out)
+{
+	struct tpm_key *tk = params->key->payload.data[asym_crypto];
+	int ret = -EOPNOTSUPP;
+
+	/* Perform the encryption calculation. */
+	switch (params->op) {
+	case kernel_pkey_encrypt:
+		ret = tpm_key_encrypt(tk, params, in, out);
+		break;
+	default:
+		BUG();
+	}
+
+	return ret;
+}
+
 /*
  * Parse enough information out of TPM_KEY structure:
  * TPM_STRUCT_VER -> 4 bytes
@@ -329,6 +412,7 @@ struct asymmetric_key_subtype asym_tpm_subtype = {
 	.describe		= asym_tpm_describe,
 	.destroy		= asym_tpm_destroy,
 	.query			= tpm_key_query,
+	.eds_op			= tpm_key_eds_op,
 };
 EXPORT_SYMBOL_GPL(asym_tpm_subtype);
 
-- 
GitLab


From e1ea9f86023e7668604cc6456a818e5e4d0361c9 Mon Sep 17 00:00:00 2001
From: Denis Kenzior <denkenz@gmail.com>
Date: Tue, 9 Oct 2018 17:48:41 +0100
Subject: [PATCH 0229/1890] KEYS: trusted: Expose common functionality [ver #2]

This patch exposes some common functionality needed to send TPM commands.
Several functions from keys/trusted.c are exposed for use by the new tpm
key subtype and a module dependency is introduced.

In the future, common functionality between the trusted key type and the
asym_tpm subtype should be factored out into a common utility library.

Signed-off-by: Denis Kenzior <denkenz@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Marcel Holtmann <marcel@holtmann.org>
Reviewed-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: James Morris <james.morris@microsoft.com>
---
 crypto/asymmetric_keys/Kconfig |  1 +
 security/keys/trusted.c        | 12 ++++++++----
 security/keys/trusted.h        | 14 +++++++++++++-
 3 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/crypto/asymmetric_keys/Kconfig b/crypto/asymmetric_keys/Kconfig
index 88353a9ebc9b4..be70ca6c85d31 100644
--- a/crypto/asymmetric_keys/Kconfig
+++ b/crypto/asymmetric_keys/Kconfig
@@ -24,6 +24,7 @@ config ASYMMETRIC_PUBLIC_KEY_SUBTYPE
 config ASYMMETRIC_TPM_KEY_SUBTYPE
 	tristate "Asymmetric TPM backed private key subtype"
 	depends on TCG_TPM
+	depends on TRUSTED_KEYS
 	select CRYPTO_HMAC
 	select CRYPTO_SHA1
 	select CRYPTO_HASH_INFO
diff --git a/security/keys/trusted.c b/security/keys/trusted.c
index b69d3b1777c25..1c025fdfe0e09 100644
--- a/security/keys/trusted.c
+++ b/security/keys/trusted.c
@@ -121,7 +121,7 @@ static int TSS_rawhmac(unsigned char *digest, const unsigned char *key,
 /*
  * calculate authorization info fields to send to TPM
  */
-static int TSS_authhmac(unsigned char *digest, const unsigned char *key,
+int TSS_authhmac(unsigned char *digest, const unsigned char *key,
 			unsigned int keylen, unsigned char *h1,
 			unsigned char *h2, unsigned char h3, ...)
 {
@@ -168,11 +168,12 @@ static int TSS_authhmac(unsigned char *digest, const unsigned char *key,
 	kzfree(sdesc);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(TSS_authhmac);
 
 /*
  * verify the AUTH1_COMMAND (Seal) result from TPM
  */
-static int TSS_checkhmac1(unsigned char *buffer,
+int TSS_checkhmac1(unsigned char *buffer,
 			  const uint32_t command,
 			  const unsigned char *ononce,
 			  const unsigned char *key,
@@ -249,6 +250,7 @@ static int TSS_checkhmac1(unsigned char *buffer,
 	kzfree(sdesc);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(TSS_checkhmac1);
 
 /*
  * verify the AUTH2_COMMAND (unseal) result from TPM
@@ -355,7 +357,7 @@ static int TSS_checkhmac2(unsigned char *buffer,
  * For key specific tpm requests, we will generate and send our
  * own TPM command packets using the drivers send function.
  */
-static int trusted_tpm_send(unsigned char *cmd, size_t buflen)
+int trusted_tpm_send(unsigned char *cmd, size_t buflen)
 {
 	int rc;
 
@@ -367,6 +369,7 @@ static int trusted_tpm_send(unsigned char *cmd, size_t buflen)
 		rc = -EPERM;
 	return rc;
 }
+EXPORT_SYMBOL_GPL(trusted_tpm_send);
 
 /*
  * Lock a trusted key, by extending a selected PCR.
@@ -425,7 +428,7 @@ static int osap(struct tpm_buf *tb, struct osapsess *s,
 /*
  * Create an object independent authorisation protocol (oiap) session
  */
-static int oiap(struct tpm_buf *tb, uint32_t *handle, unsigned char *nonce)
+int oiap(struct tpm_buf *tb, uint32_t *handle, unsigned char *nonce)
 {
 	int ret;
 
@@ -442,6 +445,7 @@ static int oiap(struct tpm_buf *tb, uint32_t *handle, unsigned char *nonce)
 	       TPM_NONCE_SIZE);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(oiap);
 
 struct tpm_digests {
 	unsigned char encauth[SHA1_DIGEST_SIZE];
diff --git a/security/keys/trusted.h b/security/keys/trusted.h
index 8d5fe9eafb22a..adbcb68178260 100644
--- a/security/keys/trusted.h
+++ b/security/keys/trusted.h
@@ -3,7 +3,7 @@
 #define __TRUSTED_KEY_H
 
 /* implementation specific TPM constants */
-#define MAX_BUF_SIZE			512
+#define MAX_BUF_SIZE			1024
 #define TPM_GETRANDOM_SIZE		14
 #define TPM_OSAP_SIZE			36
 #define TPM_OIAP_SIZE			10
@@ -36,6 +36,18 @@ enum {
 	SRK_keytype = 4
 };
 
+int TSS_authhmac(unsigned char *digest, const unsigned char *key,
+			unsigned int keylen, unsigned char *h1,
+			unsigned char *h2, unsigned char h3, ...);
+int TSS_checkhmac1(unsigned char *buffer,
+			  const uint32_t command,
+			  const unsigned char *ononce,
+			  const unsigned char *key,
+			  unsigned int keylen, ...);
+
+int trusted_tpm_send(unsigned char *cmd, size_t buflen);
+int oiap(struct tpm_buf *tb, uint32_t *handle, unsigned char *nonce);
+
 #define TPM_DEBUG 0
 
 #if TPM_DEBUG
-- 
GitLab


From 22447981fc050b5f1bdd0e7cbee89b4152a2b2d8 Mon Sep 17 00:00:00 2001
From: Denis Kenzior <denkenz@gmail.com>
Date: Tue, 9 Oct 2018 17:48:49 +0100
Subject: [PATCH 0230/1890] KEYS: Move trusted.h to include/keys [ver #2]

Signed-off-by: Denis Kenzior <denkenz@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Marcel Holtmann <marcel@holtmann.org>
Reviewed-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: James Morris <james.morris@microsoft.com>
---
 {security => include}/keys/trusted.h | 0
 security/keys/trusted.c              | 2 +-
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename {security => include}/keys/trusted.h (100%)

diff --git a/security/keys/trusted.h b/include/keys/trusted.h
similarity index 100%
rename from security/keys/trusted.h
rename to include/keys/trusted.h
diff --git a/security/keys/trusted.c b/security/keys/trusted.c
index 1c025fdfe0e09..ff6789365a12f 100644
--- a/security/keys/trusted.c
+++ b/security/keys/trusted.c
@@ -30,7 +30,7 @@
 #include <linux/tpm.h>
 #include <linux/tpm_command.h>
 
-#include "trusted.h"
+#include <keys/trusted.h>
 
 static const char hmac_alg[] = "hmac(sha1)";
 static const char hash_alg[] = "sha1";
-- 
GitLab


From 0c36264aa1d5b77eb21f646f83223134e4528cfe Mon Sep 17 00:00:00 2001
From: Denis Kenzior <denkenz@gmail.com>
Date: Tue, 9 Oct 2018 17:48:58 +0100
Subject: [PATCH 0231/1890] KEYS: asym_tpm: Add loadkey2 and flushspecific [ver
 #2]

This commit adds TPM_LoadKey2 and TPM_FlushSpecific operations.

Signed-off-by: Denis Kenzior <denkenz@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: James Morris <james.morris@microsoft.com>
Tested-by: Marcel Holtmann <marcel@holtmann.org>
Reviewed-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: James Morris <james.morris@microsoft.com>
---
 crypto/asymmetric_keys/asym_tpm.c | 94 +++++++++++++++++++++++++++++++
 1 file changed, 94 insertions(+)

diff --git a/crypto/asymmetric_keys/asym_tpm.c b/crypto/asymmetric_keys/asym_tpm.c
index 8edca3c4c193c..6a2d33014eccc 100644
--- a/crypto/asymmetric_keys/asym_tpm.c
+++ b/crypto/asymmetric_keys/asym_tpm.c
@@ -7,11 +7,105 @@
 #include <linux/seq_file.h>
 #include <linux/scatterlist.h>
 #include <linux/tpm.h>
+#include <linux/tpm_command.h>
 #include <crypto/akcipher.h>
+#include <crypto/hash.h>
+#include <crypto/sha.h>
 #include <asm/unaligned.h>
 #include <keys/asymmetric-subtype.h>
+#include <keys/trusted.h>
 #include <crypto/asym_tpm_subtype.h>
 
+#define TPM_ORD_FLUSHSPECIFIC	186
+#define TPM_ORD_LOADKEY2	65
+#define TPM_LOADKEY2_SIZE		59
+#define TPM_FLUSHSPECIFIC_SIZE		18
+
+#define TPM_RT_KEY                      0x00000001
+
+/*
+ * Load a TPM key from the blob provided by userspace
+ */
+static int tpm_loadkey2(struct tpm_buf *tb,
+			uint32_t keyhandle, unsigned char *keyauth,
+			const unsigned char *keyblob, int keybloblen,
+			uint32_t *newhandle)
+{
+	unsigned char nonceodd[TPM_NONCE_SIZE];
+	unsigned char enonce[TPM_NONCE_SIZE];
+	unsigned char authdata[SHA1_DIGEST_SIZE];
+	uint32_t authhandle = 0;
+	unsigned char cont = 0;
+	uint32_t ordinal;
+	int ret;
+
+	ordinal = htonl(TPM_ORD_LOADKEY2);
+
+	/* session for loading the key */
+	ret = oiap(tb, &authhandle, enonce);
+	if (ret < 0) {
+		pr_info("oiap failed (%d)\n", ret);
+		return ret;
+	}
+
+	/* generate odd nonce */
+	ret = tpm_get_random(NULL, nonceodd, TPM_NONCE_SIZE);
+	if (ret < 0) {
+		pr_info("tpm_get_random failed (%d)\n", ret);
+		return ret;
+	}
+
+	/* calculate authorization HMAC value */
+	ret = TSS_authhmac(authdata, keyauth, SHA1_DIGEST_SIZE, enonce,
+			   nonceodd, cont, sizeof(uint32_t), &ordinal,
+			   keybloblen, keyblob, 0, 0);
+	if (ret < 0)
+		return ret;
+
+	/* build the request buffer */
+	INIT_BUF(tb);
+	store16(tb, TPM_TAG_RQU_AUTH1_COMMAND);
+	store32(tb, TPM_LOADKEY2_SIZE + keybloblen);
+	store32(tb, TPM_ORD_LOADKEY2);
+	store32(tb, keyhandle);
+	storebytes(tb, keyblob, keybloblen);
+	store32(tb, authhandle);
+	storebytes(tb, nonceodd, TPM_NONCE_SIZE);
+	store8(tb, cont);
+	storebytes(tb, authdata, SHA1_DIGEST_SIZE);
+
+	ret = trusted_tpm_send(tb->data, MAX_BUF_SIZE);
+	if (ret < 0) {
+		pr_info("authhmac failed (%d)\n", ret);
+		return ret;
+	}
+
+	ret = TSS_checkhmac1(tb->data, ordinal, nonceodd, keyauth,
+			     SHA1_DIGEST_SIZE, 0, 0);
+	if (ret < 0) {
+		pr_info("TSS_checkhmac1 failed (%d)\n", ret);
+		return ret;
+	}
+
+	*newhandle = LOAD32(tb->data, TPM_DATA_OFFSET);
+	return 0;
+}
+
+/*
+ * Execute the FlushSpecific TPM command
+ */
+static int tpm_flushspecific(struct tpm_buf *tb, uint32_t handle)
+{
+	INIT_BUF(tb);
+	store16(tb, TPM_TAG_RQU_COMMAND);
+	store32(tb, TPM_FLUSHSPECIFIC_SIZE);
+	store32(tb, TPM_ORD_FLUSHSPECIFIC);
+	store32(tb, handle);
+	store32(tb, TPM_RT_KEY);
+
+	return trusted_tpm_send(tb->data, MAX_BUF_SIZE);
+}
+
 /*
  * Maximum buffer size for the BER/DER encoded public key.  The public key
  * is of the form SEQUENCE { INTEGER n, INTEGER e } where n is a maximum 2048
-- 
GitLab


From f884fe5a158f750e232b029e1fac0283e388e062 Mon Sep 17 00:00:00 2001
From: Denis Kenzior <denkenz@gmail.com>
Date: Tue, 9 Oct 2018 17:49:05 +0100
Subject: [PATCH 0232/1890] KEYS: asym_tpm: Implement tpm_unbind [ver #2]

Signed-off-by: Denis Kenzior <denkenz@gmail.com>
Reviewed-by: James Morris <james.morris@microsoft.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Marcel Holtmann <marcel@holtmann.org>
Reviewed-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: James Morris <james.morris@microsoft.com>
---
 crypto/asymmetric_keys/asym_tpm.c | 82 +++++++++++++++++++++++++++++++
 1 file changed, 82 insertions(+)

diff --git a/crypto/asymmetric_keys/asym_tpm.c b/crypto/asymmetric_keys/asym_tpm.c
index 6a2d33014eccc..e893b52122226 100644
--- a/crypto/asymmetric_keys/asym_tpm.c
+++ b/crypto/asymmetric_keys/asym_tpm.c
@@ -18,8 +18,10 @@
 
 #define TPM_ORD_FLUSHSPECIFIC	186
 #define TPM_ORD_LOADKEY2	65
+#define TPM_ORD_UNBIND		30
 #define TPM_LOADKEY2_SIZE		59
 #define TPM_FLUSHSPECIFIC_SIZE		18
+#define TPM_UNBIND_SIZE			63
 
 #define TPM_RT_KEY                      0x00000001
 
@@ -106,6 +108,86 @@ static int tpm_flushspecific(struct tpm_buf *tb, uint32_t handle)
 	return trusted_tpm_send(tb->data, MAX_BUF_SIZE);
 }
 
+/*
+ * Decrypt a blob provided by userspace using a specific key handle.
+ * The handle is a well known handle or previously loaded by e.g. LoadKey2
+ */
+static int tpm_unbind(struct tpm_buf *tb,
+			uint32_t keyhandle, unsigned char *keyauth,
+			const unsigned char *blob, uint32_t bloblen,
+			void *out, uint32_t outlen)
+{
+	unsigned char nonceodd[TPM_NONCE_SIZE];
+	unsigned char enonce[TPM_NONCE_SIZE];
+	unsigned char authdata[SHA1_DIGEST_SIZE];
+	uint32_t authhandle = 0;
+	unsigned char cont = 0;
+	uint32_t ordinal;
+	uint32_t datalen;
+	int ret;
+
+	ordinal = htonl(TPM_ORD_UNBIND);
+	datalen = htonl(bloblen);
+
+	/* session for loading the key */
+	ret = oiap(tb, &authhandle, enonce);
+	if (ret < 0) {
+		pr_info("oiap failed (%d)\n", ret);
+		return ret;
+	}
+
+	/* generate odd nonce */
+	ret = tpm_get_random(NULL, nonceodd, TPM_NONCE_SIZE);
+	if (ret < 0) {
+		pr_info("tpm_get_random failed (%d)\n", ret);
+		return ret;
+	}
+
+	/* calculate authorization HMAC value */
+	ret = TSS_authhmac(authdata, keyauth, SHA1_DIGEST_SIZE, enonce,
+			   nonceodd, cont, sizeof(uint32_t), &ordinal,
+			   sizeof(uint32_t), &datalen,
+			   bloblen, blob, 0, 0);
+	if (ret < 0)
+		return ret;
+
+	/* build the request buffer */
+	INIT_BUF(tb);
+	store16(tb, TPM_TAG_RQU_AUTH1_COMMAND);
+	store32(tb, TPM_UNBIND_SIZE + bloblen);
+	store32(tb, TPM_ORD_UNBIND);
+	store32(tb, keyhandle);
+	store32(tb, bloblen);
+	storebytes(tb, blob, bloblen);
+	store32(tb, authhandle);
+	storebytes(tb, nonceodd, TPM_NONCE_SIZE);
+	store8(tb, cont);
+	storebytes(tb, authdata, SHA1_DIGEST_SIZE);
+
+	ret = trusted_tpm_send(tb->data, MAX_BUF_SIZE);
+	if (ret < 0) {
+		pr_info("authhmac failed (%d)\n", ret);
+		return ret;
+	}
+
+	datalen = LOAD32(tb->data, TPM_DATA_OFFSET);
+
+	ret = TSS_checkhmac1(tb->data, ordinal, nonceodd,
+			     keyauth, SHA1_DIGEST_SIZE,
+			     sizeof(uint32_t), TPM_DATA_OFFSET,
+			     datalen, TPM_DATA_OFFSET + sizeof(uint32_t),
+			     0, 0);
+	if (ret < 0) {
+		pr_info("TSS_checkhmac1 failed (%d)\n", ret);
+		return ret;
+	}
+
+	memcpy(out, tb->data + TPM_DATA_OFFSET + sizeof(uint32_t),
+	       min(outlen, datalen));
+
+	return datalen;
+}
+
 /*
  * Maximum buffer size for the BER/DER encoded public key.  The public key
  * is of the form SEQUENCE { INTEGER n, INTEGER e } where n is a maximum 2048
-- 
GitLab


From a335974ae0883e045151a2160093a22aa02c3626 Mon Sep 17 00:00:00 2001
From: Denis Kenzior <denkenz@gmail.com>
Date: Tue, 9 Oct 2018 17:49:13 +0100
Subject: [PATCH 0233/1890] KEYS: asym_tpm: Implement the decrypt operation
 [ver #2]

This patch implements the pkey_decrypt operation using the private key
blob.  The blob is first loaded into the TPM via tpm_loadkey2.  Once the
handle is obtained, tpm_unbind operation is used to decrypt the data on
the TPM and the result is returned.  The key loaded by tpm_loadkey2 is
then evicted via tpm_flushspecific operation.

This patch assumes that the SRK authorization is a well known 20-byte of
zeros and the same holds for the key authorization of the provided key.

Signed-off-by: Denis Kenzior <denkenz@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Marcel Holtmann <marcel@holtmann.org>
Reviewed-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: James Morris <james.morris@microsoft.com>
---
 crypto/asymmetric_keys/asym_tpm.c | 58 ++++++++++++++++++++++++++++++-
 1 file changed, 57 insertions(+), 1 deletion(-)

diff --git a/crypto/asymmetric_keys/asym_tpm.c b/crypto/asymmetric_keys/asym_tpm.c
index e893b52122226..6f5d5cf98910d 100644
--- a/crypto/asymmetric_keys/asym_tpm.c
+++ b/crypto/asymmetric_keys/asym_tpm.c
@@ -341,7 +341,8 @@ static int tpm_key_query(const struct kernel_pkey_params *params,
 	info->max_enc_size = len;
 	info->max_dec_size = tk->key_len / 8;
 
-	info->supported_ops = KEYCTL_SUPPORTS_ENCRYPT;
+	info->supported_ops = KEYCTL_SUPPORTS_ENCRYPT |
+			      KEYCTL_SUPPORTS_DECRYPT;
 
 	ret = 0;
 error_free_tfm:
@@ -410,6 +411,58 @@ static int tpm_key_encrypt(struct tpm_key *tk,
 	return ret;
 }
 
+/*
+ * Decryption operation is performed with the private key in the TPM.
+ */
+static int tpm_key_decrypt(struct tpm_key *tk,
+			   struct kernel_pkey_params *params,
+			   const void *in, void *out)
+{
+	struct tpm_buf *tb;
+	uint32_t keyhandle;
+	uint8_t srkauth[SHA1_DIGEST_SIZE];
+	uint8_t keyauth[SHA1_DIGEST_SIZE];
+	int r;
+
+	pr_devel("==>%s()\n", __func__);
+
+	if (params->hash_algo)
+		return -ENOPKG;
+
+	if (strcmp(params->encoding, "pkcs1"))
+		return -ENOPKG;
+
+	tb = kzalloc(sizeof(*tb), GFP_KERNEL);
+	if (!tb)
+		return -ENOMEM;
+
+	/* TODO: Handle a non-all zero SRK authorization */
+	memset(srkauth, 0, sizeof(srkauth));
+
+	r = tpm_loadkey2(tb, SRKHANDLE, srkauth,
+				tk->blob, tk->blob_len, &keyhandle);
+	if (r < 0) {
+		pr_devel("loadkey2 failed (%d)\n", r);
+		goto error;
+	}
+
+	/* TODO: Handle a non-all zero key authorization */
+	memset(keyauth, 0, sizeof(keyauth));
+
+	r = tpm_unbind(tb, keyhandle, keyauth,
+		       in, params->in_len, out, params->out_len);
+	if (r < 0)
+		pr_devel("tpm_unbind failed (%d)\n", r);
+
+	if (tpm_flushspecific(tb, keyhandle) < 0)
+		pr_devel("flushspecific failed (%d)\n", r);
+
+error:
+	kzfree(tb);
+	pr_devel("<==%s() = %d\n", __func__, r);
+	return r;
+}
+
 /*
  * Do encryption, decryption and signing ops.
  */
@@ -424,6 +477,9 @@ static int tpm_key_eds_op(struct kernel_pkey_params *params,
 	case kernel_pkey_encrypt:
 		ret = tpm_key_encrypt(tk, params, in, out);
 		break;
+	case kernel_pkey_decrypt:
+		ret = tpm_key_decrypt(tk, params, in, out);
+		break;
 	default:
 		BUG();
 	}
-- 
GitLab


From e08e6891231f5fae82a6ffb4affdfa2ced8c1a77 Mon Sep 17 00:00:00 2001
From: Denis Kenzior <denkenz@gmail.com>
Date: Tue, 9 Oct 2018 17:49:20 +0100
Subject: [PATCH 0234/1890] KEYS: asym_tpm: Implement signature verification
 [ver #2]

This patch implements the verify_signature operation.  The public key
portion extracted from the TPM key blob is used.  The operation is
performed entirely in software using the crypto API.

Signed-off-by: Denis Kenzior <denkenz@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Marcel Holtmann <marcel@holtmann.org>
Reviewed-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: James Morris <james.morris@microsoft.com>
---
 crypto/asymmetric_keys/asym_tpm.c | 106 ++++++++++++++++++++++++++++--
 1 file changed, 100 insertions(+), 6 deletions(-)

diff --git a/crypto/asymmetric_keys/asym_tpm.c b/crypto/asymmetric_keys/asym_tpm.c
index 6f5d5cf98910d..a38ba375675ee 100644
--- a/crypto/asymmetric_keys/asym_tpm.c
+++ b/crypto/asymmetric_keys/asym_tpm.c
@@ -15,6 +15,7 @@
 #include <keys/asymmetric-subtype.h>
 #include <keys/trusted.h>
 #include <crypto/asym_tpm_subtype.h>
+#include <crypto/public_key.h>
 
 #define TPM_ORD_FLUSHSPECIFIC	186
 #define TPM_ORD_LOADKEY2	65
@@ -286,12 +287,16 @@ static uint32_t derive_pub_key(const void *pub_key, uint32_t len, uint8_t *buf)
 static int determine_akcipher(const char *encoding, const char *hash_algo,
 			      char alg_name[CRYPTO_MAX_ALG_NAME])
 {
-	/* TODO: We don't support hashing yet */
-	if (hash_algo)
-		return -ENOPKG;
-
 	if (strcmp(encoding, "pkcs1") == 0) {
-		strcpy(alg_name, "pkcs1pad(rsa)");
+		if (!hash_algo) {
+			strcpy(alg_name, "pkcs1pad(rsa)");
+			return 0;
+		}
+
+		if (snprintf(alg_name, CRYPTO_MAX_ALG_NAME, "pkcs1pad(rsa,%s)",
+			     hash_algo) >= CRYPTO_MAX_ALG_NAME)
+			return -EINVAL;
+
 		return 0;
 	}
 
@@ -342,7 +347,8 @@ static int tpm_key_query(const struct kernel_pkey_params *params,
 	info->max_dec_size = tk->key_len / 8;
 
 	info->supported_ops = KEYCTL_SUPPORTS_ENCRYPT |
-			      KEYCTL_SUPPORTS_DECRYPT;
+			      KEYCTL_SUPPORTS_DECRYPT |
+			      KEYCTL_SUPPORTS_VERIFY;
 
 	ret = 0;
 error_free_tfm:
@@ -487,6 +493,93 @@ static int tpm_key_eds_op(struct kernel_pkey_params *params,
 	return ret;
 }
 
+/*
+ * Verify a signature using a public key.
+ */
+static int tpm_key_verify_signature(const struct key *key,
+				    const struct public_key_signature *sig)
+{
+	const struct tpm_key *tk = key->payload.data[asym_crypto];
+	struct crypto_wait cwait;
+	struct crypto_akcipher *tfm;
+	struct akcipher_request *req;
+	struct scatterlist sig_sg, digest_sg;
+	char alg_name[CRYPTO_MAX_ALG_NAME];
+	uint8_t der_pub_key[PUB_KEY_BUF_SIZE];
+	uint32_t der_pub_key_len;
+	void *output;
+	unsigned int outlen;
+	int ret;
+
+	pr_devel("==>%s()\n", __func__);
+
+	BUG_ON(!tk);
+	BUG_ON(!sig);
+	BUG_ON(!sig->s);
+
+	if (!sig->digest)
+		return -ENOPKG;
+
+	ret = determine_akcipher(sig->encoding, sig->hash_algo, alg_name);
+	if (ret < 0)
+		return ret;
+
+	tfm = crypto_alloc_akcipher(alg_name, 0, 0);
+	if (IS_ERR(tfm))
+		return PTR_ERR(tfm);
+
+	der_pub_key_len = derive_pub_key(tk->pub_key, tk->pub_key_len,
+					 der_pub_key);
+
+	ret = crypto_akcipher_set_pub_key(tfm, der_pub_key, der_pub_key_len);
+	if (ret < 0)
+		goto error_free_tfm;
+
+	ret = -ENOMEM;
+	req = akcipher_request_alloc(tfm, GFP_KERNEL);
+	if (!req)
+		goto error_free_tfm;
+
+	ret = -ENOMEM;
+	outlen = crypto_akcipher_maxsize(tfm);
+	output = kmalloc(outlen, GFP_KERNEL);
+	if (!output)
+		goto error_free_req;
+
+	sg_init_one(&sig_sg, sig->s, sig->s_size);
+	sg_init_one(&digest_sg, output, outlen);
+	akcipher_request_set_crypt(req, &sig_sg, &digest_sg, sig->s_size,
+				   outlen);
+	crypto_init_wait(&cwait);
+	akcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG |
+				      CRYPTO_TFM_REQ_MAY_SLEEP,
+				      crypto_req_done, &cwait);
+
+	/* Perform the verification calculation.  This doesn't actually do the
+	 * verification, but rather calculates the hash expected by the
+	 * signature and returns that to us.
+	 */
+	ret = crypto_wait_req(crypto_akcipher_verify(req), &cwait);
+	if (ret)
+		goto out_free_output;
+
+	/* Do the actual verification step. */
+	if (req->dst_len != sig->digest_size ||
+	    memcmp(sig->digest, output, sig->digest_size) != 0)
+		ret = -EKEYREJECTED;
+
+out_free_output:
+	kfree(output);
+error_free_req:
+	akcipher_request_free(req);
+error_free_tfm:
+	crypto_free_akcipher(tfm);
+	pr_devel("<==%s() = %d\n", __func__, ret);
+	if (WARN_ON_ONCE(ret > 0))
+		ret = -EINVAL;
+	return ret;
+}
+
 /*
  * Parse enough information out of TPM_KEY structure:
  * TPM_STRUCT_VER -> 4 bytes
@@ -645,6 +738,7 @@ struct asymmetric_key_subtype asym_tpm_subtype = {
 	.destroy		= asym_tpm_destroy,
 	.query			= tpm_key_query,
 	.eds_op			= tpm_key_eds_op,
+	.verify_signature	= tpm_key_verify_signature,
 };
 EXPORT_SYMBOL_GPL(asym_tpm_subtype);
 
-- 
GitLab


From e73d170f6c77e7006b48c5e9c325fe520f6012ca Mon Sep 17 00:00:00 2001
From: Denis Kenzior <denkenz@gmail.com>
Date: Tue, 9 Oct 2018 17:49:28 +0100
Subject: [PATCH 0235/1890] KEYS: asym_tpm: Implement tpm_sign [ver #2]

Signed-off-by: Denis Kenzior <denkenz@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Marcel Holtmann <marcel@holtmann.org>
Reviewed-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: James Morris <james.morris@microsoft.com>
---
 crypto/asymmetric_keys/asym_tpm.c | 87 +++++++++++++++++++++++++++++++
 1 file changed, 87 insertions(+)

diff --git a/crypto/asymmetric_keys/asym_tpm.c b/crypto/asymmetric_keys/asym_tpm.c
index a38ba375675ee..a5a5f913a74f3 100644
--- a/crypto/asymmetric_keys/asym_tpm.c
+++ b/crypto/asymmetric_keys/asym_tpm.c
@@ -20,9 +20,11 @@
 #define TPM_ORD_FLUSHSPECIFIC	186
 #define TPM_ORD_LOADKEY2	65
 #define TPM_ORD_UNBIND		30
+#define TPM_ORD_SIGN		60
 #define TPM_LOADKEY2_SIZE		59
 #define TPM_FLUSHSPECIFIC_SIZE		18
 #define TPM_UNBIND_SIZE			63
+#define TPM_SIGN_SIZE			63
 
 #define TPM_RT_KEY                      0x00000001
 
@@ -189,6 +191,91 @@ static int tpm_unbind(struct tpm_buf *tb,
 	return datalen;
 }
 
+/*
+ * Sign a blob provided by userspace (that has had the hash function applied)
+ * using a specific key handle.  The handle is assumed to have been previously
+ * loaded by e.g. LoadKey2.
+ *
+ * Note that the key signature scheme of the used key should be set to
+ * TPM_SS_RSASSAPKCS1v15_DER.  This allows the hashed input to be of any size
+ * up to key_length_in_bytes - 11 and not be limited to size 20 like the
+ * TPM_SS_RSASSAPKCS1v15_SHA1 signature scheme.
+ */
+static int tpm_sign(struct tpm_buf *tb,
+		    uint32_t keyhandle, unsigned char *keyauth,
+		    const unsigned char *blob, uint32_t bloblen,
+		    void *out, uint32_t outlen)
+{
+	unsigned char nonceodd[TPM_NONCE_SIZE];
+	unsigned char enonce[TPM_NONCE_SIZE];
+	unsigned char authdata[SHA1_DIGEST_SIZE];
+	uint32_t authhandle = 0;
+	unsigned char cont = 0;
+	uint32_t ordinal;
+	uint32_t datalen;
+	int ret;
+
+	ordinal = htonl(TPM_ORD_SIGN);
+	datalen = htonl(bloblen);
+
+	/* session for loading the key */
+	ret = oiap(tb, &authhandle, enonce);
+	if (ret < 0) {
+		pr_info("oiap failed (%d)\n", ret);
+		return ret;
+	}
+
+	/* generate odd nonce */
+	ret = tpm_get_random(NULL, nonceodd, TPM_NONCE_SIZE);
+	if (ret < 0) {
+		pr_info("tpm_get_random failed (%d)\n", ret);
+		return ret;
+	}
+
+	/* calculate authorization HMAC value */
+	ret = TSS_authhmac(authdata, keyauth, SHA1_DIGEST_SIZE, enonce,
+			   nonceodd, cont, sizeof(uint32_t), &ordinal,
+			   sizeof(uint32_t), &datalen,
+			   bloblen, blob, 0, 0);
+	if (ret < 0)
+		return ret;
+
+	/* build the request buffer */
+	INIT_BUF(tb);
+	store16(tb, TPM_TAG_RQU_AUTH1_COMMAND);
+	store32(tb, TPM_SIGN_SIZE + bloblen);
+	store32(tb, TPM_ORD_SIGN);
+	store32(tb, keyhandle);
+	store32(tb, bloblen);
+	storebytes(tb, blob, bloblen);
+	store32(tb, authhandle);
+	storebytes(tb, nonceodd, TPM_NONCE_SIZE);
+	store8(tb, cont);
+	storebytes(tb, authdata, SHA1_DIGEST_SIZE);
+
+	ret = trusted_tpm_send(tb->data, MAX_BUF_SIZE);
+	if (ret < 0) {
+		pr_info("authhmac failed (%d)\n", ret);
+		return ret;
+	}
+
+	datalen = LOAD32(tb->data, TPM_DATA_OFFSET);
+
+	ret = TSS_checkhmac1(tb->data, ordinal, nonceodd,
+			     keyauth, SHA1_DIGEST_SIZE,
+			     sizeof(uint32_t), TPM_DATA_OFFSET,
+			     datalen, TPM_DATA_OFFSET + sizeof(uint32_t),
+			     0, 0);
+	if (ret < 0) {
+		pr_info("TSS_checkhmac1 failed (%d)\n", ret);
+		return ret;
+	}
+
+	memcpy(out, tb->data + TPM_DATA_OFFSET + sizeof(uint32_t),
+	       min(datalen, outlen));
+
+	return datalen;
+}
 /*
  * Maximum buffer size for the BER/DER encoded public key.  The public key
  * is of the form SEQUENCE { INTEGER n, INTEGER e } where n is a maximum 2048
-- 
GitLab


From 64ae16dfeefec670276607fa789ce096c7ebd7c4 Mon Sep 17 00:00:00 2001
From: Denis Kenzior <denkenz@gmail.com>
Date: Tue, 9 Oct 2018 17:49:35 +0100
Subject: [PATCH 0236/1890] KEYS: asym_tpm: Add support for the sign operation
 [ver #2]

The sign operation can operate in a non-hashed mode by running the RSA
sign operation directly on the input.  This assumes that the input is
less than key_size_in_bytes - 11.  Since the TPM performs its own PKCS1
padding, it isn't possible to support 'raw' mode, only 'pkcs1'.

Alternatively, a hashed version is also possible.  In this variant the
input is hashed (by userspace) via the selected hash function first.
Then this implementation takes care of converting the hash to ASN.1
format and the sign operation is performed on the result.  This is
similar to the implementation inside crypto/rsa-pkcs1pad.c.

ASN1 templates were copied from crypto/rsa-pkcs1pad.c.  There seems to
be no easy way to expose that functionality, but likely the templates
should be shared somehow.

The sign operation is implemented via TPM_Sign operation on the TPM.
It is assumed that the TPM wrapped key provided uses
TPM_SS_RSASSAPKCS1v15_DER signature scheme.  This allows the TPM_Sign
operation to work on data up to key_len_in_bytes - 11 bytes long.

In theory, we could also use TPM_Unbind instead of TPM_Sign, but we would
have to manually pkcs1 pad the digest first.

Signed-off-by: Denis Kenzior <denkenz@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Marcel Holtmann <marcel@holtmann.org>
Reviewed-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: James Morris <james.morris@microsoft.com>
---
 crypto/asymmetric_keys/asym_tpm.c | 156 +++++++++++++++++++++++++++++-
 1 file changed, 155 insertions(+), 1 deletion(-)

diff --git a/crypto/asymmetric_keys/asym_tpm.c b/crypto/asymmetric_keys/asym_tpm.c
index a5a5f913a74f3..5d4c270463f60 100644
--- a/crypto/asymmetric_keys/asym_tpm.c
+++ b/crypto/asymmetric_keys/asym_tpm.c
@@ -435,7 +435,8 @@ static int tpm_key_query(const struct kernel_pkey_params *params,
 
 	info->supported_ops = KEYCTL_SUPPORTS_ENCRYPT |
 			      KEYCTL_SUPPORTS_DECRYPT |
-			      KEYCTL_SUPPORTS_VERIFY;
+			      KEYCTL_SUPPORTS_VERIFY |
+			      KEYCTL_SUPPORTS_SIGN;
 
 	ret = 0;
 error_free_tfm:
@@ -556,6 +557,156 @@ static int tpm_key_decrypt(struct tpm_key *tk,
 	return r;
 }
 
+/*
+ * Hash algorithm OIDs plus ASN.1 DER wrappings [RFC4880 sec 5.2.2].
+ */
+static const u8 digest_info_md5[] = {
+	0x30, 0x20, 0x30, 0x0c, 0x06, 0x08,
+	0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x02, 0x05, /* OID */
+	0x05, 0x00, 0x04, 0x10
+};
+
+static const u8 digest_info_sha1[] = {
+	0x30, 0x21, 0x30, 0x09, 0x06, 0x05,
+	0x2b, 0x0e, 0x03, 0x02, 0x1a,
+	0x05, 0x00, 0x04, 0x14
+};
+
+static const u8 digest_info_rmd160[] = {
+	0x30, 0x21, 0x30, 0x09, 0x06, 0x05,
+	0x2b, 0x24, 0x03, 0x02, 0x01,
+	0x05, 0x00, 0x04, 0x14
+};
+
+static const u8 digest_info_sha224[] = {
+	0x30, 0x2d, 0x30, 0x0d, 0x06, 0x09,
+	0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x04,
+	0x05, 0x00, 0x04, 0x1c
+};
+
+static const u8 digest_info_sha256[] = {
+	0x30, 0x31, 0x30, 0x0d, 0x06, 0x09,
+	0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x01,
+	0x05, 0x00, 0x04, 0x20
+};
+
+static const u8 digest_info_sha384[] = {
+	0x30, 0x41, 0x30, 0x0d, 0x06, 0x09,
+	0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x02,
+	0x05, 0x00, 0x04, 0x30
+};
+
+static const u8 digest_info_sha512[] = {
+	0x30, 0x51, 0x30, 0x0d, 0x06, 0x09,
+	0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x03,
+	0x05, 0x00, 0x04, 0x40
+};
+
+static const struct asn1_template {
+	const char	*name;
+	const u8	*data;
+	size_t		size;
+} asn1_templates[] = {
+#define _(X) { #X, digest_info_##X, sizeof(digest_info_##X) }
+	_(md5),
+	_(sha1),
+	_(rmd160),
+	_(sha256),
+	_(sha384),
+	_(sha512),
+	_(sha224),
+	{ NULL }
+#undef _
+};
+
+static const struct asn1_template *lookup_asn1(const char *name)
+{
+	const struct asn1_template *p;
+
+	for (p = asn1_templates; p->name; p++)
+		if (strcmp(name, p->name) == 0)
+			return p;
+	return NULL;
+}
+
+/*
+ * Sign operation is performed with the private key in the TPM.
+ */
+static int tpm_key_sign(struct tpm_key *tk,
+			struct kernel_pkey_params *params,
+			const void *in, void *out)
+{
+	struct tpm_buf *tb;
+	uint32_t keyhandle;
+	uint8_t srkauth[SHA1_DIGEST_SIZE];
+	uint8_t keyauth[SHA1_DIGEST_SIZE];
+	void *asn1_wrapped = NULL;
+	uint32_t in_len = params->in_len;
+	int r;
+
+	pr_devel("==>%s()\n", __func__);
+
+	if (strcmp(params->encoding, "pkcs1"))
+		return -ENOPKG;
+
+	if (params->hash_algo) {
+		const struct asn1_template *asn1 =
+						lookup_asn1(params->hash_algo);
+
+		if (!asn1)
+			return -ENOPKG;
+
+		/* request enough space for the ASN.1 template + input hash */
+		asn1_wrapped = kzalloc(in_len + asn1->size, GFP_KERNEL);
+		if (!asn1_wrapped)
+			return -ENOMEM;
+
+		/* Copy ASN.1 template, then the input */
+		memcpy(asn1_wrapped, asn1->data, asn1->size);
+		memcpy(asn1_wrapped + asn1->size, in, in_len);
+
+		in = asn1_wrapped;
+		in_len += asn1->size;
+	}
+
+	if (in_len > tk->key_len / 8 - 11) {
+		r = -EOVERFLOW;
+		goto error_free_asn1_wrapped;
+	}
+
+	r = -ENOMEM;
+	tb = kzalloc(sizeof(*tb), GFP_KERNEL);
+	if (!tb)
+		goto error_free_asn1_wrapped;
+
+	/* TODO: Handle a non-all zero SRK authorization */
+	memset(srkauth, 0, sizeof(srkauth));
+
+	r = tpm_loadkey2(tb, SRKHANDLE, srkauth,
+			 tk->blob, tk->blob_len, &keyhandle);
+	if (r < 0) {
+		pr_devel("loadkey2 failed (%d)\n", r);
+		goto error_free_tb;
+	}
+
+	/* TODO: Handle a non-all zero key authorization */
+	memset(keyauth, 0, sizeof(keyauth));
+
+	r = tpm_sign(tb, keyhandle, keyauth, in, in_len, out, params->out_len);
+	if (r < 0)
+		pr_devel("tpm_sign failed (%d)\n", r);
+
+	if (tpm_flushspecific(tb, keyhandle) < 0)
+		pr_devel("flushspecific failed (%d)\n", r);
+
+error_free_tb:
+	kzfree(tb);
+error_free_asn1_wrapped:
+	kfree(asn1_wrapped);
+	pr_devel("<==%s() = %d\n", __func__, r);
+	return r;
+}
+
 /*
  * Do encryption, decryption and signing ops.
  */
@@ -573,6 +724,9 @@ static int tpm_key_eds_op(struct kernel_pkey_params *params,
 	case kernel_pkey_decrypt:
 		ret = tpm_key_decrypt(tk, params, in, out);
 		break;
+	case kernel_pkey_sign:
+		ret = tpm_key_sign(tk, params, in, out);
+		break;
 	default:
 		BUG();
 	}
-- 
GitLab


From cc4ebf5c0a3440ed0a32d25c55ebdb6ce5f3c0bc Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Fri, 19 Oct 2018 06:54:54 +0000
Subject: [PATCH 0237/1890] Revert "powerpc/8xx: Use L1 entry APG to handle
 _PAGE_ACCESSED for CONFIG_SWAP"

This reverts commit 4f94b2c7462d9720b2afa7e8e8d4c19446bb31ce.

That commit was buggy, as it used rlwinm instead of rlwimi.
Instead of fixing that bug, we revert the previous commit in order to
reduce the dependency between L1 entries and L2 entries

Fixes: 4f94b2c7462d9 ("powerpc/8xx: Use L1 entry APG to handle _PAGE_ACCESSED for CONFIG_SWAP")
Cc: stable@vger.kernel.org
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/mmu-8xx.h | 34 ++++------------------
 arch/powerpc/kernel/head_8xx.S     | 45 ++++++++++++++++++------------
 arch/powerpc/mm/8xx_mmu.c          |  2 +-
 3 files changed, 34 insertions(+), 47 deletions(-)

diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h
index 4f547752ae795..193f53116c7ae 100644
--- a/arch/powerpc/include/asm/mmu-8xx.h
+++ b/arch/powerpc/include/asm/mmu-8xx.h
@@ -34,20 +34,12 @@
  * respectively NA for All or X for Supervisor and no access for User.
  * Then we use the APG to say whether accesses are according to Page rules or
  * "all Supervisor" rules (Access to all)
- * We also use the 2nd APG bit for _PAGE_ACCESSED when having SWAP:
- * When that bit is not set access is done iaw "all user"
- * which means no access iaw page rules.
- * Therefore, we define 4 APG groups. lsb is _PMD_USER, 2nd is _PAGE_ACCESSED
- * 0x => No access => 11 (all accesses performed as user iaw page definition)
- * 10 => No user => 01 (all accesses performed according to page definition)
- * 11 => User => 00 (all accesses performed as supervisor iaw page definition)
+ * Therefore, we define 2 APG groups. lsb is _PMD_USER
+ * 0 => No user => 01 (all accesses performed according to page definition)
+ * 1 => User => 00 (all accesses performed as supervisor iaw page definition)
  * We define all 16 groups so that all other bits of APG can take any value
  */
-#ifdef CONFIG_SWAP
-#define MI_APG_INIT	0xf4f4f4f4
-#else
 #define MI_APG_INIT	0x44444444
-#endif
 
 /* The effective page number register.  When read, contains the information
  * about the last instruction TLB miss.  When MI_RPN is written, bits in
@@ -115,20 +107,12 @@
  * Supervisor and no access for user and NA for ALL.
  * Then we use the APG to say whether accesses are according to Page rules or
  * "all Supervisor" rules (Access to all)
- * We also use the 2nd APG bit for _PAGE_ACCESSED when having SWAP:
- * When that bit is not set access is done iaw "all user"
- * which means no access iaw page rules.
- * Therefore, we define 4 APG groups. lsb is _PMD_USER, 2nd is _PAGE_ACCESSED
- * 0x => No access => 11 (all accesses performed as user iaw page definition)
- * 10 => No user => 01 (all accesses performed according to page definition)
- * 11 => User => 00 (all accesses performed as supervisor iaw page definition)
+ * Therefore, we define 2 APG groups. lsb is _PMD_USER
+ * 0 => No user => 01 (all accesses performed according to page definition)
+ * 1 => User => 00 (all accesses performed as supervisor iaw page definition)
  * We define all 16 groups so that all other bits of APG can take any value
  */
-#ifdef CONFIG_SWAP
-#define MD_APG_INIT	0xf4f4f4f4
-#else
 #define MD_APG_INIT	0x44444444
-#endif
 
 /* The effective page number register.  When read, contains the information
  * about the last instruction TLB miss.  When MD_RPN is written, bits in
@@ -180,12 +164,6 @@
  */
 #define SPRN_M_TW	799
 
-/* APGs */
-#define M_APG0		0x00000000
-#define M_APG1		0x00000020
-#define M_APG2		0x00000040
-#define M_APG3		0x00000060
-
 #ifdef CONFIG_PPC_MM_SLICES
 #include <asm/nohash/32/slice.h>
 #define SLICE_ARRAY_SIZE	(1 << (32 - SLICE_LOW_SHIFT - 1))
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 134a573a9f2d0..12c92a483fb11 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -353,13 +353,14 @@ _ENTRY(ITLBMiss_cmp)
 #if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
 	mtcr	r12
 #endif
-
-#ifdef CONFIG_SWAP
-	rlwinm	r11, r10, 31, _PAGE_ACCESSED >> 1
-#endif
 	/* Load the MI_TWC with the attributes for this "segment." */
 	mtspr	SPRN_MI_TWC, r11	/* Set segment attributes */
 
+#ifdef CONFIG_SWAP
+	rlwinm	r11, r10, 32-5, _PAGE_PRESENT
+	and	r11, r11, r10
+	rlwimi	r10, r11, 0, _PAGE_PRESENT
+#endif
 	li	r11, RPN_PATTERN | 0x200
 	/* The Linux PTE won't go exactly into the MMU TLB.
 	 * Software indicator bits 20 and 23 must be clear.
@@ -470,14 +471,22 @@ _ENTRY(DTLBMiss_jmp)
 	 * above.
 	 */
 	rlwimi	r11, r10, 0, _PAGE_GUARDED
-#ifdef CONFIG_SWAP
-	/* _PAGE_ACCESSED has to be set. We use second APG bit for that, 0
-	 * on that bit will represent a Non Access group
-	 */
-	rlwinm	r11, r10, 31, _PAGE_ACCESSED >> 1
-#endif
 	mtspr	SPRN_MD_TWC, r11
 
+	/* Both _PAGE_ACCESSED and _PAGE_PRESENT has to be set.
+	 * We also need to know if the insn is a load/store, so:
+	 * Clear _PAGE_PRESENT and load that which will
+	 * trap into DTLB Error with store bit set accordinly.
+	 */
+	/* PRESENT=0x1, ACCESSED=0x20
+	 * r11 = ((r10 & PRESENT) & ((r10 & ACCESSED) >> 5));
+	 * r10 = (r10 & ~PRESENT) | r11;
+	 */
+#ifdef CONFIG_SWAP
+	rlwinm	r11, r10, 32-5, _PAGE_PRESENT
+	and	r11, r11, r10
+	rlwimi	r10, r11, 0, _PAGE_PRESENT
+#endif
 	/* The Linux PTE won't go exactly into the MMU TLB.
 	 * Software indicator bits 24, 25, 26, and 27 must be
 	 * set.  All other Linux PTE bits control the behavior
@@ -637,8 +646,8 @@ InstructionBreakpoint:
  */
 DTLBMissIMMR:
 	mtcr	r12
-	/* Set 512k byte guarded page and mark it valid and accessed */
-	li	r10, MD_PS512K | MD_GUARDED | MD_SVALID | M_APG2
+	/* Set 512k byte guarded page and mark it valid */
+	li	r10, MD_PS512K | MD_GUARDED | MD_SVALID
 	mtspr	SPRN_MD_TWC, r10
 	mfspr	r10, SPRN_IMMR			/* Get current IMMR */
 	rlwinm	r10, r10, 0, 0xfff80000		/* Get 512 kbytes boundary */
@@ -656,8 +665,8 @@ _ENTRY(dtlb_miss_exit_2)
 
 DTLBMissLinear:
 	mtcr	r12
-	/* Set 8M byte page and mark it valid and accessed */
-	li	r11, MD_PS8MEG | MD_SVALID | M_APG2
+	/* Set 8M byte page and mark it valid */
+	li	r11, MD_PS8MEG | MD_SVALID
 	mtspr	SPRN_MD_TWC, r11
 	rlwinm	r10, r10, 0, 0x0f800000	/* 8xx supports max 256Mb RAM */
 	ori	r10, r10, 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY | \
@@ -675,8 +684,8 @@ _ENTRY(dtlb_miss_exit_3)
 #ifndef CONFIG_PIN_TLB_TEXT
 ITLBMissLinear:
 	mtcr	r12
-	/* Set 8M byte page and mark it valid,accessed */
-	li	r11, MI_PS8MEG | MI_SVALID | M_APG2
+	/* Set 8M byte page and mark it valid */
+	li	r11, MI_PS8MEG | MI_SVALID
 	mtspr	SPRN_MI_TWC, r11
 	rlwinm	r10, r10, 0, 0x0f800000	/* 8xx supports max 256Mb RAM */
 	ori	r10, r10, 0xf0 | MI_SPS16K | _PAGE_SH | _PAGE_DIRTY | \
@@ -960,7 +969,7 @@ initial_mmu:
 	ori	r8, r8, MI_EVALID	/* Mark it valid */
 	mtspr	SPRN_MI_EPN, r8
 	li	r8, MI_PS8MEG /* Set 8M byte page */
-	ori	r8, r8, MI_SVALID | M_APG2	/* Make it valid, APG 2 */
+	ori	r8, r8, MI_SVALID	/* Make it valid */
 	mtspr	SPRN_MI_TWC, r8
 	li	r8, MI_BOOTINIT		/* Create RPN for address 0 */
 	mtspr	SPRN_MI_RPN, r8		/* Store TLB entry */
@@ -987,7 +996,7 @@ initial_mmu:
 	ori	r8, r8, MD_EVALID	/* Mark it valid */
 	mtspr	SPRN_MD_EPN, r8
 	li	r8, MD_PS512K | MD_GUARDED	/* Set 512k byte page */
-	ori	r8, r8, MD_SVALID | M_APG2	/* Make it valid and accessed */
+	ori	r8, r8, MD_SVALID	/* Make it valid */
 	mtspr	SPRN_MD_TWC, r8
 	mr	r8, r9			/* Create paddr for TLB */
 	ori	r8, r8, MI_BOOTINIT|0x2 /* Inhibit cache -- Cort */
diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c
index 36484a2ef9158..fee599cf3bc32 100644
--- a/arch/powerpc/mm/8xx_mmu.c
+++ b/arch/powerpc/mm/8xx_mmu.c
@@ -79,7 +79,7 @@ void __init MMU_init_hw(void)
 	for (; i < 32 && mem >= LARGE_PAGE_SIZE_8M; i++) {
 		mtspr(SPRN_MD_CTR, ctr | (i << 8));
 		mtspr(SPRN_MD_EPN, (unsigned long)__va(addr) | MD_EVALID);
-		mtspr(SPRN_MD_TWC, MD_PS8MEG | MD_SVALID | M_APG2);
+		mtspr(SPRN_MD_TWC, MD_PS8MEG | MD_SVALID);
 		mtspr(SPRN_MD_RPN, addr | flags | _PAGE_PRESENT);
 		addr += LARGE_PAGE_SIZE_8M;
 		mem -= LARGE_PAGE_SIZE_8M;
-- 
GitLab


From 082e2869fc022de4db9977e06558da78384ea98c Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Fri, 19 Oct 2018 06:55:04 +0000
Subject: [PATCH 0238/1890] powerpc/code-patching: Add a helper to get the
 address of a patch_site

This patch adds a helper to get the address of a patch_site.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
[mpe: Call it "patch site" addr]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/code-patching.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h
index 31733a95bbd05..3d5acd2b113a2 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -36,6 +36,11 @@ int raw_patch_instruction(unsigned int *addr, unsigned int instr);
 int patch_instruction_site(s32 *addr, unsigned int instr);
 int patch_branch_site(s32 *site, unsigned long target, int flags);
 
+static inline unsigned long patch_site_addr(s32 *site)
+{
+	return (unsigned long)site + *site;
+}
+
 int instr_is_relative_branch(unsigned int instr);
 int instr_is_relative_link_branch(unsigned int instr);
 int instr_is_branch_to_addr(const unsigned int *instr, unsigned long addr);
-- 
GitLab


From 1a210878bf21de3f60646c13001d04bd4f57dfe1 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Fri, 19 Oct 2018 06:55:06 +0000
Subject: [PATCH 0239/1890] powerpc/8xx: Use patch_site for memory setup
 patching

The 8xx TLB miss routines are patched at startup at several places.

This patch uses the new patch_site functionality in order
to get a better code readability and avoid a label mess when
dumping the code with 'objdump -d'

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/mmu-8xx.h |  5 +++++
 arch/powerpc/kernel/head_8xx.S     | 19 +++++++++++--------
 arch/powerpc/mm/8xx_mmu.c          | 23 +++++++----------------
 3 files changed, 23 insertions(+), 24 deletions(-)

diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h
index 193f53116c7ae..3a15d6647d47e 100644
--- a/arch/powerpc/include/asm/mmu-8xx.h
+++ b/arch/powerpc/include/asm/mmu-8xx.h
@@ -229,6 +229,11 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
 	BUG();
 }
 
+/* patch sites */
+extern s32 patch__itlbmiss_linmem_top;
+extern s32 patch__dtlbmiss_linmem_top, patch__dtlbmiss_immr_jmp;
+extern s32 patch__fixupdar_linmem_top;
+
 #endif /* !__ASSEMBLY__ */
 
 #if defined(CONFIG_PPC_4K_PAGES)
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 12c92a483fb11..0425571a533dc 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -31,6 +31,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/ptrace.h>
 #include <asm/export.h>
+#include <asm/code-patching-asm.h>
 
 #if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000
 /* By simply checking Address >= 0x80000000, we know if its a kernel address */
@@ -318,8 +319,8 @@ InstructionTLBMiss:
 	cmpli	cr0, r11, PAGE_OFFSET@h
 #ifndef CONFIG_PIN_TLB_TEXT
 	/* It is assumed that kernel code fits into the first 8M page */
-_ENTRY(ITLBMiss_cmp)
-	cmpli	cr7, r11, (PAGE_OFFSET + 0x0800000)@h
+0:	cmpli	cr7, r11, (PAGE_OFFSET + 0x0800000)@h
+	patch_site	0b, patch__itlbmiss_linmem_top
 #endif
 #endif
 #endif
@@ -436,11 +437,11 @@ DataStoreTLBMiss:
 #ifndef CONFIG_PIN_TLB_IMMR
 	cmpli	cr0, r11, VIRT_IMMR_BASE@h
 #endif
-_ENTRY(DTLBMiss_cmp)
-	cmpli	cr7, r11, (PAGE_OFFSET + 0x1800000)@h
+0:	cmpli	cr7, r11, (PAGE_OFFSET + 0x1800000)@h
+	patch_site	0b, patch__dtlbmiss_linmem_top
 #ifndef CONFIG_PIN_TLB_IMMR
-_ENTRY(DTLBMiss_jmp)
-	beq-	DTLBMissIMMR
+0:	beq-	DTLBMissIMMR
+	patch_site	0b, patch__dtlbmiss_immr_jmp
 #endif
 	blt	cr7, DTLBMissLinear
 	lis	r11, (swapper_pg_dir-PAGE_OFFSET)@ha
@@ -714,8 +715,10 @@ FixupDAR:/* Entry point for dcbx workaround. */
 	mfspr	r11, SPRN_M_TW	/* Get level 1 table */
 	blt+	3f
 	rlwinm	r11, r10, 16, 0xfff8
-_ENTRY(FixupDAR_cmp)
-	cmpli	cr7, r11, (PAGE_OFFSET + 0x1800000)@h
+
+0:	cmpli	cr7, r11, (PAGE_OFFSET + 0x1800000)@h
+	patch_site	0b, patch__fixupdar_linmem_top
+
 	/* create physical page address from effective address */
 	tophys(r11, r10)
 	blt-	cr7, 201f
diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c
index fee599cf3bc32..ca01ece28ab37 100644
--- a/arch/powerpc/mm/8xx_mmu.c
+++ b/arch/powerpc/mm/8xx_mmu.c
@@ -97,22 +97,13 @@ static void __init mmu_mapin_immr(void)
 		map_kernel_page(v + offset, p + offset, PAGE_KERNEL_NCG);
 }
 
-/* Address of instructions to patch */
-#ifndef CONFIG_PIN_TLB_IMMR
-extern unsigned int DTLBMiss_jmp;
-#endif
-extern unsigned int DTLBMiss_cmp, FixupDAR_cmp;
-#ifndef CONFIG_PIN_TLB_TEXT
-extern unsigned int ITLBMiss_cmp;
-#endif
-
-static void __init mmu_patch_cmp_limit(unsigned int *addr, unsigned long mapped)
+static void __init mmu_patch_cmp_limit(s32 *site, unsigned long mapped)
 {
-	unsigned int instr = *addr;
+	unsigned int instr = *(unsigned int *)patch_site_addr(site);
 
 	instr &= 0xffff0000;
 	instr |= (unsigned long)__va(mapped) >> 16;
-	patch_instruction(addr, instr);
+	patch_instruction_site(site, instr);
 }
 
 unsigned long __init mmu_mapin_ram(unsigned long top)
@@ -123,17 +114,17 @@ unsigned long __init mmu_mapin_ram(unsigned long top)
 		mapped = 0;
 		mmu_mapin_immr();
 #ifndef CONFIG_PIN_TLB_IMMR
-		patch_instruction(&DTLBMiss_jmp, PPC_INST_NOP);
+		patch_instruction_site(&patch__dtlbmiss_immr_jmp, PPC_INST_NOP);
 #endif
 #ifndef CONFIG_PIN_TLB_TEXT
-		mmu_patch_cmp_limit(&ITLBMiss_cmp, 0);
+		mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, 0);
 #endif
 	} else {
 		mapped = top & ~(LARGE_PAGE_SIZE_8M - 1);
 	}
 
-	mmu_patch_cmp_limit(&DTLBMiss_cmp, mapped);
-	mmu_patch_cmp_limit(&FixupDAR_cmp, mapped);
+	mmu_patch_cmp_limit(&patch__dtlbmiss_linmem_top, mapped);
+	mmu_patch_cmp_limit(&patch__fixupdar_linmem_top, mapped);
 
 	/* If the size of RAM is not an exact power of two, we may not
 	 * have covered RAM in its entirety with 8 MiB
-- 
GitLab


From 709cf19c5749308603ffa12557d8bd152a926783 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Fri, 19 Oct 2018 06:55:08 +0000
Subject: [PATCH 0240/1890] powerpc/8xx: Use patch_site for perf counters setup

The 8xx TLB miss routines are patched when (de)activating
perf counters.

This patch uses the new patch_site functionality in order
to get a better code readability and avoid a label mess when
dumping the code with 'objdump -d'

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/mmu-8xx.h |  4 ++++
 arch/powerpc/kernel/head_8xx.S     | 33 +++++++++++++++++-------------
 arch/powerpc/perf/8xx-pmu.c        | 27 +++++++++++-------------
 3 files changed, 35 insertions(+), 29 deletions(-)

diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h
index 3a15d6647d47e..fa05aa566ece4 100644
--- a/arch/powerpc/include/asm/mmu-8xx.h
+++ b/arch/powerpc/include/asm/mmu-8xx.h
@@ -234,6 +234,10 @@ extern s32 patch__itlbmiss_linmem_top;
 extern s32 patch__dtlbmiss_linmem_top, patch__dtlbmiss_immr_jmp;
 extern s32 patch__fixupdar_linmem_top;
 
+extern s32 patch__itlbmiss_exit_1, patch__itlbmiss_exit_2;
+extern s32 patch__dtlbmiss_exit_1, patch__dtlbmiss_exit_2, patch__dtlbmiss_exit_3;
+extern s32 patch__itlbmiss_perf, patch__dtlbmiss_perf;
+
 #endif /* !__ASSEMBLY__ */
 
 #if defined(CONFIG_PPC_4K_PAGES)
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 0425571a533dc..3b67b9533c82f 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -374,16 +374,17 @@ InstructionTLBMiss:
 	mtspr	SPRN_MI_RPN, r10	/* Update TLB entry */
 
 	/* Restore registers */
-_ENTRY(itlb_miss_exit_1)
-	mfspr	r10, SPRN_SPRG_SCRATCH0
+0:	mfspr	r10, SPRN_SPRG_SCRATCH0
 	mfspr	r11, SPRN_SPRG_SCRATCH1
 #if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE)
 	mfspr	r12, SPRN_SPRG_SCRATCH2
 #endif
 	rfi
+	patch_site	0b, patch__itlbmiss_exit_1
+
 #ifdef CONFIG_PERF_EVENTS
-_ENTRY(itlb_miss_perf)
-	lis	r10, (itlb_miss_counter - PAGE_OFFSET)@ha
+	patch_site	0f, patch__itlbmiss_perf
+0:	lis	r10, (itlb_miss_counter - PAGE_OFFSET)@ha
 	lwz	r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10)
 	addi	r11, r11, 1
 	stw	r11, (itlb_miss_counter - PAGE_OFFSET)@l(r10)
@@ -499,14 +500,16 @@ DataStoreTLBMiss:
 
 	/* Restore registers */
 	mtspr	SPRN_DAR, r11	/* Tag DAR */
-_ENTRY(dtlb_miss_exit_1)
-	mfspr	r10, SPRN_SPRG_SCRATCH0
+
+0:	mfspr	r10, SPRN_SPRG_SCRATCH0
 	mfspr	r11, SPRN_SPRG_SCRATCH1
 	mfspr	r12, SPRN_SPRG_SCRATCH2
 	rfi
+	patch_site	0b, patch__dtlbmiss_exit_1
+
 #ifdef CONFIG_PERF_EVENTS
-_ENTRY(dtlb_miss_perf)
-	lis	r10, (dtlb_miss_counter - PAGE_OFFSET)@ha
+	patch_site	0f, patch__dtlbmiss_perf
+0:	lis	r10, (dtlb_miss_counter - PAGE_OFFSET)@ha
 	lwz	r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10)
 	addi	r11, r11, 1
 	stw	r11, (dtlb_miss_counter - PAGE_OFFSET)@l(r10)
@@ -658,11 +661,12 @@ DTLBMissIMMR:
 
 	li	r11, RPN_PATTERN
 	mtspr	SPRN_DAR, r11	/* Tag DAR */
-_ENTRY(dtlb_miss_exit_2)
-	mfspr	r10, SPRN_SPRG_SCRATCH0
+
+0:	mfspr	r10, SPRN_SPRG_SCRATCH0
 	mfspr	r11, SPRN_SPRG_SCRATCH1
 	mfspr	r12, SPRN_SPRG_SCRATCH2
 	rfi
+	patch_site	0b, patch__dtlbmiss_exit_2
 
 DTLBMissLinear:
 	mtcr	r12
@@ -676,11 +680,12 @@ DTLBMissLinear:
 
 	li	r11, RPN_PATTERN
 	mtspr	SPRN_DAR, r11	/* Tag DAR */
-_ENTRY(dtlb_miss_exit_3)
-	mfspr	r10, SPRN_SPRG_SCRATCH0
+
+0:	mfspr	r10, SPRN_SPRG_SCRATCH0
 	mfspr	r11, SPRN_SPRG_SCRATCH1
 	mfspr	r12, SPRN_SPRG_SCRATCH2
 	rfi
+	patch_site	0b, patch__dtlbmiss_exit_3
 
 #ifndef CONFIG_PIN_TLB_TEXT
 ITLBMissLinear:
@@ -693,11 +698,11 @@ ITLBMissLinear:
 			  _PAGE_PRESENT
 	mtspr	SPRN_MI_RPN, r10	/* Update TLB entry */
 
-_ENTRY(itlb_miss_exit_2)
-	mfspr	r10, SPRN_SPRG_SCRATCH0
+0:	mfspr	r10, SPRN_SPRG_SCRATCH0
 	mfspr	r11, SPRN_SPRG_SCRATCH1
 	mfspr	r12, SPRN_SPRG_SCRATCH2
 	rfi
+	patch_site	0b, patch__itlbmiss_exit_2
 #endif
 
 /* This is the procedure to calculate the data EA for buggy dcbx,dcbi instructions
diff --git a/arch/powerpc/perf/8xx-pmu.c b/arch/powerpc/perf/8xx-pmu.c
index 6c0020d1c5614..e38f74e9e7a4a 100644
--- a/arch/powerpc/perf/8xx-pmu.c
+++ b/arch/powerpc/perf/8xx-pmu.c
@@ -31,9 +31,6 @@
 
 extern unsigned long itlb_miss_counter, dtlb_miss_counter;
 extern atomic_t instruction_counter;
-extern unsigned int itlb_miss_perf, dtlb_miss_perf;
-extern unsigned int itlb_miss_exit_1, itlb_miss_exit_2;
-extern unsigned int dtlb_miss_exit_1, dtlb_miss_exit_2, dtlb_miss_exit_3;
 
 static atomic_t insn_ctr_ref;
 static atomic_t itlb_miss_ref;
@@ -103,22 +100,22 @@ static int mpc8xx_pmu_add(struct perf_event *event, int flags)
 		break;
 	case PERF_8xx_ID_ITLB_LOAD_MISS:
 		if (atomic_inc_return(&itlb_miss_ref) == 1) {
-			unsigned long target = (unsigned long)&itlb_miss_perf;
+			unsigned long target = patch_site_addr(&patch__itlbmiss_perf);
 
-			patch_branch(&itlb_miss_exit_1, target, 0);
+			patch_branch_site(&patch__itlbmiss_exit_1, target, 0);
 #ifndef CONFIG_PIN_TLB_TEXT
-			patch_branch(&itlb_miss_exit_2, target, 0);
+			patch_branch_site(&patch__itlbmiss_exit_2, target, 0);
 #endif
 		}
 		val = itlb_miss_counter;
 		break;
 	case PERF_8xx_ID_DTLB_LOAD_MISS:
 		if (atomic_inc_return(&dtlb_miss_ref) == 1) {
-			unsigned long target = (unsigned long)&dtlb_miss_perf;
+			unsigned long target = patch_site_addr(&patch__dtlbmiss_perf);
 
-			patch_branch(&dtlb_miss_exit_1, target, 0);
-			patch_branch(&dtlb_miss_exit_2, target, 0);
-			patch_branch(&dtlb_miss_exit_3, target, 0);
+			patch_branch_site(&patch__dtlbmiss_exit_1, target, 0);
+			patch_branch_site(&patch__dtlbmiss_exit_2, target, 0);
+			patch_branch_site(&patch__dtlbmiss_exit_3, target, 0);
 		}
 		val = dtlb_miss_counter;
 		break;
@@ -180,17 +177,17 @@ static void mpc8xx_pmu_del(struct perf_event *event, int flags)
 		break;
 	case PERF_8xx_ID_ITLB_LOAD_MISS:
 		if (atomic_dec_return(&itlb_miss_ref) == 0) {
-			patch_instruction(&itlb_miss_exit_1, insn);
+			patch_instruction_site(&patch__itlbmiss_exit_1, insn);
 #ifndef CONFIG_PIN_TLB_TEXT
-			patch_instruction(&itlb_miss_exit_2, insn);
+			patch_instruction_site(&patch__itlbmiss_exit_2, insn);
 #endif
 		}
 		break;
 	case PERF_8xx_ID_DTLB_LOAD_MISS:
 		if (atomic_dec_return(&dtlb_miss_ref) == 0) {
-			patch_instruction(&dtlb_miss_exit_1, insn);
-			patch_instruction(&dtlb_miss_exit_2, insn);
-			patch_instruction(&dtlb_miss_exit_3, insn);
+			patch_instruction_site(&patch__dtlbmiss_exit_1, insn);
+			patch_instruction_site(&patch__dtlbmiss_exit_2, insn);
+			patch_instruction_site(&patch__dtlbmiss_exit_3, insn);
 		}
 		break;
 	}
-- 
GitLab


From 772b039fd9a7e12d5fc80e6f649341934ad51fbe Mon Sep 17 00:00:00 2001
From: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
Date: Wed, 10 Oct 2018 15:52:59 +0530
Subject: [PATCH 0241/1890] powerpc/pseries: Export maximum memory value

This patch exports the maximum possible amount of memory
configured on the system via /proc/powerpc/lparcfg.

Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/pseries/lparcfg.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c
index 8bd590af488a1..794487313cc8d 100644
--- a/arch/powerpc/platforms/pseries/lparcfg.c
+++ b/arch/powerpc/platforms/pseries/lparcfg.c
@@ -26,6 +26,7 @@
 #include <linux/seq_file.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
+#include <linux/hugetlb.h>
 #include <asm/lppaca.h>
 #include <asm/hvcall.h>
 #include <asm/firmware.h>
@@ -36,6 +37,7 @@
 #include <asm/vio.h>
 #include <asm/mmu.h>
 #include <asm/machdep.h>
+#include <asm/drmem.h>
 
 #include "pseries.h"
 
@@ -433,6 +435,16 @@ static void parse_em_data(struct seq_file *m)
 		seq_printf(m, "power_mode_data=%016lx\n", retbuf[0]);
 }
 
+static void maxmem_data(struct seq_file *m)
+{
+	unsigned long maxmem = 0;
+
+	maxmem += drmem_info->n_lmbs * drmem_info->lmb_size;
+	maxmem += hugetlb_total_pages() * PAGE_SIZE;
+
+	seq_printf(m, "MaxMem=%ld\n", maxmem);
+}
+
 static int pseries_lparcfg_data(struct seq_file *m, void *v)
 {
 	int partition_potential_processors;
@@ -491,6 +503,7 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v)
 	seq_printf(m, "slb_size=%d\n", mmu_slb_size);
 #endif
 	parse_em_data(m);
+	maxmem_data(m);
 
 	return 0;
 }
-- 
GitLab


From c43befca86ae35cc82bd889484bd179bff27b6c6 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@ozlabs.org>
Date: Sat, 20 Oct 2018 20:54:55 +1100
Subject: [PATCH 0242/1890] KVM: PPC: Use exported tb_to_ns() function in
 decrementer emulation

This changes the KVM code that emulates the decrementer function to do
the conversion of decrementer values to time intervals in nanoseconds
by calling the tb_to_ns() function exported by the powerpc timer code,
in preference to open-coded arithmetic using values from the
decrementer_clockevent struct.  Similarly, the HV-KVM code that did
the same conversion using arithmetic on tb_ticks_per_sec also now
uses tb_to_ns().

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kvm/book3s_hv.c | 3 +--
 arch/powerpc/kvm/emulate.c   | 7 +++----
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 3e3a71594e631..a6d948b6425bd 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2160,8 +2160,7 @@ static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
 		kvmppc_core_prepare_to_enter(vcpu);
 		return;
 	}
-	dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC
-		   / tb_ticks_per_sec;
+	dec_nsec = tb_to_ns(vcpu->arch.dec_expires - now);
 	hrtimer_start(&vcpu->arch.dec_timer, dec_nsec, HRTIMER_MODE_REL);
 	vcpu->arch.timer_running = 1;
 }
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index fa888bfc347e6..9f5b8c01c4e16 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -61,11 +61,10 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
 
 	dec_time = vcpu->arch.dec;
 	/*
-	 * Guest timebase ticks at the same frequency as host decrementer.
-	 * So use the host decrementer calculations for decrementer emulation.
+	 * Guest timebase ticks at the same frequency as host timebase.
+	 * So use the host timebase calculations for decrementer emulation.
 	 */
-	dec_time = dec_time << decrementer_clockevent.shift;
-	do_div(dec_time, decrementer_clockevent.mult);
+	dec_time = tb_to_ns(dec_time);
 	dec_nsec = do_div(dec_time, NSEC_PER_SEC);
 	hrtimer_start(&vcpu->arch.dec_timer,
 		ktime_set(dec_time, dec_nsec), HRTIMER_MODE_REL);
-- 
GitLab


From 48dc0ef19044bfb69193302fbe3a834e3331b7ae Mon Sep 17 00:00:00 2001
From: Breno Leitao <leitao@debian.org>
Date: Mon, 22 Oct 2018 18:16:26 -0300
Subject: [PATCH 0243/1890] selftests/powerpc: Fix ptrace tm failure

Test ptrace-tm-spd-gpr fails on current kernel (4.19) due to a segmentation
fault that happens on the child process prior to setting cptr[2] = 1. This
causes the parent process to wait forever at 'while (!pptr[2])' and the test to
be killed by the test harness framework by timeout, thus, failing.

The segmentation fault happens because of a inline assembly being
generated as:

	0x10000355c <tm_spd_gpr+492>    lfs    f0, 0(0)

This is reading memory position 0x0 and causing the segmentation fault.

This code is being generated by ASM_LOAD_FPR_SINGLE_PRECISION(flt_4), where
flt_4 is passed to the inline assembly block as:

	[flt_4] "r" (&d)

Since the inline assembly 'r' constraint means any GPR, gpr0 is being
chosen, thus causing this issue when issuing a Load Floating-Point Single
instruction.

This patch simply changes the constraint to 'b', which specify that this
register will be used as base, and r0 is not allowed to be used, avoiding
this issue.

Other than that, removing flt_2 register from the input operands, since it
is not used by the inline assembly code at all.

Cc: stable@vger.kernel.org
Signed-off-by: Breno Leitao <leitao@debian.org>
Acked-by: Segher Boessenkool <segher@kernel.crashing.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c
index 327fa943c7f36..dbdffa2e2c824 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c
@@ -67,8 +67,8 @@ void tm_spd_gpr(void)
 		"3: ;"
 		: [res] "=r" (result), [texasr] "=r" (texasr)
 		: [gpr_1]"i"(GPR_1), [gpr_2]"i"(GPR_2), [gpr_4]"i"(GPR_4),
-		[sprn_texasr] "i" (SPRN_TEXASR), [flt_1] "r" (&a),
-		[flt_2] "r" (&b), [flt_4] "r" (&d)
+		[sprn_texasr] "i" (SPRN_TEXASR), [flt_1] "b" (&a),
+		[flt_4] "b" (&d)
 		: "memory", "r5", "r6", "r7",
 		"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
 		"r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
-- 
GitLab


From 8dce6b2215eaa91dbf04463e11098a48748da5ab Mon Sep 17 00:00:00 2001
From: Tyrel Datwyler <tyreld@linux.vnet.ibm.com>
Date: Mon, 22 Oct 2018 20:32:12 -0500
Subject: [PATCH 0244/1890] powerpc/pseries: add missing cpumask.h include file

Build error is encountered when inlcuding <asm/rtas.h> if no explicit or
implicit include of cpumask.h exists in the including file.

In file included from arch/powerpc/platforms/pseries/hotplug-pci.c:3:0:
./arch/powerpc/include/asm/rtas.h:360:34: error: unknown type name 'cpumask_var_t'
 extern int rtas_online_cpus_mask(cpumask_var_t cpus);
                                  ^
./arch/powerpc/include/asm/rtas.h:361:35: error: unknown type name 'cpumask_var_t'
 extern int rtas_offline_cpus_mask(cpumask_var_t cpus);

Fixes: 120496ac2d2d ("powerpc: Bring all threads online prior to migration/hibernation")
Signed-off-by: Tyrel Datwyler <tyreld@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/rtas.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index bb38dd67d47dd..1b06add4f092a 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -5,6 +5,7 @@
 #include <linux/spinlock.h>
 #include <asm/page.h>
 #include <linux/time.h>
+#include <linux/cpumask.h>
 
 /*
  * Definitions for talking to the RTAS on CHRP machines.
-- 
GitLab


From e901378578c62202594cba0f6c076f3df365ec91 Mon Sep 17 00:00:00 2001
From: Felipe Rechia <felipe.rechia@datacom.com.br>
Date: Wed, 24 Oct 2018 10:57:22 -0300
Subject: [PATCH 0245/1890] powerpc/process: Fix flush_all_to_thread for SPE

Fix a bug introduced by the creation of flush_all_to_thread() for
processors that have SPE (Signal Processing Engine) and use it to
compute floating-point operations.

>From userspace perspective, the problem was seen in attempts of
computing floating-point operations which should generate exceptions.
For example:

  fork();
  float x = 0.0 / 0.0;
  isnan(x);           // forked process returns False (should be True)

The operation above also should always cause the SPEFSCR FINV bit to
be set. However, the SPE floating-point exceptions were turned off
after a fork().

Kernel versions prior to the bug used flush_spe_to_thread(), which
first saves SPEFSCR register values in tsk->thread and then calls
giveup_spe(tsk).

After commit 579e633e764e, the save_all() function was called first
to giveup_spe(), and then the SPEFSCR register values were saved in
tsk->thread. This would save the SPEFSCR register values after
disabling SPE for that thread, causing the bug described above.

Fixes 579e633e764e ("powerpc: create flush_all_to_thread()")
Signed-off-by: Felipe Rechia <felipe.rechia@datacom.com.br>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/process.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 7ad304a3cc7d4..bcb36229d4fdc 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -590,12 +590,11 @@ void flush_all_to_thread(struct task_struct *tsk)
 	if (tsk->thread.regs) {
 		preempt_disable();
 		BUG_ON(tsk != current);
-		save_all(tsk);
-
 #ifdef CONFIG_SPE
 		if (tsk->thread.regs->msr & MSR_SPE)
 			tsk->thread.spefscr = mfspr(SPRN_SPEFSCR);
 #endif
+		save_all(tsk);
 
 		preempt_enable();
 	}
-- 
GitLab


From 4faaaa762328cfb8579b9c908999ca189e2ea474 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Wed, 24 Oct 2018 16:25:23 +0200
Subject: [PATCH 0246/1890] drm/amdgpu: fix VM leaf walking
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Make sure we don't try to go down further after the leave walk already
ended. This fixes a crash with a new VM test.

Signed-off-by: Christian KÃ¶nig <christian.koenig@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Tested-by:  Rex Zhu Rex.Zhu@amd.com
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index db0cbf8d219d7..352b304090602 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -542,7 +542,8 @@ static void amdgpu_vm_pt_next_leaf(struct amdgpu_device *adev,
 				   struct amdgpu_vm_pt_cursor *cursor)
 {
 	amdgpu_vm_pt_next(adev, cursor);
-	while (amdgpu_vm_pt_descendant(adev, cursor));
+	if (cursor->pfn != ~0ll)
+		while (amdgpu_vm_pt_descendant(adev, cursor));
 }
 
 /**
-- 
GitLab


From 31e3aad62ab8039581d621403b7905aa19fb30af Mon Sep 17 00:00:00 2001
From: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Date: Thu, 25 Oct 2018 15:47:02 -0400
Subject: [PATCH 0247/1890] drm/amdgpu: Fix compute ring 1.0.0 failure after
 reset
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Problem: After GPU reset on dGPUs with gfx8 compute ring
1.0.0 fails to pass the ring test. Ring registers inspection
shows that it's active and no hang is observed (rptr == wptr)
No significant diffs were observed between CP_HQD* registers
for the ring in good and bad shape.

Fix: No clear reason why but reversing the order of ring tests
fixes the problem.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Acked-by: Christian KÃ¶nig <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 3d0f277a6523f..617b0c8908a37 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -4815,8 +4815,10 @@ static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
 	if (r)
 		goto done;
 
-	/* Test KCQs */
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+	/* Test KCQs - reversing the order of rings seems to fix ring test failure
+	 * after GPU reset
+	 */
+	for (i = adev->gfx.num_compute_rings - 1; i >= 0; i--) {
 		ring = &adev->gfx.compute_ring[i];
 		ring->ready = true;
 		r = amdgpu_ring_test_ring(ring);
-- 
GitLab


From cb899a615be6c42e186e0e71862ebbd5a5c15533 Mon Sep 17 00:00:00 2001
From: Shirish S <shirish.s@amd.com>
Date: Fri, 26 Oct 2018 02:38:58 +0530
Subject: [PATCH 0248/1890] drm/amdgpu: fix reporting of failed msg sent to SMU
 (v2)

Currently send_msg_to_smc_async() only report which message
failed, but the actual failing message is the previous one,
which SMU is unable to service.

This patch reads the contents of register where the SMU is stuck
and report appropriately.

v2: fix the build (Alex)

Signed-off-by: Shirish S <shirish.s@amd.com>
Reviewed-by: Rex Zhu <Rex.Zhu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/smumgr/smu8_smumgr.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smu8_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smu8_smumgr.c
index f836d30fdd442..09b844ec3eaba 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/smu8_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/smu8_smumgr.c
@@ -71,7 +71,11 @@ static int smu8_send_msg_to_smc_async(struct pp_hwmgr *hwmgr, uint16_t msg)
 	result = PHM_WAIT_FIELD_UNEQUAL(hwmgr,
 					SMU_MP1_SRBM2P_RESP_0, CONTENT, 0);
 	if (result != 0) {
+		/* Read the last message to SMU, to report actual cause */
+		uint32_t val = cgs_read_register(hwmgr->device,
+						 mmSMU_MP1_SRBM2P_MSG_0);
 		pr_err("smu8_send_msg_to_smc_async (0x%04x) failed\n", msg);
+		pr_err("SMU still servicing msg (0x%04x)\n", val);
 		return result;
 	}
 
-- 
GitLab


From babf4770be0adc69e6d2de150f4040f175e24beb Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Wed, 10 Oct 2018 19:10:06 +0300
Subject: [PATCH 0249/1890] ovl: fix error handling in ovl_verify_set_fh()

We hit a BUG on kfree of an ERR_PTR()...

Reported-by: syzbot+ff03fe05c717b82502d0@syzkaller.appspotmail.com
Fixes: 8b88a2e64036 ("ovl: verify upper root dir matches lower root dir")
Cc: <stable@vger.kernel.org> # v4.13
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/namei.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 9c0ca6a7becfb..efd372312ef10 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -422,8 +422,10 @@ int ovl_verify_set_fh(struct dentry *dentry, const char *name,
 
 	fh = ovl_encode_real_fh(real, is_upper);
 	err = PTR_ERR(fh);
-	if (IS_ERR(fh))
+	if (IS_ERR(fh)) {
+		fh = NULL;
 		goto fail;
+	}
 
 	err = ovl_verify_fh(dentry, name, fh);
 	if (set && err == -ENODATA)
-- 
GitLab


From 1f244dc5213960f76e7463bbb5719c331045bbc9 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Fri, 26 Oct 2018 23:34:39 +0200
Subject: [PATCH 0250/1890] ovl: clean up error handling in ovl_get_tmpfile()

If security_inode_copy_up() fails, it should not set new_creds, so no need
for the cleanup (which would've Oops-ed anyway, due to old_creds being
NULL).

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/copy_up.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 1cc797a08a5b5..989782a0c0c97 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -477,9 +477,8 @@ static struct dentry *ovl_get_tmpfile(struct ovl_copy_up_ctx *c)
 	};
 
 	err = security_inode_copy_up(c->dentry, &new_creds);
-	temp = ERR_PTR(err);
 	if (err < 0)
-		goto out;
+		return ERR_PTR(err);
 
 	if (new_creds)
 		old_creds = override_creds(new_creds);
@@ -488,7 +487,7 @@ static struct dentry *ovl_get_tmpfile(struct ovl_copy_up_ctx *c)
 		temp = ovl_do_tmpfile(c->workdir, c->stat.mode);
 	else
 		temp = ovl_create_temp(c->workdir, &cattr);
-out:
+
 	if (new_creds) {
 		revert_creds(old_creds);
 		put_cred(new_creds);
-- 
GitLab


From 8f97d1e99149a7f1aa19e47a51b09764382a482e Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 11 Oct 2018 17:38:14 +0300
Subject: [PATCH 0251/1890] vfs: fix FIGETBSZ ioctl on an overlayfs file

Some anon_bdev filesystems (e.g. overlayfs, ceph) don't have s_blocksize
set. Returning zero from FIGETBSZ ioctl results in a Floating point
exception from the e2fsprogs utility filefrag, which divides the size of
the file with the value returned by FIGETBSZ.

Fix the interface by returning -EINVAL for these filesystems.

Fixes: d1d04ef8572b ("ovl: stack file ops")
Cc: <stable@vger.kernel.org> # v4.19
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/ioctl.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/ioctl.c b/fs/ioctl.c
index 2005529af5608..0400297c8d72d 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -669,6 +669,9 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
 		return ioctl_fiemap(filp, arg);
 
 	case FIGETBSZ:
+		/* anon_bdev filesystems may not have a block size */
+		if (!inode->i_sb->s_blocksize)
+			return -EINVAL;
 		return put_user(inode->i_sb->s_blocksize, argp);
 
 	case FICLONE:
-- 
GitLab


From 6cd078702f2f33cb6b19a682de3e9184112f1a46 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 18 Oct 2018 09:45:49 +0300
Subject: [PATCH 0252/1890] ovl: fix recursive oi->lock in ovl_link()

linking a non-copied-up file into a non-copied-up parent results in a
nested call to mutex_lock_interruptible(&oi->lock). Fix this by copying up
target parent before ovl_nlink_start(), same as done in ovl_rename().

~/unionmount-testsuite$ ./run --ov -s
~/unionmount-testsuite$ ln /mnt/a/foo100 /mnt/a/dir100/

 WARNING: possible recursive locking detected
 --------------------------------------------
 ln/1545 is trying to acquire lock:
 00000000bcce7c4c (&ovl_i_lock_key[depth]){+.+.}, at:
     ovl_copy_up_start+0x28/0x7d
 but task is already holding lock:
 0000000026d73d5b (&ovl_i_lock_key[depth]){+.+.}, at:
     ovl_nlink_start+0x3c/0xc1

[SzM: this seems to be a false positive, but doing the copy-up first is
harmless and removes the lockdep splat]

Reported-by: syzbot+3ef5c0d1a5cb0b21e6be@syzkaller.appspotmail.com
Fixes: 5f8415d6b87e ("ovl: persistent overlay inode nlink for...")
Cc: <stable@vger.kernel.org> # v4.13
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/dir.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 276914ae3c60a..e1a55ecb7aba7 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -663,6 +663,10 @@ static int ovl_link(struct dentry *old, struct inode *newdir,
 	if (err)
 		goto out_drop_write;
 
+	err = ovl_copy_up(new->d_parent);
+	if (err)
+		goto out_drop_write;
+
 	if (ovl_is_metacopy_dentry(old)) {
 		err = ovl_set_redirect(old, false);
 		if (err)
-- 
GitLab


From 007ea44892e6fa963a0876a979e34890325c64eb Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Fri, 26 Oct 2018 23:34:39 +0200
Subject: [PATCH 0253/1890] ovl: relax permission checking on underlying layers

Make permission checking more consistent:

 - special files don't need any access check on underling fs

 - exec permission check doesn't need to be performed on underlying fs

Reported-by: "J. Bruce Fields" <bfields@fieldses.org>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/inode.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 3b7ed5d2279c6..6bcc9dedc342c 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -286,13 +286,22 @@ int ovl_permission(struct inode *inode, int mask)
 	if (err)
 		return err;
 
-	old_cred = ovl_override_creds(inode->i_sb);
-	if (!upperinode &&
-	    !special_file(realinode->i_mode) && mask & MAY_WRITE) {
+	/* No need to do any access on underlying for special files */
+	if (special_file(realinode->i_mode))
+		return 0;
+
+	/* No need to access underlying for execute */
+	mask &= ~MAY_EXEC;
+	if ((mask & (MAY_READ | MAY_WRITE)) == 0)
+		return 0;
+
+	/* Lower files get copied up, so turn write access into read */
+	if (!upperinode && mask & MAY_WRITE) {
 		mask &= ~(MAY_WRITE | MAY_APPEND);
-		/* Make sure mounter can read file for copy up later */
 		mask |= MAY_READ;
 	}
+
+	old_cred = ovl_override_creds(inode->i_sb);
 	err = inode_permission(realinode, mask);
 	revert_creds(old_cred);
 
-- 
GitLab


From b10cdcdc2012b2c199077a0a648e8a7067e573bf Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Mon, 8 Oct 2018 07:25:23 +0300
Subject: [PATCH 0254/1890] ovl: untangle copy up call chain

In an attempt to dedup ~100 LOC, we ended up creating a tangled call chain,
whose branches merge and diverge in several points according to the
immutable c->tmpfile copy up mode.

This call chain was hard to analyse for locking correctness because the
locking requirements for the c->tmpfile flow were very different from the
locking requirements for the !c->tmpfile flow (i.e. directory vs.  regulare
file copy up).

Split the copy up helpers of the c->tmpfile flow from those of the
!c->tmpfile (i.e. workdir) flow and remove the c->tmpfile mode from copy up
context.

Suggested-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/copy_up.c | 245 ++++++++++++++++++++++++++---------------
 1 file changed, 159 insertions(+), 86 deletions(-)

diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 989782a0c0c97..618cffc14f914 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -395,7 +395,6 @@ struct ovl_copy_up_ctx {
 	struct dentry *destdir;
 	struct qstr destname;
 	struct dentry *workdir;
-	bool tmpfile;
 	bool origin;
 	bool indexed;
 	bool metacopy;
@@ -440,62 +439,6 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c)
 	return err;
 }
 
-static int ovl_install_temp(struct ovl_copy_up_ctx *c, struct dentry *temp,
-			    struct dentry **newdentry)
-{
-	int err;
-	struct dentry *upper;
-	struct inode *udir = d_inode(c->destdir);
-
-	upper = lookup_one_len(c->destname.name, c->destdir, c->destname.len);
-	if (IS_ERR(upper))
-		return PTR_ERR(upper);
-
-	if (c->tmpfile)
-		err = ovl_do_link(temp, udir, upper);
-	else
-		err = ovl_do_rename(d_inode(c->workdir), temp, udir, upper, 0);
-
-	if (!err)
-		*newdentry = dget(c->tmpfile ? upper : temp);
-	dput(upper);
-
-	return err;
-}
-
-static struct dentry *ovl_get_tmpfile(struct ovl_copy_up_ctx *c)
-{
-	int err;
-	struct dentry *temp;
-	const struct cred *old_creds = NULL;
-	struct cred *new_creds = NULL;
-	struct ovl_cattr cattr = {
-		/* Can't properly set mode on creation because of the umask */
-		.mode = c->stat.mode & S_IFMT,
-		.rdev = c->stat.rdev,
-		.link = c->link
-	};
-
-	err = security_inode_copy_up(c->dentry, &new_creds);
-	if (err < 0)
-		return ERR_PTR(err);
-
-	if (new_creds)
-		old_creds = override_creds(new_creds);
-
-	if (c->tmpfile)
-		temp = ovl_do_tmpfile(c->workdir, c->stat.mode);
-	else
-		temp = ovl_create_temp(c->workdir, &cattr);
-
-	if (new_creds) {
-		revert_creds(old_creds);
-		put_cred(new_creds);
-	}
-
-	return temp;
-}
-
 static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
 {
 	int err;
@@ -547,37 +490,94 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
 	return err;
 }
 
-static int ovl_copy_up_locked(struct ovl_copy_up_ctx *c)
+static struct dentry *ovl_get_workdir_temp(struct ovl_copy_up_ctx *c)
+{
+	int err;
+	struct dentry *temp;
+	const struct cred *old_creds = NULL;
+	struct cred *new_creds = NULL;
+	struct ovl_cattr cattr = {
+		/* Can't properly set mode on creation because of the umask */
+		.mode = c->stat.mode & S_IFMT,
+		.rdev = c->stat.rdev,
+		.link = c->link
+	};
+
+	err = security_inode_copy_up(c->dentry, &new_creds);
+	if (err < 0)
+		return ERR_PTR(err);
+
+	if (new_creds)
+		old_creds = override_creds(new_creds);
+
+	temp = ovl_create_temp(c->workdir, &cattr);
+
+	if (new_creds) {
+		revert_creds(old_creds);
+		put_cred(new_creds);
+	}
+
+	return temp;
+}
+
+/*
+ * Move temp file from workdir into place on upper dir.
+ * Used when copying up directories and when upper fs doesn't support O_TMPFILE.
+ *
+ * Caller must hold ovl_lock_rename_workdir().
+ */
+static int ovl_rename_temp(struct ovl_copy_up_ctx *c, struct dentry *temp,
+			   struct dentry **newdentry)
+{
+	int err;
+	struct dentry *upper;
+	struct inode *udir = d_inode(c->destdir);
+
+	upper = lookup_one_len(c->destname.name, c->destdir, c->destname.len);
+	if (IS_ERR(upper))
+		return PTR_ERR(upper);
+
+	err = ovl_do_rename(d_inode(c->workdir), temp, udir, upper, 0);
+	if (!err)
+		*newdentry = dget(temp);
+	dput(upper);
+
+	return err;
+}
+
+/*
+ * Copyup using workdir to prepare temp file.  Used when copying up directories,
+ * special files or when upper fs doesn't support O_TMPFILE.
+ */
+static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
 {
-	struct inode *udir = c->destdir->d_inode;
 	struct inode *inode;
 	struct dentry *newdentry = NULL;
 	struct dentry *temp;
 	int err;
 
-	temp = ovl_get_tmpfile(c);
+	err = ovl_lock_rename_workdir(c->workdir, c->destdir);
+	if (err)
+		return err;
+
+	temp = ovl_get_workdir_temp(c);
+	err = PTR_ERR(temp);
 	if (IS_ERR(temp))
-		return PTR_ERR(temp);
+		goto unlock;
 
 	err = ovl_copy_up_inode(c, temp);
 	if (err)
-		goto out;
+		goto cleanup;
 
 	if (S_ISDIR(c->stat.mode) && c->indexed) {
 		err = ovl_create_index(c->dentry, c->lowerpath.dentry, temp);
 		if (err)
-			goto out;
+			goto cleanup;
 	}
 
-	if (c->tmpfile) {
-		inode_lock_nested(udir, I_MUTEX_PARENT);
-		err = ovl_install_temp(c, temp, &newdentry);
-		inode_unlock(udir);
-	} else {
-		err = ovl_install_temp(c, temp, &newdentry);
-	}
+	err = ovl_rename_temp(c, temp, &newdentry);
 	if (err)
-		goto out;
+		goto cleanup;
 
 	if (!c->metacopy)
 		ovl_set_upperdata(d_inode(c->dentry));
@@ -585,13 +585,94 @@ static int ovl_copy_up_locked(struct ovl_copy_up_ctx *c)
 	ovl_inode_update(inode, newdentry);
 	if (S_ISDIR(inode->i_mode))
 		ovl_set_flag(OVL_WHITEOUTS, inode);
+out_dput:
+	dput(temp);
+unlock:
+	unlock_rename(c->workdir, c->destdir);
+
+	return err;
+
+cleanup:
+	ovl_cleanup(d_inode(c->workdir), temp);
+	goto out_dput;
+}
+
+static struct dentry *ovl_get_tmpfile(struct ovl_copy_up_ctx *c)
+{
+	int err;
+	struct dentry *temp;
+	const struct cred *old_creds = NULL;
+	struct cred *new_creds = NULL;
+
+	err = security_inode_copy_up(c->dentry, &new_creds);
+	if (err < 0)
+		return ERR_PTR(err);
+
+	if (new_creds)
+		old_creds = override_creds(new_creds);
+
+	temp = ovl_do_tmpfile(c->workdir, c->stat.mode);
+
+	if (new_creds) {
+		revert_creds(old_creds);
+		put_cred(new_creds);
+	}
+
+	return temp;
+}
+
+/* Link O_TMPFILE into place on upper dir */
+static int ovl_link_tmpfile(struct ovl_copy_up_ctx *c, struct dentry *temp,
+			    struct dentry **newdentry)
+{
+	int err;
+	struct dentry *upper;
+	struct inode *udir = d_inode(c->destdir);
+
+	inode_lock_nested(udir, I_MUTEX_PARENT);
+
+	upper = lookup_one_len(c->destname.name, c->destdir, c->destname.len);
+	err = PTR_ERR(upper);
+	if (IS_ERR(upper))
+		goto unlock;
+
+	err = ovl_do_link(temp, udir, upper);
+	if (!err)
+		*newdentry = dget(upper);
+	dput(upper);
+
+unlock:
+	inode_unlock(udir);
+
+	return err;
+}
+
+/* Copyup using O_TMPFILE which does not require cross dir locking */
+static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c)
+{
+	struct dentry *newdentry = NULL;
+	struct dentry *temp;
+	int err;
+
+	temp = ovl_get_tmpfile(c);
+	if (IS_ERR(temp))
+		return PTR_ERR(temp);
+
+	err = ovl_copy_up_inode(c, temp);
+	if (err)
+		goto out;
+
+	err = ovl_link_tmpfile(c, temp, &newdentry);
+	if (err)
+		goto out;
+
+	if (!c->metacopy)
+		ovl_set_upperdata(d_inode(c->dentry));
+	ovl_inode_update(d_inode(c->dentry), newdentry);
 
 out:
-	if (err && !c->tmpfile)
-		ovl_cleanup(d_inode(c->workdir), temp);
 	dput(temp);
 	return err;
-
 }
 
 /*
@@ -645,18 +726,10 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
 	}
 
 	/* Should we copyup with O_TMPFILE or with workdir? */
-	if (S_ISREG(c->stat.mode) && ofs->tmpfile) {
-		c->tmpfile = true;
-		err = ovl_copy_up_locked(c);
-	} else {
-		err = ovl_lock_rename_workdir(c->workdir, c->destdir);
-		if (!err) {
-			err = ovl_copy_up_locked(c);
-			unlock_rename(c->workdir, c->destdir);
-		}
-	}
-
-
+	if (S_ISREG(c->stat.mode) && ofs->tmpfile)
+		err = ovl_copy_up_tmpfile(c);
+	else
+		err = ovl_copy_up_workdir(c);
 	if (err)
 		goto out;
 
-- 
GitLab


From 6b52243f633eb5521e427bf78c85ccf646795f46 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Fri, 26 Oct 2018 23:34:39 +0200
Subject: [PATCH 0255/1890] ovl: fold copy-up helpers into callers

Now that the workdir and tmpfile copy up modes have been untagled, the
functions become simple enough that the helpers can be folded into the
callers.

Add new helpers where there is any duplication remaining: preparing creds
for creating the object.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/copy_up.c | 175 ++++++++++++++++-------------------------
 1 file changed, 67 insertions(+), 108 deletions(-)

diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 618cffc14f914..d6a3346e2672f 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -490,59 +490,32 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
 	return err;
 }
 
-static struct dentry *ovl_get_workdir_temp(struct ovl_copy_up_ctx *c)
+struct ovl_cu_creds {
+	const struct cred *old;
+	struct cred *new;
+};
+
+static int ovl_prep_cu_creds(struct dentry *dentry, struct ovl_cu_creds *cc)
 {
 	int err;
-	struct dentry *temp;
-	const struct cred *old_creds = NULL;
-	struct cred *new_creds = NULL;
-	struct ovl_cattr cattr = {
-		/* Can't properly set mode on creation because of the umask */
-		.mode = c->stat.mode & S_IFMT,
-		.rdev = c->stat.rdev,
-		.link = c->link
-	};
 
-	err = security_inode_copy_up(c->dentry, &new_creds);
+	cc->old = cc->new = NULL;
+	err = security_inode_copy_up(dentry, &cc->new);
 	if (err < 0)
-		return ERR_PTR(err);
-
-	if (new_creds)
-		old_creds = override_creds(new_creds);
-
-	temp = ovl_create_temp(c->workdir, &cattr);
+		return err;
 
-	if (new_creds) {
-		revert_creds(old_creds);
-		put_cred(new_creds);
-	}
+	if (cc->new)
+		cc->old = override_creds(cc->new);
 
-	return temp;
+	return 0;
 }
 
-/*
- * Move temp file from workdir into place on upper dir.
- * Used when copying up directories and when upper fs doesn't support O_TMPFILE.
- *
- * Caller must hold ovl_lock_rename_workdir().
- */
-static int ovl_rename_temp(struct ovl_copy_up_ctx *c, struct dentry *temp,
-			   struct dentry **newdentry)
+static void ovl_revert_cu_creds(struct ovl_cu_creds *cc)
 {
-	int err;
-	struct dentry *upper;
-	struct inode *udir = d_inode(c->destdir);
-
-	upper = lookup_one_len(c->destname.name, c->destdir, c->destname.len);
-	if (IS_ERR(upper))
-		return PTR_ERR(upper);
-
-	err = ovl_do_rename(d_inode(c->workdir), temp, udir, upper, 0);
-	if (!err)
-		*newdentry = dget(temp);
-	dput(upper);
-
-	return err;
+	if (cc->new) {
+		revert_creds(cc->old);
+		put_cred(cc->new);
+	}
 }
 
 /*
@@ -552,15 +525,28 @@ static int ovl_rename_temp(struct ovl_copy_up_ctx *c, struct dentry *temp,
 static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
 {
 	struct inode *inode;
-	struct dentry *newdentry = NULL;
-	struct dentry *temp;
+	struct inode *udir = d_inode(c->destdir), *wdir = d_inode(c->workdir);
+	struct dentry *temp, *upper;
+	struct ovl_cu_creds cc;
 	int err;
+	struct ovl_cattr cattr = {
+		/* Can't properly set mode on creation because of the umask */
+		.mode = c->stat.mode & S_IFMT,
+		.rdev = c->stat.rdev,
+		.link = c->link
+	};
 
 	err = ovl_lock_rename_workdir(c->workdir, c->destdir);
 	if (err)
 		return err;
 
-	temp = ovl_get_workdir_temp(c);
+	err = ovl_prep_cu_creds(c->dentry, &cc);
+	if (err)
+		goto unlock;
+
+	temp = ovl_create_temp(c->workdir, &cattr);
+	ovl_revert_cu_creds(&cc);
+
 	err = PTR_ERR(temp);
 	if (IS_ERR(temp))
 		goto unlock;
@@ -575,102 +561,75 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
 			goto cleanup;
 	}
 
-	err = ovl_rename_temp(c, temp, &newdentry);
+	upper = lookup_one_len(c->destname.name, c->destdir, c->destname.len);
+	err = PTR_ERR(upper);
+	if (IS_ERR(upper))
+		goto cleanup;
+
+	err = ovl_do_rename(wdir, temp, udir, upper, 0);
+	dput(upper);
 	if (err)
 		goto cleanup;
 
 	if (!c->metacopy)
 		ovl_set_upperdata(d_inode(c->dentry));
 	inode = d_inode(c->dentry);
-	ovl_inode_update(inode, newdentry);
+	ovl_inode_update(inode, temp);
 	if (S_ISDIR(inode->i_mode))
 		ovl_set_flag(OVL_WHITEOUTS, inode);
-out_dput:
-	dput(temp);
 unlock:
 	unlock_rename(c->workdir, c->destdir);
 
 	return err;
 
 cleanup:
-	ovl_cleanup(d_inode(c->workdir), temp);
-	goto out_dput;
+	ovl_cleanup(wdir, temp);
+	dput(temp);
+	goto unlock;
 }
 
-static struct dentry *ovl_get_tmpfile(struct ovl_copy_up_ctx *c)
+/* Copyup using O_TMPFILE which does not require cross dir locking */
+static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c)
 {
+	struct inode *udir = d_inode(c->destdir);
+	struct dentry *temp, *upper;
+	struct ovl_cu_creds cc;
 	int err;
-	struct dentry *temp;
-	const struct cred *old_creds = NULL;
-	struct cred *new_creds = NULL;
 
-	err = security_inode_copy_up(c->dentry, &new_creds);
-	if (err < 0)
-		return ERR_PTR(err);
-
-	if (new_creds)
-		old_creds = override_creds(new_creds);
+	err = ovl_prep_cu_creds(c->dentry, &cc);
+	if (err)
+		return err;
 
 	temp = ovl_do_tmpfile(c->workdir, c->stat.mode);
+	ovl_revert_cu_creds(&cc);
 
-	if (new_creds) {
-		revert_creds(old_creds);
-		put_cred(new_creds);
-	}
-
-	return temp;
-}
+	if (IS_ERR(temp))
+		return PTR_ERR(temp);
 
-/* Link O_TMPFILE into place on upper dir */
-static int ovl_link_tmpfile(struct ovl_copy_up_ctx *c, struct dentry *temp,
-			    struct dentry **newdentry)
-{
-	int err;
-	struct dentry *upper;
-	struct inode *udir = d_inode(c->destdir);
+	err = ovl_copy_up_inode(c, temp);
+	if (err)
+		goto out_dput;
 
 	inode_lock_nested(udir, I_MUTEX_PARENT);
 
 	upper = lookup_one_len(c->destname.name, c->destdir, c->destname.len);
 	err = PTR_ERR(upper);
-	if (IS_ERR(upper))
-		goto unlock;
-
-	err = ovl_do_link(temp, udir, upper);
-	if (!err)
-		*newdentry = dget(upper);
-	dput(upper);
-
-unlock:
+	if (!IS_ERR(upper)) {
+		err = ovl_do_link(temp, udir, upper);
+		dput(upper);
+	}
 	inode_unlock(udir);
 
-	return err;
-}
-
-/* Copyup using O_TMPFILE which does not require cross dir locking */
-static int ovl_copy_up_tmpfile(struct ovl_copy_up_ctx *c)
-{
-	struct dentry *newdentry = NULL;
-	struct dentry *temp;
-	int err;
-
-	temp = ovl_get_tmpfile(c);
-	if (IS_ERR(temp))
-		return PTR_ERR(temp);
-
-	err = ovl_copy_up_inode(c, temp);
-	if (err)
-		goto out;
-
-	err = ovl_link_tmpfile(c, temp, &newdentry);
 	if (err)
-		goto out;
+		goto out_dput;
 
 	if (!c->metacopy)
 		ovl_set_upperdata(d_inode(c->dentry));
-	ovl_inode_update(d_inode(c->dentry), newdentry);
+	ovl_inode_update(d_inode(c->dentry), temp);
 
-out:
+	return 0;
+
+out_dput:
 	dput(temp);
 	return err;
 }
-- 
GitLab


From 9df085f3c9a2d4658a9fe323d70c200aa00ede93 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Mon, 3 Sep 2018 09:12:09 +0300
Subject: [PATCH 0256/1890] ovl: relax requirement for non null uuid of lower
 fs

We use uuid to associate an overlay lower file handle with a lower layer,
so we can accept lower fs with null uuid as long as all lower layers with
null uuid are on the same fs.

This change allows enabling index and nfs_export features for the setup of
single lower fs of type squashfs - squashfs supports file handles, but has
a null uuid. This change also allows enabling index and nfs_export features
for nested overlayfs, where the lower overlay has nfs_export enabled.

Enabling the index feature with single lower squashfs fixes the
unionmount-testsuite test:
  ./run --ov --squashfs --verify

As a by-product, if, like the lower squashfs, upper fs also uses the
generic export_encode_fh() implementation to export 32bit inode file
handles (e.g. ext4), then the xino_auto config/module/mount option will
enable unique overlay inode numbers.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/super.c | 32 ++++++++++++++++++++++++++++++--
 fs/overlayfs/util.c  |  3 +--
 2 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 30adc9d408a0d..22ffb23ea44d9 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1175,9 +1175,29 @@ static int ovl_get_indexdir(struct ovl_fs *ofs, struct ovl_entry *oe,
 	return err;
 }
 
+static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid)
+{
+	unsigned int i;
+
+	if (!ofs->config.nfs_export && !(ofs->config.index && ofs->upper_mnt))
+		return true;
+
+	for (i = 0; i < ofs->numlowerfs; i++) {
+		/*
+		 * We use uuid to associate an overlay lower file handle with a
+		 * lower layer, so we can accept lower fs with null uuid as long
+		 * as all lower layers with null uuid are on the same fs.
+		 */
+		if (uuid_equal(&ofs->lower_fs[i].sb->s_uuid, uuid))
+			return false;
+	}
+	return true;
+}
+
 /* Get a unique fsid for the layer */
-static int ovl_get_fsid(struct ovl_fs *ofs, struct super_block *sb)
+static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path)
 {
+	struct super_block *sb = path->mnt->mnt_sb;
 	unsigned int i;
 	dev_t dev;
 	int err;
@@ -1191,6 +1211,14 @@ static int ovl_get_fsid(struct ovl_fs *ofs, struct super_block *sb)
 			return i + 1;
 	}
 
+	if (!ovl_lower_uuid_ok(ofs, &sb->s_uuid)) {
+		ofs->config.index = false;
+		ofs->config.nfs_export = false;
+		pr_warn("overlayfs: %s uuid detected in lower fs '%pd2', falling back to index=off,nfs_export=off.\n",
+			uuid_is_null(&sb->s_uuid) ? "null" : "conflicting",
+			path->dentry);
+	}
+
 	err = get_anon_bdev(&dev);
 	if (err) {
 		pr_err("overlayfs: failed to get anonymous bdev for lowerpath\n");
@@ -1225,7 +1253,7 @@ static int ovl_get_lower_layers(struct ovl_fs *ofs, struct path *stack,
 		struct vfsmount *mnt;
 		int fsid;
 
-		err = fsid = ovl_get_fsid(ofs, stack[i].mnt->mnt_sb);
+		err = fsid = ovl_get_fsid(ofs, &stack[i]);
 		if (err < 0)
 			goto out;
 
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index ace4fe4c39a93..8cd37baf5d0a4 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -65,8 +65,7 @@ struct super_block *ovl_same_sb(struct super_block *sb)
  */
 int ovl_can_decode_fh(struct super_block *sb)
 {
-	if (!sb->s_export_op || !sb->s_export_op->fh_to_dentry ||
-	    uuid_is_null(&sb->s_uuid))
+	if (!sb->s_export_op || !sb->s_export_op->fh_to_dentry)
 		return 0;
 
 	return sb->s_export_op->encode_fh ? -1 : FILEID_INO32_GEN;
-- 
GitLab


From 0e32992f7faccd50d1b00032de83e87a35fed247 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 18 Oct 2018 18:37:13 +0300
Subject: [PATCH 0257/1890] ovl: remove the 'locked' argument of
 ovl_nlink_{start,end}

It just makes the interface strange without adding any significant value.
The only case where locked is false and return value is 0 is in
ovl_rename() when new is negative, so handle that case explicitly in
ovl_rename().

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/dir.c       | 21 +++++++++++----------
 fs/overlayfs/overlayfs.h |  4 ++--
 fs/overlayfs/util.c      | 28 ++++++++++++----------------
 3 files changed, 25 insertions(+), 28 deletions(-)

diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index e1a55ecb7aba7..e03f39550ccf0 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -652,7 +652,6 @@ static int ovl_link(struct dentry *old, struct inode *newdir,
 		    struct dentry *new)
 {
 	int err;
-	bool locked = false;
 	struct inode *inode;
 
 	err = ovl_want_write(old);
@@ -673,7 +672,7 @@ static int ovl_link(struct dentry *old, struct inode *newdir,
 			goto out_drop_write;
 	}
 
-	err = ovl_nlink_start(old, &locked);
+	err = ovl_nlink_start(old);
 	if (err)
 		goto out_drop_write;
 
@@ -686,7 +685,7 @@ static int ovl_link(struct dentry *old, struct inode *newdir,
 	if (err)
 		iput(inode);
 
-	ovl_nlink_end(old, locked);
+	ovl_nlink_end(old);
 out_drop_write:
 	ovl_drop_write(old);
 out:
@@ -811,7 +810,6 @@ static bool ovl_pure_upper(struct dentry *dentry)
 static int ovl_do_remove(struct dentry *dentry, bool is_dir)
 {
 	int err;
-	bool locked = false;
 	const struct cred *old_cred;
 	struct dentry *upperdentry;
 	bool lower_positive = ovl_lower_positive(dentry);
@@ -832,7 +830,7 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
 	if (err)
 		goto out_drop_write;
 
-	err = ovl_nlink_start(dentry, &locked);
+	err = ovl_nlink_start(dentry);
 	if (err)
 		goto out_drop_write;
 
@@ -848,7 +846,7 @@ static int ovl_do_remove(struct dentry *dentry, bool is_dir)
 		else
 			drop_nlink(dentry->d_inode);
 	}
-	ovl_nlink_end(dentry, locked);
+	ovl_nlink_end(dentry);
 
 	/*
 	 * Copy ctime
@@ -1012,7 +1010,6 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
 		      unsigned int flags)
 {
 	int err;
-	bool locked = false;
 	struct dentry *old_upperdir;
 	struct dentry *new_upperdir;
 	struct dentry *olddentry;
@@ -1021,6 +1018,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
 	bool old_opaque;
 	bool new_opaque;
 	bool cleanup_whiteout = false;
+	bool update_nlink = false;
 	bool overwrite = !(flags & RENAME_EXCHANGE);
 	bool is_dir = d_is_dir(old);
 	bool new_is_dir = d_is_dir(new);
@@ -1078,10 +1076,12 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
 		err = ovl_copy_up(new);
 		if (err)
 			goto out_drop_write;
-	} else {
-		err = ovl_nlink_start(new, &locked);
+	} else if (d_inode(new)) {
+		err = ovl_nlink_start(new);
 		if (err)
 			goto out_drop_write;
+
+		update_nlink = true;
 	}
 
 	old_cred = ovl_override_creds(old->d_sb);
@@ -1210,7 +1210,8 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
 	unlock_rename(new_upperdir, old_upperdir);
 out_revert_creds:
 	revert_creds(old_cred);
-	ovl_nlink_end(new, locked);
+	if (update_nlink)
+		ovl_nlink_end(new);
 out_drop_write:
 	ovl_drop_write(old);
 out:
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index a3c0d95843121..b48ef22ef96bd 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -271,8 +271,8 @@ bool ovl_test_flag(unsigned long flag, struct inode *inode);
 bool ovl_inuse_trylock(struct dentry *dentry);
 void ovl_inuse_unlock(struct dentry *dentry);
 bool ovl_need_index(struct dentry *dentry);
-int ovl_nlink_start(struct dentry *dentry, bool *locked);
-void ovl_nlink_end(struct dentry *dentry, bool locked);
+int ovl_nlink_start(struct dentry *dentry);
+void ovl_nlink_end(struct dentry *dentry);
 int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir);
 int ovl_check_metacopy_xattr(struct dentry *dentry);
 bool ovl_is_metacopy_dentry(struct dentry *dentry);
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 8cd37baf5d0a4..d3a7860067298 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -738,14 +738,14 @@ static void ovl_cleanup_index(struct dentry *dentry)
  * Operations that change overlay inode and upper inode nlink need to be
  * synchronized with copy up for persistent nlink accounting.
  */
-int ovl_nlink_start(struct dentry *dentry, bool *locked)
+int ovl_nlink_start(struct dentry *dentry)
 {
 	struct ovl_inode *oi = OVL_I(d_inode(dentry));
 	const struct cred *old_cred;
 	int err;
 
-	if (!d_inode(dentry))
-		return 0;
+	if (WARN_ON(!d_inode(dentry)))
+		return -ENOENT;
 
 	/*
 	 * With inodes index is enabled, we store the union overlay nlink
@@ -787,26 +787,22 @@ int ovl_nlink_start(struct dentry *dentry, bool *locked)
 out:
 	if (err)
 		mutex_unlock(&oi->lock);
-	else
-		*locked = true;
 
 	return err;
 }
 
-void ovl_nlink_end(struct dentry *dentry, bool locked)
+void ovl_nlink_end(struct dentry *dentry)
 {
-	if (locked) {
-		if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) &&
-		    d_inode(dentry)->i_nlink == 0) {
-			const struct cred *old_cred;
-
-			old_cred = ovl_override_creds(dentry->d_sb);
-			ovl_cleanup_index(dentry);
-			revert_creds(old_cred);
-		}
+	if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) &&
+	    d_inode(dentry)->i_nlink == 0) {
+		const struct cred *old_cred;
 
-		mutex_unlock(&OVL_I(d_inode(dentry))->lock);
+		old_cred = ovl_override_creds(dentry->d_sb);
+		ovl_cleanup_index(dentry);
+		revert_creds(old_cred);
 	}
+
+	mutex_unlock(&OVL_I(d_inode(dentry))->lock);
 }
 
 int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir)
-- 
GitLab


From 1e92e3072c1456abedf51c9dedd245d2ba0daa4f Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 18 Oct 2018 18:37:14 +0300
Subject: [PATCH 0258/1890] ovl: abstract ovl_inode lock with a helper

The abstraction improves code readabilty (to some).

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/overlayfs.h | 10 ++++++++++
 fs/overlayfs/util.c      | 25 +++++++++++++------------
 2 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index b48ef22ef96bd..5e45cb3630a06 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -290,6 +290,16 @@ static inline unsigned int ovl_xino_bits(struct super_block *sb)
 	return ofs->xino_bits;
 }
 
+static inline int ovl_inode_lock(struct inode *inode)
+{
+	return mutex_lock_interruptible(&OVL_I(inode)->lock);
+}
+
+static inline void ovl_inode_unlock(struct inode *inode)
+{
+	mutex_unlock(&OVL_I(inode)->lock);
+}
+
 
 /* namei.c */
 int ovl_check_fh_len(struct ovl_fh *fh, int fh_len);
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index d3a7860067298..7c01327b18520 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -521,13 +521,13 @@ bool ovl_already_copied_up(struct dentry *dentry, int flags)
 
 int ovl_copy_up_start(struct dentry *dentry, int flags)
 {
-	struct ovl_inode *oi = OVL_I(d_inode(dentry));
+	struct inode *inode = d_inode(dentry);
 	int err;
 
-	err = mutex_lock_interruptible(&oi->lock);
+	err = ovl_inode_lock(inode);
 	if (!err && ovl_already_copied_up_locked(dentry, flags)) {
 		err = 1; /* Already copied up */
-		mutex_unlock(&oi->lock);
+		ovl_inode_unlock(inode);
 	}
 
 	return err;
@@ -535,7 +535,7 @@ int ovl_copy_up_start(struct dentry *dentry, int flags)
 
 void ovl_copy_up_end(struct dentry *dentry)
 {
-	mutex_unlock(&OVL_I(d_inode(dentry))->lock);
+	ovl_inode_unlock(d_inode(dentry));
 }
 
 bool ovl_check_origin_xattr(struct dentry *dentry)
@@ -740,11 +740,11 @@ static void ovl_cleanup_index(struct dentry *dentry)
  */
 int ovl_nlink_start(struct dentry *dentry)
 {
-	struct ovl_inode *oi = OVL_I(d_inode(dentry));
+	struct inode *inode = d_inode(dentry);
 	const struct cred *old_cred;
 	int err;
 
-	if (WARN_ON(!d_inode(dentry)))
+	if (WARN_ON(!inode))
 		return -ENOENT;
 
 	/*
@@ -767,11 +767,11 @@ int ovl_nlink_start(struct dentry *dentry)
 			return err;
 	}
 
-	err = mutex_lock_interruptible(&oi->lock);
+	err = ovl_inode_lock(inode);
 	if (err)
 		return err;
 
-	if (d_is_dir(dentry) || !ovl_test_flag(OVL_INDEX, d_inode(dentry)))
+	if (d_is_dir(dentry) || !ovl_test_flag(OVL_INDEX, inode))
 		goto out;
 
 	old_cred = ovl_override_creds(dentry->d_sb);
@@ -786,15 +786,16 @@ int ovl_nlink_start(struct dentry *dentry)
 
 out:
 	if (err)
-		mutex_unlock(&oi->lock);
+		ovl_inode_unlock(inode);
 
 	return err;
 }
 
 void ovl_nlink_end(struct dentry *dentry)
 {
-	if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) &&
-	    d_inode(dentry)->i_nlink == 0) {
+	struct inode *inode = d_inode(dentry);
+
+	if (ovl_test_flag(OVL_INDEX, inode) && inode->i_nlink == 0) {
 		const struct cred *old_cred;
 
 		old_cred = ovl_override_creds(dentry->d_sb);
@@ -802,7 +803,7 @@ void ovl_nlink_end(struct dentry *dentry)
 		revert_creds(old_cred);
 	}
 
-	mutex_unlock(&OVL_I(d_inode(dentry))->lock);
+	ovl_inode_unlock(inode);
 }
 
 int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir)
-- 
GitLab


From 14fa085640a7eb55431eec8a0273bbf0c463ce46 Mon Sep 17 00:00:00 2001
From: Chengguang Xu <cgxu519@gmx.com>
Date: Fri, 22 Jun 2018 09:49:36 +0800
Subject: [PATCH 0259/1890] ovl: using posix_acl_xattr_size() to get size
 instead of posix_acl_to_xattr()

There is no functional change but it seems better to get size by calling
posix_acl_xattr_size() instead of calling posix_acl_to_xattr() with
NULL buffer argument. Additionally, remove unnecessary assignments.

Signed-off-by: Chengguang Xu <cgxu519@gmx.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/dir.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index e03f39550ccf0..ce1857fb54348 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -414,13 +414,12 @@ static int ovl_set_upper_acl(struct dentry *upperdentry, const char *name,
 	if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !acl)
 		return 0;
 
-	size = posix_acl_to_xattr(NULL, acl, NULL, 0);
+	size = posix_acl_xattr_size(acl->a_count);
 	buffer = kmalloc(size, GFP_KERNEL);
 	if (!buffer)
 		return -ENOMEM;
 
-	size = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
-	err = size;
+	err = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
 	if (err < 0)
 		goto out_free;
 
-- 
GitLab


From aedef16a63d5d330afde4fcb54e0e73980e4a116 Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Sat, 27 Oct 2018 17:13:31 +0900
Subject: [PATCH 0260/1890] ALSA: dice: fix to wait for releases of all ALSA
 character devices

In a development period for Linux kernel v4.20, drivers in ALSA firewire
stack were changed to wait for releases of all ALSA character devices at
.remove callback of bus driver. However, ALSA dice driver is partly out
of this change. This bug can bring fault to user process which holds
the last of character device in unplugging.

This commit fixes the driver to wait in the callback.

Fixes: 61ccc6f6b27c ('ALSA: firewire: block .remove callback of bus driver till all of ALSA character devices are released')
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/firewire/dice/dice.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sound/firewire/dice/dice.c b/sound/firewire/dice/dice.c
index 0f6dbcffe711d..ed50b222d36ea 100644
--- a/sound/firewire/dice/dice.c
+++ b/sound/firewire/dice/dice.c
@@ -240,8 +240,8 @@ static void dice_remove(struct fw_unit *unit)
 	cancel_delayed_work_sync(&dice->dwork);
 
 	if (dice->registered) {
-		/* No need to wait for releasing card object in this context. */
-		snd_card_free_when_closed(dice->card);
+		// Block till all of ALSA character devices are released.
+		snd_card_free(dice->card);
 	}
 
 	mutex_destroy(&dice->mutex);
-- 
GitLab


From 0e96f31ea4249b1e94e266fe4dff908c2983a9b3 Mon Sep 17 00:00:00 2001
From: Jordan Borgner <mail@jordan-borgner.de>
Date: Sun, 28 Oct 2018 12:58:28 +0000
Subject: [PATCH 0261/1890] x86: Clean up 'sizeof x' => 'sizeof(x)'

"sizeof(x)" is the canonical coding style used in arch/x86 most of the time.
Fix the few places that didn't follow the convention.

(Also do some whitespace cleanups in a few places while at it.)

[ mingo: Rewrote the changelog. ]

Signed-off-by: Jordan Borgner <mail@jordan-borgner.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20181028125828.7rgammkgzep2wpam@JordanDesktop
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/boot/cpucheck.c             |  2 +-
 arch/x86/boot/early_serial_console.c |  4 +--
 arch/x86/boot/edd.c                  |  6 ++--
 arch/x86/boot/main.c                 |  4 +--
 arch/x86/boot/memory.c               |  2 +-
 arch/x86/boot/regs.c                 |  2 +-
 arch/x86/boot/video-vesa.c           |  6 ++--
 arch/x86/boot/video.c                |  2 +-
 arch/x86/events/intel/core.c         |  2 +-
 arch/x86/kernel/cpu/common.c         |  4 +--
 arch/x86/kernel/cpu/mcheck/mce.c     |  2 +-
 arch/x86/kernel/cpu/mtrr/generic.c   |  2 +-
 arch/x86/kernel/cpu/mtrr/if.c        |  6 ++--
 arch/x86/kernel/head64.c             |  2 +-
 arch/x86/kernel/msr.c                |  8 +++---
 arch/x86/kvm/emulate.c               | 22 +++++++--------
 arch/x86/kvm/lapic.c                 |  2 +-
 arch/x86/kvm/x86.c                   | 42 ++++++++++++++--------------
 arch/x86/tools/relocs.c              |  4 +--
 arch/x86/um/asm/elf.h                |  2 +-
 20 files changed, 63 insertions(+), 63 deletions(-)

diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c
index 8f0c4c9fc9043..51079fc9298fc 100644
--- a/arch/x86/boot/cpucheck.c
+++ b/arch/x86/boot/cpucheck.c
@@ -113,7 +113,7 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr)
 {
 	int err;
 
-	memset(&cpu.flags, 0, sizeof cpu.flags);
+	memset(&cpu.flags, 0, sizeof(cpu.flags));
 	cpu.level = 3;
 
 	if (has_eflag(X86_EFLAGS_AC))
diff --git a/arch/x86/boot/early_serial_console.c b/arch/x86/boot/early_serial_console.c
index b25c53527a940..023bf1c3de8b7 100644
--- a/arch/x86/boot/early_serial_console.c
+++ b/arch/x86/boot/early_serial_console.c
@@ -50,7 +50,7 @@ static void parse_earlyprintk(void)
 	int pos = 0;
 	int port = 0;
 
-	if (cmdline_find_option("earlyprintk", arg, sizeof arg) > 0) {
+	if (cmdline_find_option("earlyprintk", arg, sizeof(arg)) > 0) {
 		char *e;
 
 		if (!strncmp(arg, "serial", 6)) {
@@ -124,7 +124,7 @@ static void parse_console_uart8250(void)
 	 * console=uart8250,io,0x3f8,115200n8
 	 * need to make sure it is last one console !
 	 */
-	if (cmdline_find_option("console", optstr, sizeof optstr) <= 0)
+	if (cmdline_find_option("console", optstr, sizeof(optstr)) <= 0)
 		return;
 
 	options = optstr;
diff --git a/arch/x86/boot/edd.c b/arch/x86/boot/edd.c
index 223e42527077d..6c176b6a42ad0 100644
--- a/arch/x86/boot/edd.c
+++ b/arch/x86/boot/edd.c
@@ -76,7 +76,7 @@ static int get_edd_info(u8 devno, struct edd_info *ei)
 {
 	struct biosregs ireg, oreg;
 
-	memset(ei, 0, sizeof *ei);
+	memset(ei, 0, sizeof(*ei));
 
 	/* Check Extensions Present */
 
@@ -133,7 +133,7 @@ void query_edd(void)
 	struct edd_info ei, *edp;
 	u32 *mbrptr;
 
-	if (cmdline_find_option("edd", eddarg, sizeof eddarg) > 0) {
+	if (cmdline_find_option("edd", eddarg, sizeof(eddarg)) > 0) {
 		if (!strcmp(eddarg, "skipmbr") || !strcmp(eddarg, "skip")) {
 			do_edd = 1;
 			do_mbr = 0;
@@ -166,7 +166,7 @@ void query_edd(void)
 		 */
 		if (!get_edd_info(devno, &ei)
 		    && boot_params.eddbuf_entries < EDDMAXNR) {
-			memcpy(edp, &ei, sizeof ei);
+			memcpy(edp, &ei, sizeof(ei));
 			edp++;
 			boot_params.eddbuf_entries++;
 		}
diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c
index 9bcea386db65e..73532543d6892 100644
--- a/arch/x86/boot/main.c
+++ b/arch/x86/boot/main.c
@@ -36,8 +36,8 @@ static void copy_boot_params(void)
 	const struct old_cmdline * const oldcmd =
 		(const struct old_cmdline *)OLD_CL_ADDRESS;
 
-	BUILD_BUG_ON(sizeof boot_params != 4096);
-	memcpy(&boot_params.hdr, &hdr, sizeof hdr);
+	BUILD_BUG_ON(sizeof(boot_params) != 4096);
+	memcpy(&boot_params.hdr, &hdr, sizeof(hdr));
 
 	if (!boot_params.hdr.cmd_line_ptr &&
 	    oldcmd->cl_magic == OLD_CL_MAGIC) {
diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c
index d9c28c87e4771..7df2b28207be6 100644
--- a/arch/x86/boot/memory.c
+++ b/arch/x86/boot/memory.c
@@ -26,7 +26,7 @@ static int detect_memory_e820(void)
 
 	initregs(&ireg);
 	ireg.ax  = 0xe820;
-	ireg.cx  = sizeof buf;
+	ireg.cx  = sizeof(buf);
 	ireg.edx = SMAP;
 	ireg.di  = (size_t)&buf;
 
diff --git a/arch/x86/boot/regs.c b/arch/x86/boot/regs.c
index c0fb356a3092e..2fe3616ba1613 100644
--- a/arch/x86/boot/regs.c
+++ b/arch/x86/boot/regs.c
@@ -21,7 +21,7 @@
 
 void initregs(struct biosregs *reg)
 {
-	memset(reg, 0, sizeof *reg);
+	memset(reg, 0, sizeof(*reg));
 	reg->eflags |= X86_EFLAGS_CF;
 	reg->ds = ds();
 	reg->es = ds();
diff --git a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c
index ba3e100654db0..3ecc11a9c4404 100644
--- a/arch/x86/boot/video-vesa.c
+++ b/arch/x86/boot/video-vesa.c
@@ -62,7 +62,7 @@ static int vesa_probe(void)
 		if (mode & ~0x1ff)
 			continue;
 
-		memset(&vminfo, 0, sizeof vminfo); /* Just in case... */
+		memset(&vminfo, 0, sizeof(vminfo)); /* Just in case... */
 
 		ireg.ax = 0x4f01;
 		ireg.cx = mode;
@@ -109,7 +109,7 @@ static int vesa_set_mode(struct mode_info *mode)
 	int is_graphic;
 	u16 vesa_mode = mode->mode - VIDEO_FIRST_VESA;
 
-	memset(&vminfo, 0, sizeof vminfo); /* Just in case... */
+	memset(&vminfo, 0, sizeof(vminfo)); /* Just in case... */
 
 	initregs(&ireg);
 	ireg.ax = 0x4f01;
@@ -241,7 +241,7 @@ void vesa_store_edid(void)
 	struct biosregs ireg, oreg;
 
 	/* Apparently used as a nonsense token... */
-	memset(&boot_params.edid_info, 0x13, sizeof boot_params.edid_info);
+	memset(&boot_params.edid_info, 0x13, sizeof(boot_params.edid_info));
 
 	if (vginfo.version < 0x0200)
 		return;		/* EDID requires VBE 2.0+ */
diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c
index 77780e386e9b2..ac89b6624a405 100644
--- a/arch/x86/boot/video.c
+++ b/arch/x86/boot/video.c
@@ -115,7 +115,7 @@ static unsigned int get_entry(void)
 		} else if ((key >= '0' && key <= '9') ||
 			   (key >= 'A' && key <= 'Z') ||
 			   (key >= 'a' && key <= 'z')) {
-			if (len < sizeof entry_buf) {
+			if (len < sizeof(entry_buf)) {
 				entry_buf[len++] = key;
 				putchar(key);
 			}
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 0fb8659b20d8d..273c62e815463 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -4535,7 +4535,7 @@ __init int intel_pmu_init(void)
 		}
 	}
 
-	snprintf(pmu_name_str, sizeof pmu_name_str, "%s", name);
+	snprintf(pmu_name_str, sizeof(pmu_name_str), "%s", name);
 
 	if (version >= 2 && extra_attr) {
 		x86_pmu.format_attrs = merge_attr(intel_arch3_formats_attr,
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 660d0b22e962e..3a4eb2b25d711 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1074,7 +1074,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 #endif
 	c->x86_cache_alignment = c->x86_clflush_size;
 
-	memset(&c->x86_capability, 0, sizeof c->x86_capability);
+	memset(&c->x86_capability, 0, sizeof(c->x86_capability));
 	c->extended_cpuid_level = 0;
 
 	if (!have_cpuid_p())
@@ -1317,7 +1317,7 @@ static void identify_cpu(struct cpuinfo_x86 *c)
 	c->x86_virt_bits = 32;
 #endif
 	c->x86_cache_alignment = c->x86_clflush_size;
-	memset(&c->x86_capability, 0, sizeof c->x86_capability);
+	memset(&c->x86_capability, 0, sizeof(c->x86_capability));
 
 	generic_identify(c);
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 8cb3c02980cfa..8c66d2fc8f81d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -2215,7 +2215,7 @@ static int mce_device_create(unsigned int cpu)
 	if (dev)
 		return 0;
 
-	dev = kzalloc(sizeof *dev, GFP_KERNEL);
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
 	if (!dev)
 		return -ENOMEM;
 	dev->id  = cpu;
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index e12ee86906c62..86e277f8daf42 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -798,7 +798,7 @@ static void generic_set_all(void)
 	local_irq_restore(flags);
 
 	/* Use the atomic bitops to update the global mask */
-	for (count = 0; count < sizeof mask * 8; ++count) {
+	for (count = 0; count < sizeof(mask) * 8; ++count) {
 		if (mask & 0x01)
 			set_bit(count, &smp_changes_mask);
 		mask >>= 1;
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index 40eee6cc41248..2e173d47b450d 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -174,12 +174,12 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
 	case MTRRIOC_SET_PAGE_ENTRY:
 	case MTRRIOC_DEL_PAGE_ENTRY:
 	case MTRRIOC_KILL_PAGE_ENTRY:
-		if (copy_from_user(&sentry, arg, sizeof sentry))
+		if (copy_from_user(&sentry, arg, sizeof(sentry)))
 			return -EFAULT;
 		break;
 	case MTRRIOC_GET_ENTRY:
 	case MTRRIOC_GET_PAGE_ENTRY:
-		if (copy_from_user(&gentry, arg, sizeof gentry))
+		if (copy_from_user(&gentry, arg, sizeof(gentry)))
 			return -EFAULT;
 		break;
 #ifdef CONFIG_COMPAT
@@ -332,7 +332,7 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
 	switch (cmd) {
 	case MTRRIOC_GET_ENTRY:
 	case MTRRIOC_GET_PAGE_ENTRY:
-		if (copy_to_user(arg, &gentry, sizeof gentry))
+		if (copy_to_user(arg, &gentry, sizeof(gentry)))
 			err = -EFAULT;
 		break;
 #ifdef CONFIG_COMPAT
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 5dc377dc9d7b5..7663a8eb602bc 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -385,7 +385,7 @@ static void __init copy_bootdata(char *real_mode_data)
 	 */
 	sme_map_bootdata(real_mode_data);
 
-	memcpy(&boot_params, real_mode_data, sizeof boot_params);
+	memcpy(&boot_params, real_mode_data, sizeof(boot_params));
 	sanitize_boot_params(&boot_params);
 	cmd_line_ptr = get_cmd_line_ptr();
 	if (cmd_line_ptr) {
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index ef688804f80d3..4588414e2561c 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -115,14 +115,14 @@ static long msr_ioctl(struct file *file, unsigned int ioc, unsigned long arg)
 			err = -EBADF;
 			break;
 		}
-		if (copy_from_user(&regs, uregs, sizeof regs)) {
+		if (copy_from_user(&regs, uregs, sizeof(regs))) {
 			err = -EFAULT;
 			break;
 		}
 		err = rdmsr_safe_regs_on_cpu(cpu, regs);
 		if (err)
 			break;
-		if (copy_to_user(uregs, &regs, sizeof regs))
+		if (copy_to_user(uregs, &regs, sizeof(regs)))
 			err = -EFAULT;
 		break;
 
@@ -131,14 +131,14 @@ static long msr_ioctl(struct file *file, unsigned int ioc, unsigned long arg)
 			err = -EBADF;
 			break;
 		}
-		if (copy_from_user(&regs, uregs, sizeof regs)) {
+		if (copy_from_user(&regs, uregs, sizeof(regs))) {
 			err = -EFAULT;
 			break;
 		}
 		err = wrmsr_safe_regs_on_cpu(cpu, regs);
 		if (err)
 			break;
-		if (copy_to_user(uregs, &regs, sizeof regs))
+		if (copy_to_user(uregs, &regs, sizeof(regs)))
 			err = -EFAULT;
 		break;
 
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 34edf198708f7..78e430f4e15cf 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1509,7 +1509,7 @@ static int read_interrupt_descriptor(struct x86_emulate_ctxt *ctxt,
 		return emulate_gp(ctxt, index << 3 | 0x2);
 
 	addr = dt.address + index * 8;
-	return linear_read_system(ctxt, addr, desc, sizeof *desc);
+	return linear_read_system(ctxt, addr, desc, sizeof(*desc));
 }
 
 static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
@@ -1522,7 +1522,7 @@ static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
 		struct desc_struct desc;
 		u16 sel;
 
-		memset (dt, 0, sizeof *dt);
+		memset(dt, 0, sizeof(*dt));
 		if (!ops->get_segment(ctxt, &sel, &desc, &base3,
 				      VCPU_SREG_LDTR))
 			return;
@@ -1586,7 +1586,7 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
-	return linear_write_system(ctxt, addr, desc, sizeof *desc);
+	return linear_write_system(ctxt, addr, desc, sizeof(*desc));
 }
 
 static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
@@ -1604,7 +1604,7 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 	u16 dummy;
 	u32 base3 = 0;
 
-	memset(&seg_desc, 0, sizeof seg_desc);
+	memset(&seg_desc, 0, sizeof(seg_desc));
 
 	if (ctxt->mode == X86EMUL_MODE_REAL) {
 		/* set real mode segment descriptor (keep limit etc. for
@@ -3075,17 +3075,17 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt,
 	int ret;
 	u32 new_tss_base = get_desc_base(new_desc);
 
-	ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof tss_seg);
+	ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 
 	save_state_to_tss16(ctxt, &tss_seg);
 
-	ret = linear_write_system(ctxt, old_tss_base, &tss_seg, sizeof tss_seg);
+	ret = linear_write_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 
-	ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof tss_seg);
+	ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof(tss_seg));
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 
@@ -3094,7 +3094,7 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt,
 
 		ret = linear_write_system(ctxt, new_tss_base,
 					  &tss_seg.prev_task_link,
-					  sizeof tss_seg.prev_task_link);
+					  sizeof(tss_seg.prev_task_link));
 		if (ret != X86EMUL_CONTINUE)
 			return ret;
 	}
@@ -3216,7 +3216,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt,
 	u32 eip_offset = offsetof(struct tss_segment_32, eip);
 	u32 ldt_sel_offset = offsetof(struct tss_segment_32, ldt_selector);
 
-	ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof tss_seg);
+	ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof(tss_seg));
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 
@@ -3228,7 +3228,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt,
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 
-	ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof tss_seg);
+	ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof(tss_seg));
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 
@@ -3237,7 +3237,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt,
 
 		ret = linear_write_system(ctxt, new_tss_base,
 					  &tss_seg.prev_task_link,
-					  sizeof tss_seg.prev_task_link);
+					  sizeof(tss_seg.prev_task_link));
 		if (ret != X86EMUL_CONTINUE)
 			return ret;
 	}
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 3cd227ff807fa..89db20f8cb707 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2409,7 +2409,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
 	r = kvm_apic_state_fixup(vcpu, s, true);
 	if (r)
 		return r;
-	memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
+	memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s));
 
 	recalculate_apic_map(vcpu->kvm);
 	kvm_apic_set_version(vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 66d66d77caee5..5cd5647120f2b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2924,7 +2924,7 @@ static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
 	unsigned size;
 
 	r = -EFAULT;
-	if (copy_from_user(&msrs, user_msrs, sizeof msrs))
+	if (copy_from_user(&msrs, user_msrs, sizeof(msrs)))
 		goto out;
 
 	r = -E2BIG;
@@ -3091,11 +3091,11 @@ long kvm_arch_dev_ioctl(struct file *filp,
 		unsigned n;
 
 		r = -EFAULT;
-		if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list))
+		if (copy_from_user(&msr_list, user_msr_list, sizeof(msr_list)))
 			goto out;
 		n = msr_list.nmsrs;
 		msr_list.nmsrs = num_msrs_to_save + num_emulated_msrs;
-		if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
+		if (copy_to_user(user_msr_list, &msr_list, sizeof(msr_list)))
 			goto out;
 		r = -E2BIG;
 		if (n < msr_list.nmsrs)
@@ -3117,7 +3117,7 @@ long kvm_arch_dev_ioctl(struct file *filp,
 		struct kvm_cpuid2 cpuid;
 
 		r = -EFAULT;
-		if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
+		if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
 			goto out;
 
 		r = kvm_dev_ioctl_get_cpuid(&cpuid, cpuid_arg->entries,
@@ -3126,7 +3126,7 @@ long kvm_arch_dev_ioctl(struct file *filp,
 			goto out;
 
 		r = -EFAULT;
-		if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
+		if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid)))
 			goto out;
 		r = 0;
 		break;
@@ -3894,7 +3894,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		struct kvm_interrupt irq;
 
 		r = -EFAULT;
-		if (copy_from_user(&irq, argp, sizeof irq))
+		if (copy_from_user(&irq, argp, sizeof(irq)))
 			goto out;
 		r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
 		break;
@@ -3912,7 +3912,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		struct kvm_cpuid cpuid;
 
 		r = -EFAULT;
-		if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
+		if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
 			goto out;
 		r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
 		break;
@@ -3922,7 +3922,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		struct kvm_cpuid2 cpuid;
 
 		r = -EFAULT;
-		if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
+		if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
 			goto out;
 		r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
 					      cpuid_arg->entries);
@@ -3933,14 +3933,14 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		struct kvm_cpuid2 cpuid;
 
 		r = -EFAULT;
-		if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
+		if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid)))
 			goto out;
 		r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid,
 					      cpuid_arg->entries);
 		if (r)
 			goto out;
 		r = -EFAULT;
-		if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
+		if (copy_to_user(cpuid_arg, &cpuid, sizeof(cpuid)))
 			goto out;
 		r = 0;
 		break;
@@ -3961,13 +3961,13 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		struct kvm_tpr_access_ctl tac;
 
 		r = -EFAULT;
-		if (copy_from_user(&tac, argp, sizeof tac))
+		if (copy_from_user(&tac, argp, sizeof(tac)))
 			goto out;
 		r = vcpu_ioctl_tpr_access_reporting(vcpu, &tac);
 		if (r)
 			goto out;
 		r = -EFAULT;
-		if (copy_to_user(argp, &tac, sizeof tac))
+		if (copy_to_user(argp, &tac, sizeof(tac)))
 			goto out;
 		r = 0;
 		break;
@@ -3980,7 +3980,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		if (!lapic_in_kernel(vcpu))
 			goto out;
 		r = -EFAULT;
-		if (copy_from_user(&va, argp, sizeof va))
+		if (copy_from_user(&va, argp, sizeof(va)))
 			goto out;
 		idx = srcu_read_lock(&vcpu->kvm->srcu);
 		r = kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
@@ -3991,7 +3991,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		u64 mcg_cap;
 
 		r = -EFAULT;
-		if (copy_from_user(&mcg_cap, argp, sizeof mcg_cap))
+		if (copy_from_user(&mcg_cap, argp, sizeof(mcg_cap)))
 			goto out;
 		r = kvm_vcpu_ioctl_x86_setup_mce(vcpu, mcg_cap);
 		break;
@@ -4000,7 +4000,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		struct kvm_x86_mce mce;
 
 		r = -EFAULT;
-		if (copy_from_user(&mce, argp, sizeof mce))
+		if (copy_from_user(&mce, argp, sizeof(mce)))
 			goto out;
 		r = kvm_vcpu_ioctl_x86_set_mce(vcpu, &mce);
 		break;
@@ -4536,7 +4536,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		if (kvm->created_vcpus)
 			goto set_identity_unlock;
 		r = -EFAULT;
-		if (copy_from_user(&ident_addr, argp, sizeof ident_addr))
+		if (copy_from_user(&ident_addr, argp, sizeof(ident_addr)))
 			goto set_identity_unlock;
 		r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr);
 set_identity_unlock:
@@ -4620,7 +4620,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		if (r)
 			goto get_irqchip_out;
 		r = -EFAULT;
-		if (copy_to_user(argp, chip, sizeof *chip))
+		if (copy_to_user(argp, chip, sizeof(*chip)))
 			goto get_irqchip_out;
 		r = 0;
 	get_irqchip_out:
@@ -4666,7 +4666,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
 	}
 	case KVM_SET_PIT: {
 		r = -EFAULT;
-		if (copy_from_user(&u.ps, argp, sizeof u.ps))
+		if (copy_from_user(&u.ps, argp, sizeof(u.ps)))
 			goto out;
 		r = -ENXIO;
 		if (!kvm->arch.vpit)
@@ -8205,7 +8205,7 @@ static void __get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
 	sregs->efer = vcpu->arch.efer;
 	sregs->apic_base = kvm_get_apic_base(vcpu);
 
-	memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap);
+	memset(sregs->interrupt_bitmap, 0, sizeof(sregs->interrupt_bitmap));
 
 	if (vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft)
 		set_bit(vcpu->arch.interrupt.nr,
@@ -8509,7 +8509,7 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 	fpu->last_opcode = fxsave->fop;
 	fpu->last_ip = fxsave->rip;
 	fpu->last_dp = fxsave->rdp;
-	memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space);
+	memcpy(fpu->xmm, fxsave->xmm_space, sizeof(fxsave->xmm_space));
 
 	vcpu_put(vcpu);
 	return 0;
@@ -8530,7 +8530,7 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 	fxsave->fop = fpu->last_opcode;
 	fxsave->rip = fpu->last_ip;
 	fxsave->rdp = fpu->last_dp;
-	memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space);
+	memcpy(fxsave->xmm_space, fpu->xmm, sizeof(fxsave->xmm_space));
 
 	vcpu_put(vcpu);
 	return 0;
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index 0b08067c45f3d..b629f6992d9f6 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -130,7 +130,7 @@ static void regex_init(int use_real_mode)
 			      REG_EXTENDED|REG_NOSUB);
 
 		if (err) {
-			regerror(err, &sym_regex_c[i], errbuf, sizeof errbuf);
+			regerror(err, &sym_regex_c[i], errbuf, sizeof(errbuf));
 			die("%s", errbuf);
 		}
         }
@@ -405,7 +405,7 @@ static void read_shdrs(FILE *fp)
 	}
 	for (i = 0; i < ehdr.e_shnum; i++) {
 		struct section *sec = &secs[i];
-		if (fread(&shdr, sizeof shdr, 1, fp) != 1)
+		if (fread(&shdr, sizeof(shdr), 1, fp) != 1)
 			die("Cannot read ELF section headers %d/%d: %s\n",
 			    i, ehdr.e_shnum, strerror(errno));
 		sec->shdr.sh_name      = elf_word_to_cpu(shdr.sh_name);
diff --git a/arch/x86/um/asm/elf.h b/arch/x86/um/asm/elf.h
index 413f3519d9a12..c907b20d49935 100644
--- a/arch/x86/um/asm/elf.h
+++ b/arch/x86/um/asm/elf.h
@@ -194,7 +194,7 @@ extern unsigned long um_vdso_addr;
 
 typedef unsigned long elf_greg_t;
 
-#define ELF_NGREG (sizeof (struct user_regs_struct) / sizeof(elf_greg_t))
+#define ELF_NGREG (sizeof(struct user_regs_struct) / sizeof(elf_greg_t))
 typedef elf_greg_t elf_gregset_t[ELF_NGREG];
 
 typedef struct user_i387_struct elf_fpregset_t;
-- 
GitLab


From a68d75081aeccfb169575bea6f452a5a12b9f49b Mon Sep 17 00:00:00 2001
From: Muchun Song <smuchun@gmail.com>
Date: Sat, 27 Oct 2018 11:05:17 +0800
Subject: [PATCH 0262/1890] sched/rt: Update comment in pick_next_task_rt()

Commit:

  f4ebcbc0d7e0 ("sched/rt: Substract number of tasks of throttled queues from rq->nr_running")

added a new rt_rq->rt_queued field, which is used to indicate the status of
rq->rt enqueue or dequeue. So, the ->rt_nr_running check was removed and we
now check ->rt_queued instead.

Fix the comment in pick_next_task_rt() as well, which was still referencing
the old logic.

Signed-off-by: Muchun Song <smuchun@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20181027030517.23292-1-smuchun@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/rt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 2e2955a8cf8fe..a21ea60219293 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1561,7 +1561,7 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 
 	/*
 	 * We may dequeue prev's rt_rq in put_prev_task().
-	 * So, we update time before rt_nr_running check.
+	 * So, we update time before rt_queued check.
 	 */
 	if (prev->sched_class == &rt_sched_class)
 		update_curr_rt(rq);
-- 
GitLab


From 2022cceb4e30f1bb4c84d40ffa705aa8d8d68adb Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Sat, 27 Oct 2018 00:20:04 +0200
Subject: [PATCH 0263/1890] x86/traps: Use format string with panic() call

Building with -Wformat-nonliteral gives:

  arch/x86/kernel/traps.c:334:2: warning: format not a string literal and no format arguments [-Wformat-nonliteral]
    panic(message);

handle_stack_overflow() can only be called from two places (kernel/traps.c
and via inline asm in mm/fault.c), in both cases with a string not
containing format specifiers, so we might as well silence this warning
using "%s" as a format string.

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20181026222004.14193-1-linux@rasmusvillemoes.dk
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/traps.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 8f6dcd88202e8..9b7c4ca8f0a73 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -306,7 +306,7 @@ __visible void __noreturn handle_stack_overflow(const char *message,
 	die(message, regs, 0);
 
 	/* Be absolutely certain we don't return. */
-	panic(message);
+	panic("%s", message);
 }
 #endif
 
-- 
GitLab


From 922dceff8dc1fb4dafc9af78139ba65671408103 Mon Sep 17 00:00:00 2001
From: "Lee, Shawn C" <shawn.c.lee@intel.com>
Date: Sun, 28 Oct 2018 22:49:33 -0700
Subject: [PATCH 0264/1890] drm/edid: Add 6 bpc quirk for BOE panel.

BOE panel (ID: 0x0771) that reports "DFP 1.x compliant TMDS".
But it's 6bpc panel only instead of 8 bpc.

Add panel ID to edid quirk list and set 6 bpc as default to
work around this issue.

Cc: Jani Nikula <jani.nikula@intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Gustavo Padovan <gustavo@padovan.org>
Cc: Cooper Chiou <cooper.chiou@intel.com>
Signed-off-by: Lee, Shawn C <shawn.c.lee@intel.com>>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/1540792173-7288-1-git-send-email-shawn.c.lee@intel.com
---
 drivers/gpu/drm/drm_edid.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index 3c9fc99648b7c..08821dfa3327c 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -119,6 +119,9 @@ static const struct edid_quirk {
 	/* SDC panel of Lenovo B50-80 reports 8 bpc, but is a 6 bpc panel */
 	{ "SDC", 0x3652, EDID_QUIRK_FORCE_6BPC },
 
+	/* BOE model 0x0771 reports 8 bpc, but is a 6 bpc panel */
+	{ "BOE", 0x0771, EDID_QUIRK_FORCE_6BPC },
+
 	/* Belinea 10 15 55 */
 	{ "MAX", 1516, EDID_QUIRK_PREFER_LARGE_60 },
 	{ "MAX", 0x77e, EDID_QUIRK_PREFER_LARGE_60 },
-- 
GitLab


From 4269fea768a11a447d8de620ce420f2214d4685c Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 26 Oct 2018 11:14:28 +0200
Subject: [PATCH 0265/1890] Revert "netfilter: nft_numgen: add map lookups for
 numgen random operations"

Laura found a better way to do this from userspace without requiring
kernel infrastructure, revert this.

Fixes: 978d8f9055c3 ("netfilter: nft_numgen: add map lookups for numgen random operations")
Signed-off-by: Laura Garcia Liebana <nevola@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h |   4 +-
 net/netfilter/nft_numgen.c               | 127 -----------------------
 2 files changed, 2 insertions(+), 129 deletions(-)

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 579974b0bf0d8..7de4f1bdaf06a 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -1635,8 +1635,8 @@ enum nft_ng_attributes {
 	NFTA_NG_MODULUS,
 	NFTA_NG_TYPE,
 	NFTA_NG_OFFSET,
-	NFTA_NG_SET_NAME,
-	NFTA_NG_SET_ID,
+	NFTA_NG_SET_NAME,	/* deprecated */
+	NFTA_NG_SET_ID,		/* deprecated */
 	__NFTA_NG_MAX
 };
 #define NFTA_NG_MAX	(__NFTA_NG_MAX - 1)
diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c
index 649d1700ec5ba..3cc1b3dc3c3cd 100644
--- a/net/netfilter/nft_numgen.c
+++ b/net/netfilter/nft_numgen.c
@@ -24,7 +24,6 @@ struct nft_ng_inc {
 	u32			modulus;
 	atomic_t		counter;
 	u32			offset;
-	struct nft_set		*map;
 };
 
 static u32 nft_ng_inc_gen(struct nft_ng_inc *priv)
@@ -48,34 +47,11 @@ static void nft_ng_inc_eval(const struct nft_expr *expr,
 	regs->data[priv->dreg] = nft_ng_inc_gen(priv);
 }
 
-static void nft_ng_inc_map_eval(const struct nft_expr *expr,
-				struct nft_regs *regs,
-				const struct nft_pktinfo *pkt)
-{
-	struct nft_ng_inc *priv = nft_expr_priv(expr);
-	const struct nft_set *map = priv->map;
-	const struct nft_set_ext *ext;
-	u32 result;
-	bool found;
-
-	result = nft_ng_inc_gen(priv);
-	found = map->ops->lookup(nft_net(pkt), map, &result, &ext);
-
-	if (!found)
-		return;
-
-	nft_data_copy(&regs->data[priv->dreg],
-		      nft_set_ext_data(ext), map->dlen);
-}
-
 static const struct nla_policy nft_ng_policy[NFTA_NG_MAX + 1] = {
 	[NFTA_NG_DREG]		= { .type = NLA_U32 },
 	[NFTA_NG_MODULUS]	= { .type = NLA_U32 },
 	[NFTA_NG_TYPE]		= { .type = NLA_U32 },
 	[NFTA_NG_OFFSET]	= { .type = NLA_U32 },
-	[NFTA_NG_SET_NAME]	= { .type = NLA_STRING,
-				    .len = NFT_SET_MAXNAMELEN - 1 },
-	[NFTA_NG_SET_ID]	= { .type = NLA_U32 },
 };
 
 static int nft_ng_inc_init(const struct nft_ctx *ctx,
@@ -101,22 +77,6 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx,
 					   NFT_DATA_VALUE, sizeof(u32));
 }
 
-static int nft_ng_inc_map_init(const struct nft_ctx *ctx,
-			       const struct nft_expr *expr,
-			       const struct nlattr * const tb[])
-{
-	struct nft_ng_inc *priv = nft_expr_priv(expr);
-	u8 genmask = nft_genmask_next(ctx->net);
-
-	nft_ng_inc_init(ctx, expr, tb);
-
-	priv->map = nft_set_lookup_global(ctx->net, ctx->table,
-					  tb[NFTA_NG_SET_NAME],
-					  tb[NFTA_NG_SET_ID], genmask);
-
-	return PTR_ERR_OR_ZERO(priv->map);
-}
-
 static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg,
 		       u32 modulus, enum nft_ng_types type, u32 offset)
 {
@@ -143,27 +103,10 @@ static int nft_ng_inc_dump(struct sk_buff *skb, const struct nft_expr *expr)
 			   priv->offset);
 }
 
-static int nft_ng_inc_map_dump(struct sk_buff *skb,
-			       const struct nft_expr *expr)
-{
-	const struct nft_ng_inc *priv = nft_expr_priv(expr);
-
-	if (nft_ng_dump(skb, priv->dreg, priv->modulus,
-			NFT_NG_INCREMENTAL, priv->offset) ||
-	    nla_put_string(skb, NFTA_NG_SET_NAME, priv->map->name))
-		goto nla_put_failure;
-
-	return 0;
-
-nla_put_failure:
-	return -1;
-}
-
 struct nft_ng_random {
 	enum nft_registers      dreg:8;
 	u32			modulus;
 	u32			offset;
-	struct nft_set		*map;
 };
 
 static u32 nft_ng_random_gen(struct nft_ng_random *priv)
@@ -183,25 +126,6 @@ static void nft_ng_random_eval(const struct nft_expr *expr,
 	regs->data[priv->dreg] = nft_ng_random_gen(priv);
 }
 
-static void nft_ng_random_map_eval(const struct nft_expr *expr,
-				   struct nft_regs *regs,
-				   const struct nft_pktinfo *pkt)
-{
-	struct nft_ng_random *priv = nft_expr_priv(expr);
-	const struct nft_set *map = priv->map;
-	const struct nft_set_ext *ext;
-	u32 result;
-	bool found;
-
-	result = nft_ng_random_gen(priv);
-	found = map->ops->lookup(nft_net(pkt), map, &result, &ext);
-	if (!found)
-		return;
-
-	nft_data_copy(&regs->data[priv->dreg],
-		      nft_set_ext_data(ext), map->dlen);
-}
-
 static int nft_ng_random_init(const struct nft_ctx *ctx,
 			      const struct nft_expr *expr,
 			      const struct nlattr * const tb[])
@@ -226,21 +150,6 @@ static int nft_ng_random_init(const struct nft_ctx *ctx,
 					   NFT_DATA_VALUE, sizeof(u32));
 }
 
-static int nft_ng_random_map_init(const struct nft_ctx *ctx,
-				  const struct nft_expr *expr,
-				  const struct nlattr * const tb[])
-{
-	struct nft_ng_random *priv = nft_expr_priv(expr);
-	u8 genmask = nft_genmask_next(ctx->net);
-
-	nft_ng_random_init(ctx, expr, tb);
-	priv->map = nft_set_lookup_global(ctx->net, ctx->table,
-					  tb[NFTA_NG_SET_NAME],
-					  tb[NFTA_NG_SET_ID], genmask);
-
-	return PTR_ERR_OR_ZERO(priv->map);
-}
-
 static int nft_ng_random_dump(struct sk_buff *skb, const struct nft_expr *expr)
 {
 	const struct nft_ng_random *priv = nft_expr_priv(expr);
@@ -249,22 +158,6 @@ static int nft_ng_random_dump(struct sk_buff *skb, const struct nft_expr *expr)
 			   priv->offset);
 }
 
-static int nft_ng_random_map_dump(struct sk_buff *skb,
-				  const struct nft_expr *expr)
-{
-	const struct nft_ng_random *priv = nft_expr_priv(expr);
-
-	if (nft_ng_dump(skb, priv->dreg, priv->modulus,
-			NFT_NG_RANDOM, priv->offset) ||
-	    nla_put_string(skb, NFTA_NG_SET_NAME, priv->map->name))
-		goto nla_put_failure;
-
-	return 0;
-
-nla_put_failure:
-	return -1;
-}
-
 static struct nft_expr_type nft_ng_type;
 static const struct nft_expr_ops nft_ng_inc_ops = {
 	.type		= &nft_ng_type,
@@ -274,14 +167,6 @@ static const struct nft_expr_ops nft_ng_inc_ops = {
 	.dump		= nft_ng_inc_dump,
 };
 
-static const struct nft_expr_ops nft_ng_inc_map_ops = {
-	.type		= &nft_ng_type,
-	.size		= NFT_EXPR_SIZE(sizeof(struct nft_ng_inc)),
-	.eval		= nft_ng_inc_map_eval,
-	.init		= nft_ng_inc_map_init,
-	.dump		= nft_ng_inc_map_dump,
-};
-
 static const struct nft_expr_ops nft_ng_random_ops = {
 	.type		= &nft_ng_type,
 	.size		= NFT_EXPR_SIZE(sizeof(struct nft_ng_random)),
@@ -290,14 +175,6 @@ static const struct nft_expr_ops nft_ng_random_ops = {
 	.dump		= nft_ng_random_dump,
 };
 
-static const struct nft_expr_ops nft_ng_random_map_ops = {
-	.type		= &nft_ng_type,
-	.size		= NFT_EXPR_SIZE(sizeof(struct nft_ng_random)),
-	.eval		= nft_ng_random_map_eval,
-	.init		= nft_ng_random_map_init,
-	.dump		= nft_ng_random_map_dump,
-};
-
 static const struct nft_expr_ops *
 nft_ng_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
 {
@@ -312,12 +189,8 @@ nft_ng_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
 
 	switch (type) {
 	case NFT_NG_INCREMENTAL:
-		if (tb[NFTA_NG_SET_NAME])
-			return &nft_ng_inc_map_ops;
 		return &nft_ng_inc_ops;
 	case NFT_NG_RANDOM:
-		if (tb[NFTA_NG_SET_NAME])
-			return &nft_ng_random_map_ops;
 		return &nft_ng_random_ops;
 	}
 
-- 
GitLab


From 69840466086d2248898020a08dda52732686c4e6 Mon Sep 17 00:00:00 2001
From: Jianchao Wang <jianchao.w.wang@oracle.com>
Date: Sat, 27 Oct 2018 19:52:14 +0800
Subject: [PATCH 0266/1890] block: fix the DISCARD request merge

There are two cases when handle DISCARD merge.
If max_discard_segments == 1, the bios/requests need to be contiguous
to merge. If max_discard_segments > 1, it takes every bio as a range
and different range needn't to be contiguous.

But now, attempt_merge screws this up. It always consider contiguity
for DISCARD for the case max_discard_segments > 1 and cannot merge
contiguous DISCARD for the case max_discard_segments == 1, because
rq_attempt_discard_merge always returns false in this case.
This patch fixes both of the two cases above.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jianchao Wang <jianchao.w.wang@oracle.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-merge.c | 46 ++++++++++++++++++++++++++++++++++++----------
 1 file changed, 36 insertions(+), 10 deletions(-)

diff --git a/block/blk-merge.c b/block/blk-merge.c
index 42a46744c11b4..6b5ad275ed565 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -714,6 +714,31 @@ static void blk_account_io_merge(struct request *req)
 		part_stat_unlock();
 	}
 }
+/*
+ * Two cases of handling DISCARD merge:
+ * If max_discard_segments > 1, the driver takes every bio
+ * as a range and send them to controller together. The ranges
+ * needn't to be contiguous.
+ * Otherwise, the bios/requests will be handled as same as
+ * others which should be contiguous.
+ */
+static inline bool blk_discard_mergable(struct request *req)
+{
+	if (req_op(req) == REQ_OP_DISCARD &&
+	    queue_max_discard_segments(req->q) > 1)
+		return true;
+	return false;
+}
+
+enum elv_merge blk_try_req_merge(struct request *req, struct request *next)
+{
+	if (blk_discard_mergable(req))
+		return ELEVATOR_DISCARD_MERGE;
+	else if (blk_rq_pos(req) + blk_rq_sectors(req) == blk_rq_pos(next))
+		return ELEVATOR_BACK_MERGE;
+
+	return ELEVATOR_NO_MERGE;
+}
 
 /*
  * For non-mq, this has to be called with the request spinlock acquired.
@@ -731,12 +756,6 @@ static struct request *attempt_merge(struct request_queue *q,
 	if (req_op(req) != req_op(next))
 		return NULL;
 
-	/*
-	 * not contiguous
-	 */
-	if (blk_rq_pos(req) + blk_rq_sectors(req) != blk_rq_pos(next))
-		return NULL;
-
 	if (rq_data_dir(req) != rq_data_dir(next)
 	    || req->rq_disk != next->rq_disk
 	    || req_no_special_merge(next))
@@ -760,11 +779,19 @@ static struct request *attempt_merge(struct request_queue *q,
 	 * counts here. Handle DISCARDs separately, as they
 	 * have separate settings.
 	 */
-	if (req_op(req) == REQ_OP_DISCARD) {
+
+	switch (blk_try_req_merge(req, next)) {
+	case ELEVATOR_DISCARD_MERGE:
 		if (!req_attempt_discard_merge(q, req, next))
 			return NULL;
-	} else if (!ll_merge_requests_fn(q, req, next))
+		break;
+	case ELEVATOR_BACK_MERGE:
+		if (!ll_merge_requests_fn(q, req, next))
+			return NULL;
+		break;
+	default:
 		return NULL;
+	}
 
 	/*
 	 * If failfast settings disagree or any of the two is already
@@ -888,8 +915,7 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
 
 enum elv_merge blk_try_merge(struct request *rq, struct bio *bio)
 {
-	if (req_op(rq) == REQ_OP_DISCARD &&
-	    queue_max_discard_segments(rq->q) > 1)
+	if (blk_discard_mergable(rq))
 		return ELEVATOR_DISCARD_MERGE;
 	else if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector)
 		return ELEVATOR_BACK_MERGE;
-- 
GitLab


From cb028e49129f352d2f2645727ceeeacdd64761f4 Mon Sep 17 00:00:00 2001
From: Douglas Anderson <dianders@chromium.org>
Date: Thu, 25 Oct 2018 15:21:29 -0700
Subject: [PATCH 0267/1890] dt-bindings: drm/panel: simple: Add no-hpd property

Some eDP panels that are designed to be always connected to a board
use their HPD signal to signal that they've finished powering on and
they're ready to be talked to.

However, for various reasons it's possible that the HPD signal from
the panel isn't actually hooked up.  In the case where the HPD isn't
hooked up you can look at the timing diagram on the panel datasheet
and insert a delay for the maximum amount of time that the HPD might
take to come up.

Let's add a property in the device tree for this concept.

Reviewed-by: Sean Paul <sean@poorly.run>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Signed-off-by: Sean Paul <seanpaul@chromium.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20181025222134.174583-1-dianders@chromium.org
---
 .../devicetree/bindings/display/panel/simple-panel.txt         | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Documentation/devicetree/bindings/display/panel/simple-panel.txt b/Documentation/devicetree/bindings/display/panel/simple-panel.txt
index 45a457ad38f0f..b2b872c710f24 100644
--- a/Documentation/devicetree/bindings/display/panel/simple-panel.txt
+++ b/Documentation/devicetree/bindings/display/panel/simple-panel.txt
@@ -11,6 +11,9 @@ Optional properties:
 - ddc-i2c-bus: phandle of an I2C controller used for DDC EDID probing
 - enable-gpios: GPIO pin to enable or disable the panel
 - backlight: phandle of the backlight device attached to the panel
+- no-hpd: This panel is supposed to communicate that it's ready via HPD
+  (hot plug detect) signal, but the signal isn't hooked up so we should
+  hardcode the max delay from the panel spec when powering up the panel.
 
 Example:
 
-- 
GitLab


From 2ed3e9510f60d0c097ca492e094eeb376ba4330a Mon Sep 17 00:00:00 2001
From: Douglas Anderson <dianders@chromium.org>
Date: Thu, 25 Oct 2018 15:21:30 -0700
Subject: [PATCH 0268/1890] drm/panel: simple: Support panels with HPD where
 HPD isn't connected

Some eDP panels that are designed to be always connected to a board
use their HPD signal to signal that they've finished powering on and
they're ready to be talked to.

However, for various reasons it's possible that the HPD signal from
the panel isn't actually hooked up.  In the case where the HPD isn't
hooked up you can look at the timing diagram on the panel datasheet
and insert a delay for the maximum amount of time that the HPD might
take to come up.

Let's add support in simple-panel for this concept.

At the moment we will co-opt the existing "prepare" delay to keep
track of the delay and we'll use a boolean to specify that a given
panel should only apply the delay if the "no-hpd" property was
specified.

Reviewed-by: Sean Paul <sean@poorly.run>
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Signed-off-by: Sean Paul <seanpaul@chromium.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20181025222134.174583-2-dianders@chromium.org
---
 drivers/gpu/drm/panel/panel-simple.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c
index 97964f7f2acee..687fd087b9fcd 100644
--- a/drivers/gpu/drm/panel/panel-simple.c
+++ b/drivers/gpu/drm/panel/panel-simple.c
@@ -56,6 +56,8 @@ struct panel_desc {
 	/**
 	 * @prepare: the time (in milliseconds) that it takes for the panel to
 	 *           become ready and start receiving video data
+	 * @hpd_absent_delay: Add this to the prepare delay if we know Hot
+	 *                    Plug Detect isn't used.
 	 * @enable: the time (in milliseconds) that it takes for the panel to
 	 *          display the first valid frame after starting to receive
 	 *          video data
@@ -66,6 +68,7 @@ struct panel_desc {
 	 */
 	struct {
 		unsigned int prepare;
+		unsigned int hpd_absent_delay;
 		unsigned int enable;
 		unsigned int disable;
 		unsigned int unprepare;
@@ -79,6 +82,7 @@ struct panel_simple {
 	struct drm_panel base;
 	bool prepared;
 	bool enabled;
+	bool no_hpd;
 
 	const struct panel_desc *desc;
 
@@ -202,6 +206,7 @@ static int panel_simple_unprepare(struct drm_panel *panel)
 static int panel_simple_prepare(struct drm_panel *panel)
 {
 	struct panel_simple *p = to_panel_simple(panel);
+	unsigned int delay;
 	int err;
 
 	if (p->prepared)
@@ -215,8 +220,11 @@ static int panel_simple_prepare(struct drm_panel *panel)
 
 	gpiod_set_value_cansleep(p->enable_gpio, 1);
 
-	if (p->desc->delay.prepare)
-		msleep(p->desc->delay.prepare);
+	delay = p->desc->delay.prepare;
+	if (p->no_hpd)
+		delay += p->desc->delay.hpd_absent_delay;
+	if (delay)
+		msleep(delay);
 
 	p->prepared = true;
 
@@ -305,6 +313,8 @@ static int panel_simple_probe(struct device *dev, const struct panel_desc *desc)
 	panel->prepared = false;
 	panel->desc = desc;
 
+	panel->no_hpd = of_property_read_bool(dev->of_node, "no-hpd");
+
 	panel->supply = devm_regulator_get(dev, "power");
 	if (IS_ERR(panel->supply))
 		return PTR_ERR(panel->supply);
-- 
GitLab


From 625d3b5c2ea612e4adf8b289384ad654c207c2bb Mon Sep 17 00:00:00 2001
From: Douglas Anderson <dianders@chromium.org>
Date: Thu, 25 Oct 2018 15:21:31 -0700
Subject: [PATCH 0269/1890] drm/panel: simple: Add "no-hpd" delay for Innolux
 TV123WAM

If the HPD signal isn't hooked up to this panel we need a 200 ms
delay.  In the datasheet this is shown as the maximum time that HPD
will take to be asserted after power is given to the panel.

Reviewed-by: Sean Paul <sean@poorly.run>
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Signed-off-by: Sean Paul <seanpaul@chromium.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20181025222134.174583-3-dianders@chromium.org
---
 drivers/gpu/drm/panel/panel-simple.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c
index 687fd087b9fcd..88592f9a0018f 100644
--- a/drivers/gpu/drm/panel/panel-simple.c
+++ b/drivers/gpu/drm/panel/panel-simple.c
@@ -1396,6 +1396,7 @@ static const struct panel_desc innolux_tv123wam = {
 		.height = 173,
 	},
 	.delay = {
+		.hpd_absent_delay = 200,
 		.unprepare = 500,
 	},
 };
-- 
GitLab


From c2bfc223882d4b9d73be970e00ead6fe7868bdb7 Mon Sep 17 00:00:00 2001
From: Douglas Anderson <dianders@chromium.org>
Date: Thu, 25 Oct 2018 15:21:32 -0700
Subject: [PATCH 0270/1890] drm/bridge: ti-sn65dsi86: Remove the mystery delay

Let's solve the mystery of commit bf1178c98930 ("drm/bridge:
ti-sn65dsi86: Add mystery delay to enable()").  Specifically the
reason we needed that mystery delay is that we weren't paying
attention to HPD.

Looking at the datasheet for the same panel that was tested for the
original commit, I see there's a timing "t3" that times from power on
to the aux channel being operational.  This time is specced as 0 - 200
ms.  The datasheet says that the aux channel is operational at exactly
the same time that HPD is asserted.

Scoping the signals on this board showed that HPD was asserted 84 ms
after power was asserted.  That very closely matches the magic 70 ms
delay that we had.  ...and actually, in my testing the 70 ms wasn't
quite enough of a delay and some percentage of the time the display
didn't come up until I bumped it to 100 ms (presumably 84 ms would
have worked too).

To solve this, we tried to hook up the HPD signal in the bridge.
...but in doing so we found that that the bridge didn't report that
HPD was asserted until ~280 ms after we powered it (!).  This is
explained by looking at the sn65dsi86 datasheet section "8.4.5.1 HPD
(Hot Plug/Unplug Detection)".  Reading there we see that the bridge
isn't even intended to report HPD until 100 ms after it's asserted.
...but that would have left us at 184 ms.  The extra 100 ms
(presumably) comes from this part in the datasheet:

> The HPD state machine operates off an internal ring oscillator. The
> ring oscillator frequency will vary [ ... ]. The min/max range in
> the HPD State Diagram refers to the possible times based off
> variation in the ring oscillator frequency.

Given that the 280 ms we'll end up delaying if we hook up HPD is
_slower_ than the 200 ms we could just hardcode, for now we'll solve
the problem by just hardcoding a 200 ms delay in the panel driver
using the patch in this series ("drm/panel: simple: Support panels
with HPD where HPD isn't connected").

If we later find a panel that needs to use this bridge where we need
HPD then we'll have to come up with some new code to handle it.  Given
the silly debouncing in the bridge chip, though, it seems unlikely.

One last note is that I tried to solve this through another way: In
ti_sn_bridge_enable() I tried to use various combinations of
dp_dpcd_writeb() and dp_dpcd_readb() to detect when the aux channel
was up.  In theory that would let me detect _exactly_ when I could
continue and do link training.  Unfortunately even if I did an aux
transfer w/out waiting I couldn't see any errors.  Possibly I could
keep looping over link training until it came back with success, but
that seemed a little overly hacky to me.

Reviewed-by: Sean Paul <sean@poorly.run>
Reviewed-by: Andrzej Hajda <a.hajda@samsung.com>
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Signed-off-by: Sean Paul <seanpaul@chromium.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20181025222134.174583-4-dianders@chromium.org
---
 drivers/gpu/drm/bridge/ti-sn65dsi86.c | 29 +++++++++++++++------------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
index f8a931cf3665e..680566d97adcf 100644
--- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c
+++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
@@ -458,18 +458,6 @@ static void ti_sn_bridge_enable(struct drm_bridge *bridge)
 	unsigned int val;
 	int ret;
 
-	/*
-	 * FIXME:
-	 * This 70ms was found necessary by experimentation. If it's not
-	 * present, link training fails. It seems like it can go anywhere from
-	 * pre_enable() up to semi-auto link training initiation below.
-	 *
-	 * Neither the datasheet for the bridge nor the panel tested mention a
-	 * delay of this magnitude in the timing requirements. So for now, add
-	 * the mystery delay until someone figures out a better fix.
-	 */
-	msleep(70);
-
 	/* DSI_A lane config */
 	val = CHA_DSI_LANES(4 - pdata->dsi->lanes);
 	regmap_update_bits(pdata->regmap, SN_DSI_LANES_REG,
@@ -536,7 +524,22 @@ static void ti_sn_bridge_pre_enable(struct drm_bridge *bridge)
 	/* configure bridge ref_clk */
 	ti_sn_bridge_set_refclk_freq(pdata);
 
-	/* in case drm_panel is connected then HPD is not supported */
+	/*
+	 * HPD on this bridge chip is a bit useless.  This is an eDP bridge
+	 * so the HPD is an internal signal that's only there to signal that
+	 * the panel is done powering up.  ...but the bridge chip debounces
+	 * this signal by between 100 ms and 400 ms (depending on process,
+	 * voltage, and temperate--I measured it at about 200 ms).  One
+	 * particular panel asserted HPD 84 ms after it was powered on meaning
+	 * that we saw HPD 284 ms after power on.  ...but the same panel said
+	 * that instead of looking at HPD you could just hardcode a delay of
+	 * 200 ms.  We'll assume that the panel driver will have the hardcoded
+	 * delay in its prepare and always disable HPD.
+	 *
+	 * If HPD somehow makes sense on some future panel we'll have to
+	 * change this to be conditional on someone specifying that HPD should
+	 * be used.
+	 */
 	regmap_update_bits(pdata->regmap, SN_HPD_DISABLE_REG, HPD_DISABLE,
 			   HPD_DISABLE);
 
-- 
GitLab


From c0e9ab24723cbc0be32993f02b34477929555e0d Mon Sep 17 00:00:00 2001
From: Douglas Anderson <dianders@chromium.org>
Date: Thu, 25 Oct 2018 15:21:33 -0700
Subject: [PATCH 0271/1890] dt-bindings: drm/panel: simple: Innolux TV123WAM is
 actually P120ZDG-BF1

As far as I can tell the bindings that were added in commit
9c04400f7ea6 ("dt-bindings: drm/panel: Document Innolux TV123WAM panel
bindings") weren't actually for Innolux TV123WAM but were actually for
Innolux P120ZDG-BF1.

As far as I can tell the Innolux TV123WAM isn't a real panel and but
it's a mosh between the TI TV123WAM and the Innolux P120ZDG-BF1.
Let's unmosh.

Here's my evidence:

* Searching for TV123WAM on the Internet turns up a TI panel.  While
  it's possible that an Innolux panel has the same model number as the
  TI Panel, it seems a little doubtful.  Looking up the datasheet from
  the TI Panel shows that it's 1920 x 1280 and 259.2 mm x 172.8 mm.

* As far as I know, the patch adding the Innolux Panel was supposed to
  be for the board that's sitting in front of me as I type this
  (support for that board is not yet upstream).  On the back of that
  panel I see Innolux P120ZDZ-EZ1 rev B1.

* Someone pointed me at a datasheet that's supposed to be for the
  panel in front of me (sorry, I can't share the datasheet).  That
  datasheet has the string "p120zdg-bf1"

* If I search for "P120ZDG-BF1" on the Internet I get hits for panels
  that are 2160x1440.  They don't have datasheets, but the fact that
  the resolution matches is a good sign.

While we doing the rename, also mention that no-hpd can be used with
this panel.  See the previous patch in this series ("drm/panel:
simple: Add "no-hpd" delay for Innolux TV123WAM").

Fixes: 9c04400f7ea6 ("dt-bindings: drm/panel: Document Innolux TV123WAM panel bindings")
Cc: Sandeep Panda <spanda@codeaurora.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Andrzej Hajda <a.hajda@samsung.com>
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Signed-off-by: Sean Paul <seanpaul@chromium.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20181025222134.174583-5-dianders@chromium.org
---
 .../{innolux,tv123wam.txt => innolux,p120zdg-bf1.txt}     | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)
 rename Documentation/devicetree/bindings/display/panel/{innolux,tv123wam.txt => innolux,p120zdg-bf1.txt} (70%)

diff --git a/Documentation/devicetree/bindings/display/panel/innolux,tv123wam.txt b/Documentation/devicetree/bindings/display/panel/innolux,p120zdg-bf1.txt
similarity index 70%
rename from Documentation/devicetree/bindings/display/panel/innolux,tv123wam.txt
rename to Documentation/devicetree/bindings/display/panel/innolux,p120zdg-bf1.txt
index a9b35265fa13a..513f03466abac 100644
--- a/Documentation/devicetree/bindings/display/panel/innolux,tv123wam.txt
+++ b/Documentation/devicetree/bindings/display/panel/innolux,p120zdg-bf1.txt
@@ -1,20 +1,22 @@
-Innolux TV123WAM 12.3 inch eDP 2K display panel
+Innolux P120ZDG-BF1 12.02 inch eDP 2K display panel
 
 This binding is compatible with the simple-panel binding, which is specified
 in simple-panel.txt in this directory.
 
 Required properties:
-- compatible: should be "innolux,tv123wam"
+- compatible: should be "innolux,p120zdg-bf1"
 - power-supply: regulator to provide the supply voltage
 
 Optional properties:
 - enable-gpios: GPIO pin to enable or disable the panel
 - backlight: phandle of the backlight device attached to the panel
+- no-hpd: If HPD isn't hooked up; add this property.
 
 Example:
 	panel_edp: panel-edp {
-		compatible = "innolux,tv123wam";
+		compatible = "innolux,p120zdg-bf1";
 		enable-gpios = <&msmgpio 31 GPIO_ACTIVE_LOW>;
 		power-supply = <&pm8916_l2>;
 		backlight = <&backlight>;
+		no-hpd;
 	};
-- 
GitLab


From 8f054b6f53ff34fb787bde4c5940f86a9c175177 Mon Sep 17 00:00:00 2001
From: Douglas Anderson <dianders@chromium.org>
Date: Thu, 25 Oct 2018 15:21:34 -0700
Subject: [PATCH 0272/1890] drm/panel: simple: Innolux TV123WAM is actually
 P120ZDG-BF1

As far as I can tell the panel that was added in commit da50bd4258db
("drm/panel: simple: Add Innolux TV123WAM panel driver support")
wasn't actually an Innolux TV123WAM but was actually an Innolux
P120ZDG-BF1.

As far as I can tell the Innolux TV123WAM isn't a real panel and but
it's a mosh between the TI TV123WAM and the Innolux P120ZDG-BF1.
Let's unmosh.

Here's my evidence:

* Searching for TV123WAM on the Internet turns up a TI panel.  While
  it's possible that an Innolux panel has the same model number as the
  TI Panel, it seems a little doubtful.  Looking up the datasheet from
  the TI Panel shows that it's 1920 x 1280 and 259.2 mm x 172.8 mm.

* As far as I know, the patch adding the Innolux Panel was supposed to
  be for the board that's sitting in front of me as I type this
  (support for that board is not yet upstream).  On the back of that
  panel I see Innolux P120ZDZ-EZ1 rev B1.

* Someone pointed me at a datasheet that's supposed to be for the
  panel in front of me (sorry, I can't share the datasheet).  That
  datasheet has the string "p120zdg-bf1"

* If I search for "P120ZDG-BF1" on the Internet I get hits for panels
  that are 2160x1440.  They don't have datasheets, but the fact that
  the resolution matches is a good sign.

In any case, let's update the name and also the physical size to match
the correct panel.

Fixes: da50bd4258db ("drm/panel: simple: Add Innolux TV123WAM panel driver support")
Cc: Sandeep Panda <spanda@codeaurora.org>
Reviewed-by: Abhinav Kumar <abhinavk@codeaurora.org>
Reviewed-by: Sean Paul <sean@poorly.run>
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Signed-off-by: Sean Paul <seanpaul@chromium.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20181025222134.174583-6-dianders@chromium.org
---
 drivers/gpu/drm/panel/panel-simple.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c
index 88592f9a0018f..a04ffb3b21742 100644
--- a/drivers/gpu/drm/panel/panel-simple.c
+++ b/drivers/gpu/drm/panel/panel-simple.c
@@ -1373,7 +1373,7 @@ static const struct panel_desc innolux_n156bge_l21 = {
 	},
 };
 
-static const struct drm_display_mode innolux_tv123wam_mode = {
+static const struct drm_display_mode innolux_p120zdg_bf1_mode = {
 	.clock = 206016,
 	.hdisplay = 2160,
 	.hsync_start = 2160 + 48,
@@ -1387,13 +1387,13 @@ static const struct drm_display_mode innolux_tv123wam_mode = {
 	.flags = DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC,
 };
 
-static const struct panel_desc innolux_tv123wam = {
-	.modes = &innolux_tv123wam_mode,
+static const struct panel_desc innolux_p120zdg_bf1 = {
+	.modes = &innolux_p120zdg_bf1_mode,
 	.num_modes = 1,
 	.bpc = 8,
 	.size = {
-		.width = 259,
-		.height = 173,
+		.width = 254,
+		.height = 169,
 	},
 	.delay = {
 		.hpd_absent_delay = 200,
@@ -2456,8 +2456,8 @@ static const struct of_device_id platform_of_match[] = {
 		.compatible = "innolux,n156bge-l21",
 		.data = &innolux_n156bge_l21,
 	}, {
-		.compatible = "innolux,tv123wam",
-		.data = &innolux_tv123wam,
+		.compatible = "innolux,p120zdg-bf1",
+		.data = &innolux_p120zdg_bf1,
 	}, {
 		.compatible = "innolux,zj070na-01p",
 		.data = &innolux_zj070na_01p,
-- 
GitLab


From ac237c28d5ac1b241d58b1b7b4b9fa10efb22fb5 Mon Sep 17 00:00:00 2001
From: Alex Stanoev <alex@astanoev.com>
Date: Sun, 28 Oct 2018 16:55:12 +0000
Subject: [PATCH 0273/1890] ALSA: ca0106: Disable IZD on SB0570 DAC to fix
 audio pops

The Creative Audigy SE (SB0570) card currently exhibits an audible pop
whenever playback is stopped or resumed, or during silent periods of an
audio stream. Initialise the IZD bit to the 0 to eliminate these pops.

The Infinite Zero Detection (IZD) feature on the DAC causes the output
to be shunted to Vcap after 2048 samples of silence. This discharges the
AC coupling capacitor through the output and causes the aforementioned
pop/click noise.

The behaviour of the IZD bit is described on page 15 of the WM8768GEDS
datasheet: "With IZD=1, applying MUTE for 1024 consecutive input samples
will cause all outputs to be connected directly to VCAP. This also
happens if 2048 consecutive zero input samples are applied to all 6
channels, and IZD=0. It will be removed as soon as any channel receives
a non-zero input". I believe the second sentence might be referring to
IZD=1 instead of IZD=0 given the observed behaviour of the card.

This change should make the DAC initialisation consistent with
Creative's Windows driver, as this popping persists when initialising
the card in Linux and soft rebooting into Windows, but is not present on
a cold boot to Windows.

Signed-off-by: Alex Stanoev <alex@astanoev.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/ca0106/ca0106.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/pci/ca0106/ca0106.h b/sound/pci/ca0106/ca0106.h
index 04402c14cb239..9847b669cf3cf 100644
--- a/sound/pci/ca0106/ca0106.h
+++ b/sound/pci/ca0106/ca0106.h
@@ -582,7 +582,7 @@
 #define SPI_PL_BIT_R_R		(2<<7)	/* right channel = right */
 #define SPI_PL_BIT_R_C		(3<<7)	/* right channel = (L+R)/2 */
 #define SPI_IZD_REG		2
-#define SPI_IZD_BIT		(1<<4)	/* infinite zero detect */
+#define SPI_IZD_BIT		(0<<4)	/* infinite zero detect */
 
 #define SPI_FMT_REG		3
 #define SPI_FMT_BIT_RJ		(0<<0)	/* right justified mode */
-- 
GitLab


From 8af1909580595a303b03d5999e410d407b7a6db7 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Mon, 29 Oct 2018 16:01:16 +0100
Subject: [PATCH 0274/1890] x86/paravirt: Remove GPL from pv_ops export

Commit 5c83511bdb9832 ("x86/paravirt: Use a single ops structure")
introduced a regression for out-of-tree modules using spinlocks, as
pv_lock_ops was exported via EXPORT_SYMBOL(), while the new pv_ops
structure now containing the pv lock operations is exported via
EXPORT_SYMBOL_GPL().

Change that by using EXPORT_SYMBOL(pv_ops).

Fixes: 5c83511bdb9832 ("x86/paravirt: Use a single ops structure")
Signed-off-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: xen-devel@lists.xenproject.org
Cc: virtualization@lists.linux-foundation.org
Cc: akataria@vmware.com
Cc: rusty@rustcorp.com.au
Cc: boris.ostrovsky@oracle.com
Cc: hpa@zytor.com
Link: https://lkml.kernel.org/r/20181029150116.25372-1-jgross@suse.com
---
 arch/x86/kernel/paravirt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index e4d4df37922a3..45123b116c056 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -483,5 +483,5 @@ NOKPROBE_SYMBOL(native_set_debugreg);
 NOKPROBE_SYMBOL(native_load_idt);
 #endif
 
-EXPORT_SYMBOL_GPL(pv_ops);
+EXPORT_SYMBOL(pv_ops);
 EXPORT_SYMBOL_GPL(pv_info);
-- 
GitLab


From f77084d96355f5fba8e2c1fb3a51a393b1570de7 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 17 Oct 2018 12:34:32 +0200
Subject: [PATCH 0275/1890] x86/mm/pat: Disable preemption around
 __flush_tlb_all()

The WARN_ON_ONCE(__read_cr3() != build_cr3()) in switch_mm_irqs_off()
triggers every once in a while during a snapshotted system upgrade.

The warning triggers since commit decab0888e6e ("x86/mm: Remove
preempt_disable/enable() from __native_flush_tlb()"). The callchain is:

  get_page_from_freelist() -> post_alloc_hook() -> __kernel_map_pages()

with CONFIG_DEBUG_PAGEALLOC enabled.

Disable preemption during CR3 reset / __flush_tlb_all() and add a comment
why preemption has to be disabled so it won't be removed accidentaly.

Add another preemptible() check in __flush_tlb_all() to catch callers with
enabled preemption when PGE is enabled, because PGE enabled does not
trigger the warning in __native_flush_tlb(). Suggested by Andy Lutomirski.

Fixes: decab0888e6e ("x86/mm: Remove preempt_disable/enable() from __native_flush_tlb()")
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20181017103432.zgv46nlu3hc7k4rq@linutronix.de
---
 arch/x86/include/asm/tlbflush.h | 6 ++++++
 arch/x86/mm/pageattr.c          | 6 +++++-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 323a313947e01..d760611cfc351 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -453,6 +453,12 @@ static inline void __native_flush_tlb_one_user(unsigned long addr)
  */
 static inline void __flush_tlb_all(void)
 {
+	/*
+	 * This is to catch users with enabled preemption and the PGE feature
+	 * and don't trigger the warning in __native_flush_tlb().
+	 */
+	VM_WARN_ON_ONCE(preemptible());
+
 	if (boot_cpu_has(X86_FEATURE_PGE)) {
 		__flush_tlb_global();
 	} else {
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 62bb30b4bd2ab..a1004dec98ea7 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -2309,9 +2309,13 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
 
 	/*
 	 * We should perform an IPI and flush all tlbs,
-	 * but that can deadlock->flush only current cpu:
+	 * but that can deadlock->flush only current cpu.
+	 * Preemption needs to be disabled around __flush_tlb_all() due to
+	 * CR3 reload in __native_flush_tlb().
 	 */
+	preempt_disable();
 	__flush_tlb_all();
+	preempt_enable();
 
 	arch_flush_lazy_mmu_mode();
 }
-- 
GitLab


From 5b49f64db299d0b3f7c2170088186aa593d0be7d Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 30 Oct 2018 10:40:22 +1100
Subject: [PATCH 0276/1890] vfs: vfs_clone_file_prep_inodes should return
 EINVAL for a clone from beyond EOF

vfs_clone_file_prep_inodes cannot return 0 if it is asked to remap from
a zero byte file because that's what btrfs does.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/read_write.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/fs/read_write.c b/fs/read_write.c
index 8a2737f0d61d3..260797b018512 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1740,10 +1740,7 @@ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
 	if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
 		return -EINVAL;
 
-	/* Are we going all the way to the end? */
 	isize = i_size_read(inode_in);
-	if (isize == 0)
-		return 0;
 
 	/* Zero length dedupe exits immediately; reflink goes to EOF. */
 	if (*len == 0) {
-- 
GitLab


From 1383a7ed67490fb00d793e36c7a4d599ff88a64d Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 30 Oct 2018 10:40:31 +1100
Subject: [PATCH 0277/1890] vfs: check file ranges before cloning files

Move the file range checks from vfs_clone_file_prep into a separate
generic_remap_checks function so that all the checks are collected in a
central location.  This forms the basis for adding more checks from
generic_write_checks that will make cloning's input checking more
consistent with write input checking.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/ocfs2/refcounttree.c |  2 +-
 fs/read_write.c         | 55 ++++++++------------------------
 fs/xfs/xfs_reflink.c    |  2 +-
 include/linux/fs.h      |  9 ++++--
 mm/filemap.c            | 69 +++++++++++++++++++++++++++++++++++++++++
 5 files changed, 90 insertions(+), 47 deletions(-)

diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 7a5ee145c733f..19e03936c5e1a 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4850,7 +4850,7 @@ int ocfs2_reflink_remap_range(struct file *file_in,
 	    (OCFS2_I(inode_out)->ip_flags & OCFS2_INODE_SYSTEM_FILE))
 		goto out_unlock;
 
-	ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out,
+	ret = vfs_clone_file_prep(file_in, pos_in, file_out, pos_out,
 			&len, is_dedupe);
 	if (ret <= 0)
 		goto out_unlock;
diff --git a/fs/read_write.c b/fs/read_write.c
index 260797b018512..d6e8e242a15fa 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1717,13 +1717,12 @@ static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write)
  * Returns: 0 for "nothing to clone", 1 for "something to clone", or
  * the usual negative error code.
  */
-int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
-			       struct inode *inode_out, loff_t pos_out,
-			       u64 *len, bool is_dedupe)
+int vfs_clone_file_prep(struct file *file_in, loff_t pos_in,
+			struct file *file_out, loff_t pos_out,
+			u64 *len, bool is_dedupe)
 {
-	loff_t bs = inode_out->i_sb->s_blocksize;
-	loff_t blen;
-	loff_t isize;
+	struct inode *inode_in = file_inode(file_in);
+	struct inode *inode_out = file_inode(file_out);
 	bool same_inode = (inode_in == inode_out);
 	int ret;
 
@@ -1740,10 +1739,10 @@ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
 	if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
 		return -EINVAL;
 
-	isize = i_size_read(inode_in);
-
 	/* Zero length dedupe exits immediately; reflink goes to EOF. */
 	if (*len == 0) {
+		loff_t isize = i_size_read(inode_in);
+
 		if (is_dedupe || pos_in == isize)
 			return 0;
 		if (pos_in > isize)
@@ -1751,36 +1750,11 @@ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
 		*len = isize - pos_in;
 	}
 
-	/* Ensure offsets don't wrap and the input is inside i_size */
-	if (pos_in + *len < pos_in || pos_out + *len < pos_out ||
-	    pos_in + *len > isize)
-		return -EINVAL;
-
-	/* Don't allow dedupe past EOF in the dest file */
-	if (is_dedupe) {
-		loff_t	disize;
-
-		disize = i_size_read(inode_out);
-		if (pos_out >= disize || pos_out + *len > disize)
-			return -EINVAL;
-	}
-
-	/* If we're linking to EOF, continue to the block boundary. */
-	if (pos_in + *len == isize)
-		blen = ALIGN(isize, bs) - pos_in;
-	else
-		blen = *len;
-
-	/* Only reflink if we're aligned to block boundaries */
-	if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) ||
-	    !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs))
-		return -EINVAL;
-
-	/* Don't allow overlapped reflink within the same file */
-	if (same_inode) {
-		if (pos_out + blen > pos_in && pos_out < pos_in + blen)
-			return -EINVAL;
-	}
+	/* Check that we don't violate system file offset limits. */
+	ret = generic_remap_checks(file_in, pos_in, file_out, pos_out, len,
+			is_dedupe);
+	if (ret)
+		return ret;
 
 	/* Wait for the completion of any pending IOs on both files */
 	inode_dio_wait(inode_in);
@@ -1813,7 +1787,7 @@ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
 
 	return 1;
 }
-EXPORT_SYMBOL(vfs_clone_file_prep_inodes);
+EXPORT_SYMBOL(vfs_clone_file_prep);
 
 int do_clone_file_range(struct file *file_in, loff_t pos_in,
 			struct file *file_out, loff_t pos_out, u64 len)
@@ -1851,9 +1825,6 @@ int do_clone_file_range(struct file *file_in, loff_t pos_in,
 	if (ret)
 		return ret;
 
-	if (pos_in + len > i_size_read(inode_in))
-		return -EINVAL;
-
 	ret = file_in->f_op->clone_file_range(file_in, pos_in,
 			file_out, pos_out, len);
 	if (!ret) {
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 42ea7bab9144c..281d5f53f2ec2 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1326,7 +1326,7 @@ xfs_reflink_remap_prep(
 	if (IS_DAX(inode_in) || IS_DAX(inode_out))
 		goto out_unlock;
 
-	ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out,
+	ret = vfs_clone_file_prep(file_in, pos_in, file_out, pos_out,
 			len, is_dedupe);
 	if (ret <= 0)
 		goto out_unlock;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 897eae8faee1b..ba93a6e7dac4e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1825,9 +1825,9 @@ extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
 		unsigned long, loff_t *, rwf_t);
 extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *,
 				   loff_t, size_t, unsigned int);
-extern int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
-				      struct inode *inode_out, loff_t pos_out,
-				      u64 *len, bool is_dedupe);
+extern int vfs_clone_file_prep(struct file *file_in, loff_t pos_in,
+			       struct file *file_out, loff_t pos_out,
+			       u64 *count, bool is_dedupe);
 extern int do_clone_file_range(struct file *file_in, loff_t pos_in,
 			       struct file *file_out, loff_t pos_out, u64 len);
 extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
@@ -2967,6 +2967,9 @@ extern int sb_min_blocksize(struct super_block *, int);
 extern int generic_file_mmap(struct file *, struct vm_area_struct *);
 extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
 extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *);
+extern int generic_remap_checks(struct file *file_in, loff_t pos_in,
+				struct file *file_out, loff_t pos_out,
+				uint64_t *count, bool is_dedupe);
 extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
diff --git a/mm/filemap.c b/mm/filemap.c
index 52517f28e6f4a..47e6bfd45a916 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2974,6 +2974,75 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
 }
 EXPORT_SYMBOL(generic_write_checks);
 
+/*
+ * Performs necessary checks before doing a clone.
+ *
+ * Can adjust amount of bytes to clone.
+ * Returns appropriate error code that caller should return or
+ * zero in case the clone should be allowed.
+ */
+int generic_remap_checks(struct file *file_in, loff_t pos_in,
+			 struct file *file_out, loff_t pos_out,
+			 uint64_t *req_count, bool is_dedupe)
+{
+	struct inode *inode_in = file_in->f_mapping->host;
+	struct inode *inode_out = file_out->f_mapping->host;
+	uint64_t count = *req_count;
+	uint64_t bcount;
+	loff_t size_in, size_out;
+	loff_t bs = inode_out->i_sb->s_blocksize;
+
+	/* The start of both ranges must be aligned to an fs block. */
+	if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_out, bs))
+		return -EINVAL;
+
+	/* Ensure offsets don't wrap. */
+	if (pos_in + count < pos_in || pos_out + count < pos_out)
+		return -EINVAL;
+
+	size_in = i_size_read(inode_in);
+	size_out = i_size_read(inode_out);
+
+	/* Dedupe requires both ranges to be within EOF. */
+	if (is_dedupe &&
+	    (pos_in >= size_in || pos_in + count > size_in ||
+	     pos_out >= size_out || pos_out + count > size_out))
+		return -EINVAL;
+
+	/* Ensure the infile range is within the infile. */
+	if (pos_in >= size_in)
+		return -EINVAL;
+	count = min(count, size_in - (uint64_t)pos_in);
+
+	/*
+	 * If the user wanted us to link to the infile's EOF, round up to the
+	 * next block boundary for this check.
+	 *
+	 * Otherwise, make sure the count is also block-aligned, having
+	 * already confirmed the starting offsets' block alignment.
+	 */
+	if (pos_in + count == size_in) {
+		bcount = ALIGN(size_in, bs) - pos_in;
+	} else {
+		if (!IS_ALIGNED(count, bs))
+			return -EINVAL;
+
+		bcount = count;
+	}
+
+	/* Don't allow overlapped cloning within the same file. */
+	if (inode_in == inode_out &&
+	    pos_out + bcount > pos_in &&
+	    pos_out < pos_in + bcount)
+		return -EINVAL;
+
+	/* For now we don't support changing the length. */
+	if (*req_count != count)
+		return -EINVAL;
+
+	return 0;
+}
+
 int pagecache_write_begin(struct file *file, struct address_space *mapping,
 				loff_t pos, unsigned len, unsigned flags,
 				struct page **pagep, void **fsdata)
-- 
GitLab


From 2c5773f102c9bb07d5328467f61f0a88f2f2892d Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 30 Oct 2018 10:40:39 +1100
Subject: [PATCH 0278/1890] vfs: exit early from zero length remap operations

If a remap caller asks us to remap to the source file's EOF and the
source file length leaves us with a zero byte request, exit early.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/read_write.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/read_write.c b/fs/read_write.c
index d6e8e242a15fa..2456da3f8a414 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1748,6 +1748,8 @@ int vfs_clone_file_prep(struct file *file_in, loff_t pos_in,
 		if (pos_in > isize)
 			return -EINVAL;
 		*len = isize - pos_in;
+		if (*len == 0)
+			return 0;
 	}
 
 	/* Check that we don't violate system file offset limits. */
-- 
GitLab


From 9fd91a90cb9837372af24a804853e15c11aed93e Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 30 Oct 2018 10:40:46 +1100
Subject: [PATCH 0279/1890] vfs: strengthen checking of file range inputs to
 generic_remap_checks

File range remapping, if allowed to run past the destination file's EOF,
is an optimization on a regular file write.  Regular file writes that
extend the file length are subject to various constraints which are not
checked by range cloning.

This is a correctness problem because we're never allowed to touch
ranges that the page cache can't support (s_maxbytes); we're not
supposed to deal with large offsets (MAX_NON_LFS) if O_LARGEFILE isn't
set; and we must obey resource limits (RLIMIT_FSIZE).

Therefore, add these checks to the new generic_remap_checks function so
that we curtail unexpected behavior.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 mm/filemap.c | 84 ++++++++++++++++++++++++++++++++--------------------
 1 file changed, 52 insertions(+), 32 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 47e6bfd45a916..84b7301e41a01 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2915,6 +2915,42 @@ struct page *read_cache_page_gfp(struct address_space *mapping,
 }
 EXPORT_SYMBOL(read_cache_page_gfp);
 
+/*
+ * Don't operate on ranges the page cache doesn't support, and don't exceed the
+ * LFS limits.  If pos is under the limit it becomes a short access.  If it
+ * exceeds the limit we return -EFBIG.
+ */
+static int generic_access_check_limits(struct file *file, loff_t pos,
+				       loff_t *count)
+{
+	struct inode *inode = file->f_mapping->host;
+	loff_t max_size = inode->i_sb->s_maxbytes;
+
+	if (!(file->f_flags & O_LARGEFILE))
+		max_size = MAX_NON_LFS;
+
+	if (unlikely(pos >= max_size))
+		return -EFBIG;
+	*count = min(*count, max_size - pos);
+	return 0;
+}
+
+static int generic_write_check_limits(struct file *file, loff_t pos,
+				      loff_t *count)
+{
+	loff_t limit = rlimit(RLIMIT_FSIZE);
+
+	if (limit != RLIM_INFINITY) {
+		if (pos >= limit) {
+			send_sig(SIGXFSZ, current, 0);
+			return -EFBIG;
+		}
+		*count = min(*count, limit - pos);
+	}
+
+	return generic_access_check_limits(file, pos, count);
+}
+
 /*
  * Performs necessary checks before doing a write
  *
@@ -2926,8 +2962,8 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
-	unsigned long limit = rlimit(RLIMIT_FSIZE);
-	loff_t pos;
+	loff_t count;
+	int ret;
 
 	if (!iov_iter_count(from))
 		return 0;
@@ -2936,40 +2972,15 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
 	if (iocb->ki_flags & IOCB_APPEND)
 		iocb->ki_pos = i_size_read(inode);
 
-	pos = iocb->ki_pos;
-
 	if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
 		return -EINVAL;
 
-	if (limit != RLIM_INFINITY) {
-		if (iocb->ki_pos >= limit) {
-			send_sig(SIGXFSZ, current, 0);
-			return -EFBIG;
-		}
-		iov_iter_truncate(from, limit - (unsigned long)pos);
-	}
-
-	/*
-	 * LFS rule
-	 */
-	if (unlikely(pos + iov_iter_count(from) > MAX_NON_LFS &&
-				!(file->f_flags & O_LARGEFILE))) {
-		if (pos >= MAX_NON_LFS)
-			return -EFBIG;
-		iov_iter_truncate(from, MAX_NON_LFS - (unsigned long)pos);
-	}
-
-	/*
-	 * Are we about to exceed the fs block limit ?
-	 *
-	 * If we have written data it becomes a short write.  If we have
-	 * exceeded without writing data we send a signal and return EFBIG.
-	 * Linus frestrict idea will clean these up nicely..
-	 */
-	if (unlikely(pos >= inode->i_sb->s_maxbytes))
-		return -EFBIG;
+	count = iov_iter_count(from);
+	ret = generic_write_check_limits(file, iocb->ki_pos, &count);
+	if (ret)
+		return ret;
 
-	iov_iter_truncate(from, inode->i_sb->s_maxbytes - pos);
+	iov_iter_truncate(from, count);
 	return iov_iter_count(from);
 }
 EXPORT_SYMBOL(generic_write_checks);
@@ -2991,6 +3002,7 @@ int generic_remap_checks(struct file *file_in, loff_t pos_in,
 	uint64_t bcount;
 	loff_t size_in, size_out;
 	loff_t bs = inode_out->i_sb->s_blocksize;
+	int ret;
 
 	/* The start of both ranges must be aligned to an fs block. */
 	if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_out, bs))
@@ -3014,6 +3026,14 @@ int generic_remap_checks(struct file *file_in, loff_t pos_in,
 		return -EINVAL;
 	count = min(count, size_in - (uint64_t)pos_in);
 
+	ret = generic_access_check_limits(file_in, pos_in, &count);
+	if (ret)
+		return ret;
+
+	ret = generic_write_check_limits(file_out, pos_out, &count);
+	if (ret)
+		return ret;
+
 	/*
 	 * If the user wanted us to link to the infile's EOF, round up to the
 	 * next block boundary for this check.
-- 
GitLab


From 07d19dc9fbe9128378b9e226abe886fd8fd473df Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 30 Oct 2018 10:40:55 +1100
Subject: [PATCH 0280/1890] vfs: avoid problematic remapping requests into
 partial EOF block

A deduplication data corruption is exposed in XFS and btrfs. It is
caused by extending the block match range to include the partial EOF
block, but then allowing unknown data beyond EOF to be considered a
"match" to data in the destination file because the comparison is only
made to the end of the source file. This corrupts the destination file
when the source extent is shared with it.

The VFS remapping prep functions  only support whole block dedupe, but
we still need to appear to support whole file dedupe correctly.  Hence
if the dedupe request includes the last block of the souce file, don't
include it in the actual dedupe operation. If the rest of the range
dedupes successfully, then reject the entire request.  A subsequent
patch will enable us to shorten dedupe requests correctly.

When reflinking sub-file ranges, a data corruption can occur when the
source file range includes a partial EOF block. This shares the unknown
data beyond EOF into the second file at a position inside EOF, exposing
stale data in the second file.

If the reflink request includes the last block of the souce file, only
proceed with the reflink operation if it lands at or past the
destination file's current EOF. If it lands within the destination file
EOF, reject the entire request with -EINVAL and make the caller go the
hard way.  A subsequent patch will enable us to shorten reflink requests
correctly.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/read_write.c | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/fs/read_write.c b/fs/read_write.c
index 2456da3f8a414..0f0a6efdd502c 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1708,6 +1708,34 @@ static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write)
 
 	return security_file_permission(file, write ? MAY_WRITE : MAY_READ);
 }
+/*
+ * Ensure that we don't remap a partial EOF block in the middle of something
+ * else.  Assume that the offsets have already been checked for block
+ * alignment.
+ *
+ * For deduplication we always scale down to the previous block because we
+ * can't meaningfully compare post-EOF contents.
+ *
+ * For clone we only link a partial EOF block above the destination file's EOF.
+ */
+static int generic_remap_check_len(struct inode *inode_in,
+				   struct inode *inode_out,
+				   loff_t pos_out,
+				   u64 *len,
+				   bool is_dedupe)
+{
+	u64 blkmask = i_blocksize(inode_in) - 1;
+
+	if ((*len & blkmask) == 0)
+		return 0;
+
+	if (is_dedupe)
+		*len &= ~blkmask;
+	else if (pos_out + *len < i_size_read(inode_out))
+		return -EINVAL;
+
+	return 0;
+}
 
 /*
  * Check that the two inodes are eligible for cloning, the ranges make
@@ -1787,6 +1815,11 @@ int vfs_clone_file_prep(struct file *file_in, loff_t pos_in,
 			return -EBADE;
 	}
 
+	ret = generic_remap_check_len(inode_in, inode_out, pos_out, len,
+			is_dedupe);
+	if (ret)
+		return ret;
+
 	return 1;
 }
 EXPORT_SYMBOL(vfs_clone_file_prep);
-- 
GitLab


From 9aae20500d9cd3e7d55d0536d359bdd1c869db89 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 30 Oct 2018 10:41:01 +1100
Subject: [PATCH 0281/1890] vfs: skip zero-length dedupe requests

Don't bother calling the filesystem for a zero-length dedupe request;
we can return zero and exit.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/read_write.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/fs/read_write.c b/fs/read_write.c
index 0f0a6efdd502c..f5395d8da741e 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -2009,6 +2009,11 @@ int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
 	if (!dst_file->f_op->dedupe_file_range)
 		goto out_drop_write;
 
+	if (len == 0) {
+		ret = 0;
+		goto out_drop_write;
+	}
+
 	ret = dst_file->f_op->dedupe_file_range(src_file, src_pos,
 						dst_file, dst_pos, len);
 out_drop_write:
-- 
GitLab


From a83ab01a62e61616ebb8b97f90f568c1214dc10d Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 30 Oct 2018 10:41:08 +1100
Subject: [PATCH 0282/1890] vfs: rename vfs_clone_file_prep to be more
 descriptive

The vfs_clone_file_prep is a generic function to be called by filesystem
implementations only.  Rename the prefix to generic_ and make it more
clear that it applies to remap operations, not just clones.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/ocfs2/refcounttree.c | 2 +-
 fs/read_write.c         | 8 ++++----
 fs/xfs/xfs_reflink.c    | 2 +-
 include/linux/fs.h      | 6 +++---
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 19e03936c5e1a..36c56dfbe4853 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4850,7 +4850,7 @@ int ocfs2_reflink_remap_range(struct file *file_in,
 	    (OCFS2_I(inode_out)->ip_flags & OCFS2_INODE_SYSTEM_FILE))
 		goto out_unlock;
 
-	ret = vfs_clone_file_prep(file_in, pos_in, file_out, pos_out,
+	ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out,
 			&len, is_dedupe);
 	if (ret <= 0)
 		goto out_unlock;
diff --git a/fs/read_write.c b/fs/read_write.c
index f5395d8da741e..aca75a97a6953 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1745,9 +1745,9 @@ static int generic_remap_check_len(struct inode *inode_in,
  * Returns: 0 for "nothing to clone", 1 for "something to clone", or
  * the usual negative error code.
  */
-int vfs_clone_file_prep(struct file *file_in, loff_t pos_in,
-			struct file *file_out, loff_t pos_out,
-			u64 *len, bool is_dedupe)
+int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
+				  struct file *file_out, loff_t pos_out,
+				  u64 *len, bool is_dedupe)
 {
 	struct inode *inode_in = file_inode(file_in);
 	struct inode *inode_out = file_inode(file_out);
@@ -1822,7 +1822,7 @@ int vfs_clone_file_prep(struct file *file_in, loff_t pos_in,
 
 	return 1;
 }
-EXPORT_SYMBOL(vfs_clone_file_prep);
+EXPORT_SYMBOL(generic_remap_file_range_prep);
 
 int do_clone_file_range(struct file *file_in, loff_t pos_in,
 			struct file *file_out, loff_t pos_out, u64 len)
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 281d5f53f2ec2..a7757a128a78f 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1326,7 +1326,7 @@ xfs_reflink_remap_prep(
 	if (IS_DAX(inode_in) || IS_DAX(inode_out))
 		goto out_unlock;
 
-	ret = vfs_clone_file_prep(file_in, pos_in, file_out, pos_out,
+	ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out,
 			len, is_dedupe);
 	if (ret <= 0)
 		goto out_unlock;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ba93a6e7dac4e..55729e1c2e759 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1825,9 +1825,9 @@ extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
 		unsigned long, loff_t *, rwf_t);
 extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *,
 				   loff_t, size_t, unsigned int);
-extern int vfs_clone_file_prep(struct file *file_in, loff_t pos_in,
-			       struct file *file_out, loff_t pos_out,
-			       u64 *count, bool is_dedupe);
+extern int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
+					 struct file *file_out, loff_t pos_out,
+					 u64 *count, bool is_dedupe);
 extern int do_clone_file_range(struct file *file_in, loff_t pos_in,
 			       struct file *file_out, loff_t pos_out, u64 len);
 extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
-- 
GitLab


From 6095028b455d775e369ae27875f698ff0f6fdeb8 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 30 Oct 2018 10:41:14 +1100
Subject: [PATCH 0283/1890] vfs: rename clone_verify_area to remap_verify_area

Since we use clone_verify_area for both clone and dedupe range checks,
rename the function to make it clear that it's for both.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/read_write.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/read_write.c b/fs/read_write.c
index aca75a97a6953..734c5661fb690 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1686,7 +1686,7 @@ SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in,
 	return ret;
 }
 
-static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write)
+static int remap_verify_area(struct file *file, loff_t pos, u64 len, bool write)
 {
 	struct inode *inode = file_inode(file);
 
@@ -1852,11 +1852,11 @@ int do_clone_file_range(struct file *file_in, loff_t pos_in,
 	if (!file_in->f_op->clone_file_range)
 		return -EOPNOTSUPP;
 
-	ret = clone_verify_area(file_in, pos_in, len, false);
+	ret = remap_verify_area(file_in, pos_in, len, false);
 	if (ret)
 		return ret;
 
-	ret = clone_verify_area(file_out, pos_out, len, true);
+	ret = remap_verify_area(file_out, pos_out, len, true);
 	if (ret)
 		return ret;
 
@@ -1989,7 +1989,7 @@ int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
 	if (ret)
 		return ret;
 
-	ret = clone_verify_area(dst_file, dst_pos, len, true);
+	ret = remap_verify_area(dst_file, dst_pos, len, true);
 	if (ret < 0)
 		goto out_drop_write;
 
@@ -2051,7 +2051,7 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
 	if (!S_ISREG(src->i_mode))
 		goto out;
 
-	ret = clone_verify_area(file, off, len, false);
+	ret = remap_verify_area(file, off, len, false);
 	if (ret < 0)
 		goto out;
 	ret = 0;
-- 
GitLab


From 2e5dfc99f2e61c42083ba742395e7a7b353513d1 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 30 Oct 2018 10:41:21 +1100
Subject: [PATCH 0284/1890] vfs: combine the clone and dedupe into a single
 remap_file_range

Combine the clone_file_range and dedupe_file_range operations into a
single remap_file_range file operation dispatch since they're
fundamentally the same operation.  The differences between the two can
be made in the prep functions.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 Documentation/filesystems/porting |  5 ++++
 Documentation/filesystems/vfs.txt | 20 +++++++++-----
 fs/btrfs/ctree.h                  |  8 +++---
 fs/btrfs/file.c                   |  3 +--
 fs/btrfs/ioctl.c                  | 45 ++++++++++++++++---------------
 fs/cifs/cifsfs.c                  | 22 ++++++++-------
 fs/nfs/nfs4file.c                 | 10 ++++---
 fs/ocfs2/file.c                   | 24 ++++++-----------
 fs/overlayfs/file.c               | 30 ++++++++++++---------
 fs/read_write.c                   | 18 ++++++-------
 fs/xfs/xfs_file.c                 | 23 +++++-----------
 include/linux/fs.h                | 25 ++++++++++++++---
 12 files changed, 127 insertions(+), 106 deletions(-)

diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index 7b7b845c490a4..e6d4466268dd1 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -622,3 +622,8 @@ in your dentry operations instead.
 	alloc_file_clone(file, flags, ops) does not affect any caller's references.
 	On success you get a new struct file sharing the mount/dentry with the
 	original, on failure - ERR_PTR().
+--
+[mandatory]
+	->clone_file_range() and ->dedupe_file_range have been replaced with
+	->remap_file_range().  See Documentation/filesystems/vfs.txt for more
+	information.
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index a6c6a8af48a29..6f5babfee27b7 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -883,8 +883,9 @@ struct file_operations {
 	unsigned (*mmap_capabilities)(struct file *);
 #endif
 	ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, loff_t, size_t, unsigned int);
-	int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t, u64);
-	int (*dedupe_file_range)(struct file *, loff_t, struct file *, loff_t, u64);
+	int (*remap_file_range)(struct file *file_in, loff_t pos_in,
+				struct file *file_out, loff_t pos_out,
+				u64 len, unsigned int remap_flags);
 	int (*fadvise)(struct file *, loff_t, loff_t, int);
 };
 
@@ -960,11 +961,16 @@ otherwise noted.
 
   copy_file_range: called by the copy_file_range(2) system call.
 
-  clone_file_range: called by the ioctl(2) system call for FICLONERANGE and
-	FICLONE commands.
-
-  dedupe_file_range: called by the ioctl(2) system call for FIDEDUPERANGE
-	command.
+  remap_file_range: called by the ioctl(2) system call for FICLONERANGE and
+	FICLONE and FIDEDUPERANGE commands to remap file ranges.  An
+	implementation should remap len bytes at pos_in of the source file into
+	the dest file at pos_out.  Implementations must handle callers passing
+	in len == 0; this means "remap to the end of the source file".  The
+	return value should be zero if all bytes were remapped, or the usual
+	negative error code if the remapping did not succeed completely.
+	The remap_flags parameter accepts REMAP_FILE_* flags.  If
+	REMAP_FILE_DEDUP is set then the implementation must only remap if the
+	requested file ranges have identical contents.
 
   fadvise: possibly called by the fadvise64() system call.
 
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2cddfe7806a41..124a05662fc29 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3218,9 +3218,6 @@ void btrfs_get_block_group_info(struct list_head *groups_list,
 				struct btrfs_ioctl_space_info *space);
 void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
 			       struct btrfs_ioctl_balance_args *bargs);
-int btrfs_dedupe_file_range(struct file *src_file, loff_t src_loff,
-			    struct file *dst_file, loff_t dst_loff,
-			    u64 olen);
 
 /* file.c */
 int __init btrfs_auto_defrag_init(void);
@@ -3250,8 +3247,9 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages,
 		      size_t num_pages, loff_t pos, size_t write_bytes,
 		      struct extent_state **cached);
 int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end);
-int btrfs_clone_file_range(struct file *file_in, loff_t pos_in,
-			   struct file *file_out, loff_t pos_out, u64 len);
+int btrfs_remap_file_range(struct file *file_in, loff_t pos_in,
+			   struct file *file_out, loff_t pos_out, u64 len,
+			   unsigned int remap_flags);
 
 /* tree-defrag.c */
 int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 2be00e873e92a..9a963f061393e 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -3269,8 +3269,7 @@ const struct file_operations btrfs_file_operations = {
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= btrfs_compat_ioctl,
 #endif
-	.clone_file_range = btrfs_clone_file_range,
-	.dedupe_file_range = btrfs_dedupe_file_range,
+	.remap_file_range = btrfs_remap_file_range,
 };
 
 void __cold btrfs_auto_defrag_exit(void)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index d60b6caf09e85..bfd99c66723e2 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3627,26 +3627,6 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
 	return ret;
 }
 
-int btrfs_dedupe_file_range(struct file *src_file, loff_t src_loff,
-			    struct file *dst_file, loff_t dst_loff,
-			    u64 olen)
-{
-	struct inode *src = file_inode(src_file);
-	struct inode *dst = file_inode(dst_file);
-	u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
-
-	if (WARN_ON_ONCE(bs < PAGE_SIZE)) {
-		/*
-		 * Btrfs does not support blocksize < page_size. As a
-		 * result, btrfs_cmp_data() won't correctly handle
-		 * this situation without an update.
-		 */
-		return -EINVAL;
-	}
-
-	return btrfs_extent_same(src, src_loff, olen, dst, dst_loff);
-}
-
 static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
 				     struct inode *inode,
 				     u64 endoff,
@@ -4348,9 +4328,30 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
 	return ret;
 }
 
-int btrfs_clone_file_range(struct file *src_file, loff_t off,
-		struct file *dst_file, loff_t destoff, u64 len)
+int btrfs_remap_file_range(struct file *src_file, loff_t off,
+		struct file *dst_file, loff_t destoff, u64 len,
+		unsigned int remap_flags)
 {
+	if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
+		return -EINVAL;
+
+	if (remap_flags & REMAP_FILE_DEDUP) {
+		struct inode *src = file_inode(src_file);
+		struct inode *dst = file_inode(dst_file);
+		u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
+
+		if (WARN_ON_ONCE(bs < PAGE_SIZE)) {
+			/*
+			 * Btrfs does not support blocksize < page_size. As a
+			 * result, btrfs_cmp_data() won't correctly handle
+			 * this situation without an update.
+			 */
+			return -EINVAL;
+		}
+
+		return btrfs_extent_same(src, off, len, dst, destoff);
+	}
+
 	return btrfs_clone_files(dst_file, src_file, off, len, destoff);
 }
 
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 7065426b3280f..e8144d0dcde2e 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -975,8 +975,9 @@ const struct inode_operations cifs_symlink_inode_ops = {
 	.listxattr = cifs_listxattr,
 };
 
-static int cifs_clone_file_range(struct file *src_file, loff_t off,
-		struct file *dst_file, loff_t destoff, u64 len)
+static int cifs_remap_file_range(struct file *src_file, loff_t off,
+		struct file *dst_file, loff_t destoff, u64 len,
+		unsigned int remap_flags)
 {
 	struct inode *src_inode = file_inode(src_file);
 	struct inode *target_inode = file_inode(dst_file);
@@ -986,6 +987,9 @@ static int cifs_clone_file_range(struct file *src_file, loff_t off,
 	unsigned int xid;
 	int rc;
 
+	if (remap_flags & ~REMAP_FILE_ADVISORY)
+		return -EINVAL;
+
 	cifs_dbg(FYI, "clone range\n");
 
 	xid = get_xid();
@@ -1134,7 +1138,7 @@ const struct file_operations cifs_file_ops = {
 	.llseek = cifs_llseek,
 	.unlocked_ioctl	= cifs_ioctl,
 	.copy_file_range = cifs_copy_file_range,
-	.clone_file_range = cifs_clone_file_range,
+	.remap_file_range = cifs_remap_file_range,
 	.setlease = cifs_setlease,
 	.fallocate = cifs_fallocate,
 };
@@ -1153,7 +1157,7 @@ const struct file_operations cifs_file_strict_ops = {
 	.llseek = cifs_llseek,
 	.unlocked_ioctl	= cifs_ioctl,
 	.copy_file_range = cifs_copy_file_range,
-	.clone_file_range = cifs_clone_file_range,
+	.remap_file_range = cifs_remap_file_range,
 	.setlease = cifs_setlease,
 	.fallocate = cifs_fallocate,
 };
@@ -1172,7 +1176,7 @@ const struct file_operations cifs_file_direct_ops = {
 	.splice_write = iter_file_splice_write,
 	.unlocked_ioctl  = cifs_ioctl,
 	.copy_file_range = cifs_copy_file_range,
-	.clone_file_range = cifs_clone_file_range,
+	.remap_file_range = cifs_remap_file_range,
 	.llseek = cifs_llseek,
 	.setlease = cifs_setlease,
 	.fallocate = cifs_fallocate,
@@ -1191,7 +1195,7 @@ const struct file_operations cifs_file_nobrl_ops = {
 	.llseek = cifs_llseek,
 	.unlocked_ioctl	= cifs_ioctl,
 	.copy_file_range = cifs_copy_file_range,
-	.clone_file_range = cifs_clone_file_range,
+	.remap_file_range = cifs_remap_file_range,
 	.setlease = cifs_setlease,
 	.fallocate = cifs_fallocate,
 };
@@ -1209,7 +1213,7 @@ const struct file_operations cifs_file_strict_nobrl_ops = {
 	.llseek = cifs_llseek,
 	.unlocked_ioctl	= cifs_ioctl,
 	.copy_file_range = cifs_copy_file_range,
-	.clone_file_range = cifs_clone_file_range,
+	.remap_file_range = cifs_remap_file_range,
 	.setlease = cifs_setlease,
 	.fallocate = cifs_fallocate,
 };
@@ -1227,7 +1231,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = {
 	.splice_write = iter_file_splice_write,
 	.unlocked_ioctl  = cifs_ioctl,
 	.copy_file_range = cifs_copy_file_range,
-	.clone_file_range = cifs_clone_file_range,
+	.remap_file_range = cifs_remap_file_range,
 	.llseek = cifs_llseek,
 	.setlease = cifs_setlease,
 	.fallocate = cifs_fallocate,
@@ -1239,7 +1243,7 @@ const struct file_operations cifs_dir_ops = {
 	.read    = generic_read_dir,
 	.unlocked_ioctl  = cifs_ioctl,
 	.copy_file_range = cifs_copy_file_range,
-	.clone_file_range = cifs_clone_file_range,
+	.remap_file_range = cifs_remap_file_range,
 	.llseek = generic_file_llseek,
 	.fsync = cifs_dir_fsync,
 };
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 4288a6ecaf756..ae5780ce41dcf 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -180,8 +180,9 @@ static long nfs42_fallocate(struct file *filep, int mode, loff_t offset, loff_t
 	return nfs42_proc_allocate(filep, offset, len);
 }
 
-static int nfs42_clone_file_range(struct file *src_file, loff_t src_off,
-		struct file *dst_file, loff_t dst_off, u64 count)
+static int nfs42_remap_file_range(struct file *src_file, loff_t src_off,
+		struct file *dst_file, loff_t dst_off, u64 count,
+		unsigned int remap_flags)
 {
 	struct inode *dst_inode = file_inode(dst_file);
 	struct nfs_server *server = NFS_SERVER(dst_inode);
@@ -190,6 +191,9 @@ static int nfs42_clone_file_range(struct file *src_file, loff_t src_off,
 	bool same_inode = false;
 	int ret;
 
+	if (remap_flags & ~REMAP_FILE_ADVISORY)
+		return -EINVAL;
+
 	/* check alignment w.r.t. clone_blksize */
 	ret = -EINVAL;
 	if (bs) {
@@ -262,7 +266,7 @@ const struct file_operations nfs4_file_operations = {
 	.copy_file_range = nfs4_copy_file_range,
 	.llseek		= nfs4_file_llseek,
 	.fallocate	= nfs42_fallocate,
-	.clone_file_range = nfs42_clone_file_range,
+	.remap_file_range = nfs42_remap_file_range,
 #else
 	.llseek		= nfs_file_llseek,
 #endif
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 9fa35cb6f6e0b..0b757a24567c8 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2527,24 +2527,18 @@ static loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int whence)
 	return offset;
 }
 
-static int ocfs2_file_clone_range(struct file *file_in,
+static int ocfs2_remap_file_range(struct file *file_in,
 				  loff_t pos_in,
 				  struct file *file_out,
 				  loff_t pos_out,
-				  u64 len)
+				  u64 len,
+				  unsigned int remap_flags)
 {
-	return ocfs2_reflink_remap_range(file_in, pos_in, file_out, pos_out,
-					 len, false);
-}
+	if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
+		return -EINVAL;
 
-static int ocfs2_file_dedupe_range(struct file *file_in,
-				   loff_t pos_in,
-				   struct file *file_out,
-				   loff_t pos_out,
-				   u64 len)
-{
 	return ocfs2_reflink_remap_range(file_in, pos_in, file_out, pos_out,
-					  len, true);
+					 len, remap_flags & REMAP_FILE_DEDUP);
 }
 
 const struct inode_operations ocfs2_file_iops = {
@@ -2586,8 +2580,7 @@ const struct file_operations ocfs2_fops = {
 	.splice_read	= generic_file_splice_read,
 	.splice_write	= iter_file_splice_write,
 	.fallocate	= ocfs2_fallocate,
-	.clone_file_range = ocfs2_file_clone_range,
-	.dedupe_file_range = ocfs2_file_dedupe_range,
+	.remap_file_range = ocfs2_remap_file_range,
 };
 
 const struct file_operations ocfs2_dops = {
@@ -2633,8 +2626,7 @@ const struct file_operations ocfs2_fops_no_plocks = {
 	.splice_read	= generic_file_splice_read,
 	.splice_write	= iter_file_splice_write,
 	.fallocate	= ocfs2_fallocate,
-	.clone_file_range = ocfs2_file_clone_range,
-	.dedupe_file_range = ocfs2_file_dedupe_range,
+	.remap_file_range = ocfs2_remap_file_range,
 };
 
 const struct file_operations ocfs2_dops_no_plocks = {
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 986313da0c889..fffb36fd5920a 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -489,26 +489,31 @@ static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in,
 			    OVL_COPY);
 }
 
-static int ovl_clone_file_range(struct file *file_in, loff_t pos_in,
-				struct file *file_out, loff_t pos_out, u64 len)
+static int ovl_remap_file_range(struct file *file_in, loff_t pos_in,
+				struct file *file_out, loff_t pos_out,
+				u64 len, unsigned int remap_flags)
 {
-	return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, 0,
-			    OVL_CLONE);
-}
+	enum ovl_copyop op;
+
+	if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
+		return -EINVAL;
+
+	if (remap_flags & REMAP_FILE_DEDUP)
+		op = OVL_DEDUPE;
+	else
+		op = OVL_CLONE;
 
-static int ovl_dedupe_file_range(struct file *file_in, loff_t pos_in,
-				 struct file *file_out, loff_t pos_out, u64 len)
-{
 	/*
 	 * Don't copy up because of a dedupe request, this wouldn't make sense
 	 * most of the time (data would be duplicated instead of deduplicated).
 	 */
-	if (!ovl_inode_upper(file_inode(file_in)) ||
-	    !ovl_inode_upper(file_inode(file_out)))
+	if (op == OVL_DEDUPE &&
+	    (!ovl_inode_upper(file_inode(file_in)) ||
+	     !ovl_inode_upper(file_inode(file_out))))
 		return -EPERM;
 
 	return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, 0,
-			    OVL_DEDUPE);
+			    op);
 }
 
 const struct file_operations ovl_file_operations = {
@@ -525,6 +530,5 @@ const struct file_operations ovl_file_operations = {
 	.compat_ioctl	= ovl_compat_ioctl,
 
 	.copy_file_range	= ovl_copy_file_range,
-	.clone_file_range	= ovl_clone_file_range,
-	.dedupe_file_range	= ovl_dedupe_file_range,
+	.remap_file_range	= ovl_remap_file_range,
 };
diff --git a/fs/read_write.c b/fs/read_write.c
index 734c5661fb690..766bdcb381f34 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1588,9 +1588,9 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
 	 * Try cloning first, this is supported by more file systems, and
 	 * more efficient if both clone and copy are supported (e.g. NFS).
 	 */
-	if (file_in->f_op->clone_file_range) {
-		ret = file_in->f_op->clone_file_range(file_in, pos_in,
-				file_out, pos_out, len);
+	if (file_in->f_op->remap_file_range) {
+		ret = file_in->f_op->remap_file_range(file_in, pos_in,
+				file_out, pos_out, len, 0);
 		if (ret == 0) {
 			ret = len;
 			goto done;
@@ -1849,7 +1849,7 @@ int do_clone_file_range(struct file *file_in, loff_t pos_in,
 	    (file_out->f_flags & O_APPEND))
 		return -EBADF;
 
-	if (!file_in->f_op->clone_file_range)
+	if (!file_in->f_op->remap_file_range)
 		return -EOPNOTSUPP;
 
 	ret = remap_verify_area(file_in, pos_in, len, false);
@@ -1860,8 +1860,8 @@ int do_clone_file_range(struct file *file_in, loff_t pos_in,
 	if (ret)
 		return ret;
 
-	ret = file_in->f_op->clone_file_range(file_in, pos_in,
-			file_out, pos_out, len);
+	ret = file_in->f_op->remap_file_range(file_in, pos_in,
+			file_out, pos_out, len, 0);
 	if (!ret) {
 		fsnotify_access(file_in);
 		fsnotify_modify(file_out);
@@ -2006,7 +2006,7 @@ int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
 		goto out_drop_write;
 
 	ret = -EINVAL;
-	if (!dst_file->f_op->dedupe_file_range)
+	if (!dst_file->f_op->remap_file_range)
 		goto out_drop_write;
 
 	if (len == 0) {
@@ -2014,8 +2014,8 @@ int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
 		goto out_drop_write;
 	}
 
-	ret = dst_file->f_op->dedupe_file_range(src_file, src_pos,
-						dst_file, dst_pos, len);
+	ret = dst_file->f_op->remap_file_range(src_file, src_pos, dst_file,
+			dst_pos, len, REMAP_FILE_DEDUP);
 out_drop_write:
 	mnt_drop_write_file(dst_file);
 
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 61a5ad2600e86..2ad94d508f80f 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -920,27 +920,19 @@ xfs_file_fallocate(
 }
 
 STATIC int
-xfs_file_clone_range(
+xfs_file_remap_range(
 	struct file	*file_in,
 	loff_t		pos_in,
 	struct file	*file_out,
 	loff_t		pos_out,
-	u64		len)
+	u64		len,
+	unsigned int	remap_flags)
 {
-	return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
-				     len, false);
-}
+	if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
+		return -EINVAL;
 
-STATIC int
-xfs_file_dedupe_range(
-	struct file	*file_in,
-	loff_t		pos_in,
-	struct file	*file_out,
-	loff_t		pos_out,
-	u64		len)
-{
 	return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
-				     len, true);
+			len, remap_flags & REMAP_FILE_DEDUP);
 }
 
 STATIC int
@@ -1175,8 +1167,7 @@ const struct file_operations xfs_file_operations = {
 	.fsync		= xfs_file_fsync,
 	.get_unmapped_area = thp_get_unmapped_area,
 	.fallocate	= xfs_file_fallocate,
-	.clone_file_range = xfs_file_clone_range,
-	.dedupe_file_range = xfs_file_dedupe_range,
+	.remap_file_range = xfs_file_remap_range,
 };
 
 const struct file_operations xfs_dir_file_operations = {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 55729e1c2e759..888cef35c7d70 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1721,6 +1721,24 @@ struct block_device_operations;
 #define NOMMU_VMFLAGS \
 	(NOMMU_MAP_READ | NOMMU_MAP_WRITE | NOMMU_MAP_EXEC)
 
+/*
+ * These flags control the behavior of the remap_file_range function pointer.
+ * If it is called with len == 0 that means "remap to end of source file".
+ * See Documentation/filesystems/vfs.txt for more details about this call.
+ *
+ * REMAP_FILE_DEDUP: only remap if contents identical (i.e. deduplicate)
+ */
+#define REMAP_FILE_DEDUP		(1 << 0)
+
+/*
+ * These flags signal that the caller is ok with altering various aspects of
+ * the behavior of the remap operation.  The changes must be made by the
+ * implementation; the vfs remap helper functions can take advantage of them.
+ * Flags in this category exist to preserve the quirky behavior of the hoisted
+ * btrfs clone/dedupe ioctls.
+ * There are no flags yet, but subsequent commits will add some.
+ */
+#define REMAP_FILE_ADVISORY		(0)
 
 struct iov_iter;
 
@@ -1759,10 +1777,9 @@ struct file_operations {
 #endif
 	ssize_t (*copy_file_range)(struct file *, loff_t, struct file *,
 			loff_t, size_t, unsigned int);
-	int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t,
-			u64);
-	int (*dedupe_file_range)(struct file *, loff_t, struct file *, loff_t,
-			u64);
+	int (*remap_file_range)(struct file *file_in, loff_t pos_in,
+				struct file *file_out, loff_t pos_out,
+				u64 len, unsigned int remap_flags);
 	int (*fadvise)(struct file *, loff_t, loff_t, int);
 } __randomize_layout;
 
-- 
GitLab


From a91ae49bbaf43910edb09e03fedf26b23875bd52 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 30 Oct 2018 10:41:28 +1100
Subject: [PATCH 0285/1890] vfs: pass remap flags to
 generic_remap_file_range_prep

Plumb the remap flags through the filesystem from the vfs function
dispatcher all the way to the prep function to prepare for behavior
changes in subsequent patches.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/ocfs2/file.c         |  2 +-
 fs/ocfs2/refcounttree.c |  4 ++--
 fs/ocfs2/refcounttree.h |  2 +-
 fs/read_write.c         | 14 +++++++-------
 fs/xfs/xfs_file.c       |  2 +-
 fs/xfs/xfs_reflink.c    | 21 +++++++++++----------
 fs/xfs/xfs_reflink.h    |  3 ++-
 include/linux/fs.h      |  2 +-
 8 files changed, 26 insertions(+), 24 deletions(-)

diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 0b757a24567c8..9809b0e5746f5 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2538,7 +2538,7 @@ static int ocfs2_remap_file_range(struct file *file_in,
 		return -EINVAL;
 
 	return ocfs2_reflink_remap_range(file_in, pos_in, file_out, pos_out,
-					 len, remap_flags & REMAP_FILE_DEDUP);
+					 len, remap_flags);
 }
 
 const struct inode_operations ocfs2_file_iops = {
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 36c56dfbe4853..df9781567ec05 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4825,7 +4825,7 @@ int ocfs2_reflink_remap_range(struct file *file_in,
 			      struct file *file_out,
 			      loff_t pos_out,
 			      u64 len,
-			      bool is_dedupe)
+			      unsigned int remap_flags)
 {
 	struct inode *inode_in = file_inode(file_in);
 	struct inode *inode_out = file_inode(file_out);
@@ -4851,7 +4851,7 @@ int ocfs2_reflink_remap_range(struct file *file_in,
 		goto out_unlock;
 
 	ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out,
-			&len, is_dedupe);
+			&len, remap_flags);
 	if (ret <= 0)
 		goto out_unlock;
 
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index 4af55bf4b35b9..d2c5f526edff4 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -120,6 +120,6 @@ int ocfs2_reflink_remap_range(struct file *file_in,
 			      struct file *file_out,
 			      loff_t pos_out,
 			      u64 len,
-			      bool is_dedupe);
+			      unsigned int remap_flags);
 
 #endif /* OCFS2_REFCOUNTTREE_H */
diff --git a/fs/read_write.c b/fs/read_write.c
index 766bdcb381f34..2013816892846 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1722,14 +1722,14 @@ static int generic_remap_check_len(struct inode *inode_in,
 				   struct inode *inode_out,
 				   loff_t pos_out,
 				   u64 *len,
-				   bool is_dedupe)
+				   unsigned int remap_flags)
 {
 	u64 blkmask = i_blocksize(inode_in) - 1;
 
 	if ((*len & blkmask) == 0)
 		return 0;
 
-	if (is_dedupe)
+	if (remap_flags & REMAP_FILE_DEDUP)
 		*len &= ~blkmask;
 	else if (pos_out + *len < i_size_read(inode_out))
 		return -EINVAL;
@@ -1747,7 +1747,7 @@ static int generic_remap_check_len(struct inode *inode_in,
  */
 int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
 				  struct file *file_out, loff_t pos_out,
-				  u64 *len, bool is_dedupe)
+				  u64 *len, unsigned int remap_flags)
 {
 	struct inode *inode_in = file_inode(file_in);
 	struct inode *inode_out = file_inode(file_out);
@@ -1771,7 +1771,7 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
 	if (*len == 0) {
 		loff_t isize = i_size_read(inode_in);
 
-		if (is_dedupe || pos_in == isize)
+		if ((remap_flags & REMAP_FILE_DEDUP) || pos_in == isize)
 			return 0;
 		if (pos_in > isize)
 			return -EINVAL;
@@ -1782,7 +1782,7 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
 
 	/* Check that we don't violate system file offset limits. */
 	ret = generic_remap_checks(file_in, pos_in, file_out, pos_out, len,
-			is_dedupe);
+			(remap_flags & REMAP_FILE_DEDUP));
 	if (ret)
 		return ret;
 
@@ -1804,7 +1804,7 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
 	/*
 	 * Check that the extents are the same.
 	 */
-	if (is_dedupe) {
+	if (remap_flags & REMAP_FILE_DEDUP) {
 		bool		is_same = false;
 
 		ret = vfs_dedupe_file_range_compare(inode_in, pos_in,
@@ -1816,7 +1816,7 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
 	}
 
 	ret = generic_remap_check_len(inode_in, inode_out, pos_out, len,
-			is_dedupe);
+			remap_flags);
 	if (ret)
 		return ret;
 
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 2ad94d508f80f..20314eb4677a0 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -932,7 +932,7 @@ xfs_file_remap_range(
 		return -EINVAL;
 
 	return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
-			len, remap_flags & REMAP_FILE_DEDUP);
+			len, remap_flags);
 }
 
 STATIC int
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index a7757a128a78f..29aab196ce7e1 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -921,13 +921,14 @@ xfs_reflink_update_dest(
 	struct xfs_inode	*dest,
 	xfs_off_t		newlen,
 	xfs_extlen_t		cowextsize,
-	bool			is_dedupe)
+	unsigned int		remap_flags)
 {
 	struct xfs_mount	*mp = dest->i_mount;
 	struct xfs_trans	*tp;
 	int			error;
 
-	if (is_dedupe && newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0)
+	if ((remap_flags & REMAP_FILE_DEDUP) &&
+	    newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0)
 		return 0;
 
 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
@@ -948,7 +949,7 @@ xfs_reflink_update_dest(
 		dest->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE;
 	}
 
-	if (!is_dedupe) {
+	if (!(remap_flags & REMAP_FILE_DEDUP)) {
 		xfs_trans_ichgtime(tp, dest,
 				   XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
 	}
@@ -1296,7 +1297,7 @@ xfs_reflink_remap_prep(
 	struct file		*file_out,
 	loff_t			pos_out,
 	u64			*len,
-	bool			is_dedupe)
+	unsigned int		remap_flags)
 {
 	struct inode		*inode_in = file_inode(file_in);
 	struct xfs_inode	*src = XFS_I(inode_in);
@@ -1327,7 +1328,7 @@ xfs_reflink_remap_prep(
 		goto out_unlock;
 
 	ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out,
-			len, is_dedupe);
+			len, remap_flags);
 	if (ret <= 0)
 		goto out_unlock;
 
@@ -1336,7 +1337,7 @@ xfs_reflink_remap_prep(
 	 * from the source file so we don't try to dedupe the partial
 	 * EOF block.
 	 */
-	if (is_dedupe) {
+	if (remap_flags & REMAP_FILE_DEDUP) {
 		*len &= ~blkmask;
 	} else if (*len & blkmask) {
 		/*
@@ -1372,7 +1373,7 @@ xfs_reflink_remap_prep(
 				   PAGE_ALIGN(pos_out + *len) - 1);
 
 	/* If we're altering the file contents... */
-	if (!is_dedupe) {
+	if (!(remap_flags & REMAP_FILE_DEDUP)) {
 		/*
 		 * ...update the timestamps (which will grab the ilock again
 		 * from xfs_fs_dirty_inode, so we have to call it before we
@@ -1410,7 +1411,7 @@ xfs_reflink_remap_range(
 	struct file		*file_out,
 	loff_t			pos_out,
 	u64			len,
-	bool			is_dedupe)
+	unsigned int		remap_flags)
 {
 	struct inode		*inode_in = file_inode(file_in);
 	struct xfs_inode	*src = XFS_I(inode_in);
@@ -1430,7 +1431,7 @@ xfs_reflink_remap_range(
 
 	/* Prepare and then clone file data. */
 	ret = xfs_reflink_remap_prep(file_in, pos_in, file_out, pos_out,
-			&len, is_dedupe);
+			&len, remap_flags);
 	if (ret <= 0)
 		return ret;
 
@@ -1457,7 +1458,7 @@ xfs_reflink_remap_range(
 		cowextsize = src->i_d.di_cowextsize;
 
 	ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize,
-			is_dedupe);
+			remap_flags);
 
 out_unlock:
 	xfs_reflink_remap_unlock(file_in, file_out);
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index c585ad9552b23..6f82d628bf175 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -28,7 +28,8 @@ extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset,
 		xfs_off_t count);
 extern int xfs_reflink_recover_cow(struct xfs_mount *mp);
 extern int xfs_reflink_remap_range(struct file *file_in, loff_t pos_in,
-		struct file *file_out, loff_t pos_out, u64 len, bool is_dedupe);
+		struct file *file_out, loff_t pos_out, u64 len,
+		unsigned int remap_flags);
 extern int xfs_reflink_inode_has_shared_extents(struct xfs_trans *tp,
 		struct xfs_inode *ip, bool *has_shared);
 extern int xfs_reflink_clear_inode_flag(struct xfs_inode *ip,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 888cef35c7d70..631c28ce1436d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1844,7 +1844,7 @@ extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *,
 				   loff_t, size_t, unsigned int);
 extern int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
 					 struct file *file_out, loff_t pos_out,
-					 u64 *count, bool is_dedupe);
+					 u64 *count, unsigned int remap_flags);
 extern int do_clone_file_range(struct file *file_in, loff_t pos_in,
 			       struct file *file_out, loff_t pos_out, u64 len);
 extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
-- 
GitLab


From 3d28193e1df043764deb7abdaba5e3a6660bc393 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 30 Oct 2018 10:41:34 +1100
Subject: [PATCH 0286/1890] vfs: pass remap flags to generic_remap_checks

Pass the same remap flags to generic_remap_checks for consistency.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/read_write.c    | 2 +-
 include/linux/fs.h | 2 +-
 mm/filemap.c       | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/read_write.c b/fs/read_write.c
index 2013816892846..ebcbfc4f29075 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1782,7 +1782,7 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
 
 	/* Check that we don't violate system file offset limits. */
 	ret = generic_remap_checks(file_in, pos_in, file_out, pos_out, len,
-			(remap_flags & REMAP_FILE_DEDUP));
+			remap_flags);
 	if (ret)
 		return ret;
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 631c28ce1436d..c5435ca811329 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2986,7 +2986,7 @@ extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
 extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *);
 extern int generic_remap_checks(struct file *file_in, loff_t pos_in,
 				struct file *file_out, loff_t pos_out,
-				uint64_t *count, bool is_dedupe);
+				uint64_t *count, unsigned int remap_flags);
 extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
diff --git a/mm/filemap.c b/mm/filemap.c
index 84b7301e41a01..410dc58f7b166 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2994,7 +2994,7 @@ EXPORT_SYMBOL(generic_write_checks);
  */
 int generic_remap_checks(struct file *file_in, loff_t pos_in,
 			 struct file *file_out, loff_t pos_out,
-			 uint64_t *req_count, bool is_dedupe)
+			 uint64_t *req_count, unsigned int remap_flags)
 {
 	struct inode *inode_in = file_in->f_mapping->host;
 	struct inode *inode_out = file_out->f_mapping->host;
@@ -3016,7 +3016,7 @@ int generic_remap_checks(struct file *file_in, loff_t pos_in,
 	size_out = i_size_read(inode_out);
 
 	/* Dedupe requires both ranges to be within EOF. */
-	if (is_dedupe &&
+	if ((remap_flags & REMAP_FILE_DEDUP) &&
 	    (pos_in >= size_in || pos_in + count > size_in ||
 	     pos_out >= size_out || pos_out + count > size_out))
 		return -EINVAL;
-- 
GitLab


From 8dde90bca6fca3736ea20109654bcf6dcf2ecf1d Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 30 Oct 2018 10:41:41 +1100
Subject: [PATCH 0287/1890] vfs: remap helper should update destination inode
 metadata

Extend generic_remap_file_range_prep to handle inode metadata updates
when remapping into a file.  If the operation can possibly alter the
file contents, we must update the ctime and mtime and remove security
privileges, just like we do for regular file writes.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/read_write.c      | 19 +++++++++++++++++++
 fs/xfs/xfs_reflink.c | 23 -----------------------
 2 files changed, 19 insertions(+), 23 deletions(-)

diff --git a/fs/read_write.c b/fs/read_write.c
index ebcbfc4f29075..b61bd3fc71545 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1820,6 +1820,25 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
 	if (ret)
 		return ret;
 
+	/* If can't alter the file contents, we're done. */
+	if (!(remap_flags & REMAP_FILE_DEDUP)) {
+		/* Update the timestamps, since we can alter file contents. */
+		if (!(file_out->f_mode & FMODE_NOCMTIME)) {
+			ret = file_update_time(file_out);
+			if (ret)
+				return ret;
+		}
+
+		/*
+		 * Clear the security bits if the process is not being run by
+		 * root.  This keeps people from modifying setuid and setgid
+		 * binaries.
+		 */
+		ret = file_remove_privs(file_out);
+		if (ret)
+			return ret;
+	}
+
 	return 1;
 }
 EXPORT_SYMBOL(generic_remap_file_range_prep);
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 29aab196ce7e1..2d7dd8b28d7c2 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1372,29 +1372,6 @@ xfs_reflink_remap_prep(
 	truncate_inode_pages_range(&inode_out->i_data, pos_out,
 				   PAGE_ALIGN(pos_out + *len) - 1);
 
-	/* If we're altering the file contents... */
-	if (!(remap_flags & REMAP_FILE_DEDUP)) {
-		/*
-		 * ...update the timestamps (which will grab the ilock again
-		 * from xfs_fs_dirty_inode, so we have to call it before we
-		 * take the ilock).
-		 */
-		if (!(file_out->f_mode & FMODE_NOCMTIME)) {
-			ret = file_update_time(file_out);
-			if (ret)
-				goto out_unlock;
-		}
-
-		/*
-		 * ...clear the security bits if the process is not being run
-		 * by root.  This keeps people from modifying setuid and setgid
-		 * binaries.
-		 */
-		ret = file_remove_privs(file_out);
-		if (ret)
-			goto out_unlock;
-	}
-
 	return 1;
 out_unlock:
 	xfs_reflink_remap_unlock(file_in, file_out);
-- 
GitLab


From 42ec3d4c02187a18e27ff94b409ec27234bf2ffd Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 30 Oct 2018 10:41:49 +1100
Subject: [PATCH 0288/1890] vfs: make remap_file_range functions take and
 return bytes completed

Change the remap_file_range functions to take a number of bytes to
operate upon and return the number of bytes they operated on.  This is a
requirement for allowing fs implementations to return short clone/dedupe
results to the user, which will enable us to obey resource limits in a
graceful manner.

A subsequent patch will enable copy_file_range to signal to the
->clone_file_range implementation that it can handle a short length,
which will be returned in the function's return value.  For now the
short return is not implemented anywhere so the behavior won't change --
either copy_file_range manages to clone the entire range or it tries an
alternative.

Neither clone ioctl can take advantage of this, alas.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 Documentation/filesystems/vfs.txt | 10 +++----
 fs/btrfs/ctree.h                  |  6 ++--
 fs/btrfs/ioctl.c                  | 13 ++++----
 fs/cifs/cifsfs.c                  |  6 ++--
 fs/ioctl.c                        | 10 ++++++-
 fs/nfs/nfs4file.c                 |  6 ++--
 fs/nfsd/vfs.c                     |  8 +++--
 fs/ocfs2/file.c                   | 16 +++++-----
 fs/ocfs2/refcounttree.c           |  2 +-
 fs/ocfs2/refcounttree.h           |  2 +-
 fs/overlayfs/copy_up.c            |  6 ++--
 fs/overlayfs/file.c               | 12 ++++----
 fs/read_write.c                   | 49 +++++++++++++++++--------------
 fs/xfs/xfs_file.c                 |  9 ++++--
 fs/xfs/xfs_reflink.c              |  4 +--
 fs/xfs/xfs_reflink.h              |  2 +-
 include/linux/fs.h                | 27 +++++++++--------
 mm/filemap.c                      |  2 +-
 18 files changed, 108 insertions(+), 82 deletions(-)

diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 6f5babfee27b7..1bd2919deaca0 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -883,9 +883,9 @@ struct file_operations {
 	unsigned (*mmap_capabilities)(struct file *);
 #endif
 	ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, loff_t, size_t, unsigned int);
-	int (*remap_file_range)(struct file *file_in, loff_t pos_in,
-				struct file *file_out, loff_t pos_out,
-				u64 len, unsigned int remap_flags);
+	loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in,
+				   struct file *file_out, loff_t pos_out,
+				   loff_t len, unsigned int remap_flags);
 	int (*fadvise)(struct file *, loff_t, loff_t, int);
 };
 
@@ -966,8 +966,8 @@ otherwise noted.
 	implementation should remap len bytes at pos_in of the source file into
 	the dest file at pos_out.  Implementations must handle callers passing
 	in len == 0; this means "remap to the end of the source file".  The
-	return value should be zero if all bytes were remapped, or the usual
-	negative error code if the remapping did not succeed completely.
+	return value should the number of bytes remapped, or the usual
+	negative error code if errors occurred before any bytes were remapped.
 	The remap_flags parameter accepts REMAP_FILE_* flags.  If
 	REMAP_FILE_DEDUP is set then the implementation must only remap if the
 	requested file ranges have identical contents.
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 124a05662fc29..771a961d77ad4 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3247,9 +3247,9 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages,
 		      size_t num_pages, loff_t pos, size_t write_bytes,
 		      struct extent_state **cached);
 int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end);
-int btrfs_remap_file_range(struct file *file_in, loff_t pos_in,
-			   struct file *file_out, loff_t pos_out, u64 len,
-			   unsigned int remap_flags);
+loff_t btrfs_remap_file_range(struct file *file_in, loff_t pos_in,
+			      struct file *file_out, loff_t pos_out,
+			      loff_t len, unsigned int remap_flags);
 
 /* tree-defrag.c */
 int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index bfd99c66723e2..b0c513e10977b 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -4328,10 +4328,12 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
 	return ret;
 }
 
-int btrfs_remap_file_range(struct file *src_file, loff_t off,
-		struct file *dst_file, loff_t destoff, u64 len,
+loff_t btrfs_remap_file_range(struct file *src_file, loff_t off,
+		struct file *dst_file, loff_t destoff, loff_t len,
 		unsigned int remap_flags)
 {
+	int ret;
+
 	if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
 		return -EINVAL;
 
@@ -4349,10 +4351,11 @@ int btrfs_remap_file_range(struct file *src_file, loff_t off,
 			return -EINVAL;
 		}
 
-		return btrfs_extent_same(src, off, len, dst, destoff);
+		ret = btrfs_extent_same(src, off, len, dst, destoff);
+	} else {
+		ret = btrfs_clone_files(dst_file, src_file, off, len, destoff);
 	}
-
-	return btrfs_clone_files(dst_file, src_file, off, len, destoff);
+	return ret < 0 ? ret : len;
 }
 
 static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index e8144d0dcde2e..5ca71c6c8be2f 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -975,8 +975,8 @@ const struct inode_operations cifs_symlink_inode_ops = {
 	.listxattr = cifs_listxattr,
 };
 
-static int cifs_remap_file_range(struct file *src_file, loff_t off,
-		struct file *dst_file, loff_t destoff, u64 len,
+static loff_t cifs_remap_file_range(struct file *src_file, loff_t off,
+		struct file *dst_file, loff_t destoff, loff_t len,
 		unsigned int remap_flags)
 {
 	struct inode *src_inode = file_inode(src_file);
@@ -1029,7 +1029,7 @@ static int cifs_remap_file_range(struct file *src_file, loff_t off,
 	unlock_two_nondirectories(src_inode, target_inode);
 out:
 	free_xid(xid);
-	return rc;
+	return rc < 0 ? rc : len;
 }
 
 ssize_t cifs_file_copychunk_range(unsigned int xid,
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 2005529af5608..72537b68c272d 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -223,6 +223,7 @@ static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd,
 			     u64 off, u64 olen, u64 destoff)
 {
 	struct fd src_file = fdget(srcfd);
+	loff_t cloned;
 	int ret;
 
 	if (!src_file.file)
@@ -230,7 +231,14 @@ static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd,
 	ret = -EXDEV;
 	if (src_file.file->f_path.mnt != dst_file->f_path.mnt)
 		goto fdput;
-	ret = vfs_clone_file_range(src_file.file, off, dst_file, destoff, olen);
+	cloned = vfs_clone_file_range(src_file.file, off, dst_file, destoff,
+				      olen);
+	if (cloned < 0)
+		ret = cloned;
+	else if (olen && cloned != olen)
+		ret = -EINVAL;
+	else
+		ret = 0;
 fdput:
 	fdput(src_file);
 	return ret;
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index ae5780ce41dcf..46d691ba04bc8 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -180,8 +180,8 @@ static long nfs42_fallocate(struct file *filep, int mode, loff_t offset, loff_t
 	return nfs42_proc_allocate(filep, offset, len);
 }
 
-static int nfs42_remap_file_range(struct file *src_file, loff_t src_off,
-		struct file *dst_file, loff_t dst_off, u64 count,
+static loff_t nfs42_remap_file_range(struct file *src_file, loff_t src_off,
+		struct file *dst_file, loff_t dst_off, loff_t count,
 		unsigned int remap_flags)
 {
 	struct inode *dst_inode = file_inode(dst_file);
@@ -244,7 +244,7 @@ static int nfs42_remap_file_range(struct file *src_file, loff_t src_off,
 		inode_unlock(src_inode);
 	}
 out:
-	return ret;
+	return ret < 0 ? ret : count;
 }
 #endif /* CONFIG_NFS_V4_2 */
 
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index b53e76391e525..ac6cb6101cbec 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -541,8 +541,12 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
 __be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst,
 		u64 dst_pos, u64 count)
 {
-	return nfserrno(vfs_clone_file_range(src, src_pos, dst, dst_pos,
-					     count));
+	loff_t cloned;
+
+	cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count);
+	if (count && cloned != count)
+		cloned = -EINVAL;
+	return nfserrno(cloned < 0 ? cloned : 0);
 }
 
 ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst,
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 9809b0e5746f5..fbaeafe44b5fa 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2527,18 +2527,18 @@ static loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int whence)
 	return offset;
 }
 
-static int ocfs2_remap_file_range(struct file *file_in,
-				  loff_t pos_in,
-				  struct file *file_out,
-				  loff_t pos_out,
-				  u64 len,
-				  unsigned int remap_flags)
+static loff_t ocfs2_remap_file_range(struct file *file_in, loff_t pos_in,
+				     struct file *file_out, loff_t pos_out,
+				     loff_t len, unsigned int remap_flags)
 {
+	int ret;
+
 	if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
 		return -EINVAL;
 
-	return ocfs2_reflink_remap_range(file_in, pos_in, file_out, pos_out,
-					 len, remap_flags);
+	ret = ocfs2_reflink_remap_range(file_in, pos_in, file_out, pos_out,
+					len, remap_flags);
+	return ret < 0 ? ret : len;
 }
 
 const struct inode_operations ocfs2_file_iops = {
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index df9781567ec05..6a42c04ac0ab0 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4824,7 +4824,7 @@ int ocfs2_reflink_remap_range(struct file *file_in,
 			      loff_t pos_in,
 			      struct file *file_out,
 			      loff_t pos_out,
-			      u64 len,
+			      loff_t len,
 			      unsigned int remap_flags)
 {
 	struct inode *inode_in = file_inode(file_in);
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index d2c5f526edff4..eb65c1d0843c2 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -119,7 +119,7 @@ int ocfs2_reflink_remap_range(struct file *file_in,
 			      loff_t pos_in,
 			      struct file *file_out,
 			      loff_t pos_out,
-			      u64 len,
+			      loff_t len,
 			      unsigned int remap_flags);
 
 #endif /* OCFS2_REFCOUNTTREE_H */
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 1cc797a08a5b5..8750b72355169 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -125,6 +125,7 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
 	struct file *new_file;
 	loff_t old_pos = 0;
 	loff_t new_pos = 0;
+	loff_t cloned;
 	int error = 0;
 
 	if (len == 0)
@@ -141,11 +142,10 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
 	}
 
 	/* Try to use clone_file_range to clone up within the same fs */
-	error = do_clone_file_range(old_file, 0, new_file, 0, len);
-	if (!error)
+	cloned = do_clone_file_range(old_file, 0, new_file, 0, len);
+	if (cloned == len)
 		goto out;
 	/* Couldn't clone, so now we try to copy the data */
-	error = 0;
 
 	/* FIXME: copy up sparse files efficiently */
 	while (len) {
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index fffb36fd5920a..6c3fec6168e9a 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -434,14 +434,14 @@ enum ovl_copyop {
 	OVL_DEDUPE,
 };
 
-static ssize_t ovl_copyfile(struct file *file_in, loff_t pos_in,
+static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in,
 			    struct file *file_out, loff_t pos_out,
-			    u64 len, unsigned int flags, enum ovl_copyop op)
+			    loff_t len, unsigned int flags, enum ovl_copyop op)
 {
 	struct inode *inode_out = file_inode(file_out);
 	struct fd real_in, real_out;
 	const struct cred *old_cred;
-	ssize_t ret;
+	loff_t ret;
 
 	ret = ovl_real_fdget(file_out, &real_out);
 	if (ret)
@@ -489,9 +489,9 @@ static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in,
 			    OVL_COPY);
 }
 
-static int ovl_remap_file_range(struct file *file_in, loff_t pos_in,
-				struct file *file_out, loff_t pos_out,
-				u64 len, unsigned int remap_flags)
+static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in,
+				   struct file *file_out, loff_t pos_out,
+				   loff_t len, unsigned int remap_flags)
 {
 	enum ovl_copyop op;
 
diff --git a/fs/read_write.c b/fs/read_write.c
index b61bd3fc71545..356641afa4874 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1589,10 +1589,13 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
 	 * more efficient if both clone and copy are supported (e.g. NFS).
 	 */
 	if (file_in->f_op->remap_file_range) {
-		ret = file_in->f_op->remap_file_range(file_in, pos_in,
-				file_out, pos_out, len, 0);
-		if (ret == 0) {
-			ret = len;
+		loff_t cloned;
+
+		cloned = file_in->f_op->remap_file_range(file_in, pos_in,
+				file_out, pos_out,
+				min_t(loff_t, MAX_RW_COUNT, len), 0);
+		if (cloned > 0) {
+			ret = cloned;
 			goto done;
 		}
 	}
@@ -1686,11 +1689,12 @@ SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in,
 	return ret;
 }
 
-static int remap_verify_area(struct file *file, loff_t pos, u64 len, bool write)
+static int remap_verify_area(struct file *file, loff_t pos, loff_t len,
+			     bool write)
 {
 	struct inode *inode = file_inode(file);
 
-	if (unlikely(pos < 0))
+	if (unlikely(pos < 0 || len < 0))
 		return -EINVAL;
 
 	 if (unlikely((loff_t) (pos + len) < 0))
@@ -1721,7 +1725,7 @@ static int remap_verify_area(struct file *file, loff_t pos, u64 len, bool write)
 static int generic_remap_check_len(struct inode *inode_in,
 				   struct inode *inode_out,
 				   loff_t pos_out,
-				   u64 *len,
+				   loff_t *len,
 				   unsigned int remap_flags)
 {
 	u64 blkmask = i_blocksize(inode_in) - 1;
@@ -1747,7 +1751,7 @@ static int generic_remap_check_len(struct inode *inode_in,
  */
 int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
 				  struct file *file_out, loff_t pos_out,
-				  u64 *len, unsigned int remap_flags)
+				  loff_t *len, unsigned int remap_flags)
 {
 	struct inode *inode_in = file_inode(file_in);
 	struct inode *inode_out = file_inode(file_out);
@@ -1843,12 +1847,12 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
 }
 EXPORT_SYMBOL(generic_remap_file_range_prep);
 
-int do_clone_file_range(struct file *file_in, loff_t pos_in,
-			struct file *file_out, loff_t pos_out, u64 len)
+loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
+			   struct file *file_out, loff_t pos_out, loff_t len)
 {
 	struct inode *inode_in = file_inode(file_in);
 	struct inode *inode_out = file_inode(file_out);
-	int ret;
+	loff_t ret;
 
 	if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
 		return -EISDIR;
@@ -1881,19 +1885,19 @@ int do_clone_file_range(struct file *file_in, loff_t pos_in,
 
 	ret = file_in->f_op->remap_file_range(file_in, pos_in,
 			file_out, pos_out, len, 0);
-	if (!ret) {
-		fsnotify_access(file_in);
-		fsnotify_modify(file_out);
-	}
+	if (ret < 0)
+		return ret;
 
+	fsnotify_access(file_in);
+	fsnotify_modify(file_out);
 	return ret;
 }
 EXPORT_SYMBOL(do_clone_file_range);
 
-int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
-			 struct file *file_out, loff_t pos_out, u64 len)
+loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
+			    struct file *file_out, loff_t pos_out, loff_t len)
 {
-	int ret;
+	loff_t ret;
 
 	file_start_write(file_out);
 	ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len);
@@ -1999,10 +2003,11 @@ int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
 }
 EXPORT_SYMBOL(vfs_dedupe_file_range_compare);
 
-int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
-			      struct file *dst_file, loff_t dst_pos, u64 len)
+loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
+				 struct file *dst_file, loff_t dst_pos,
+				 loff_t len)
 {
-	s64 ret;
+	loff_t ret;
 
 	ret = mnt_want_write_file(dst_file);
 	if (ret)
@@ -2051,7 +2056,7 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
 	int i;
 	int ret;
 	u16 count = same->dest_count;
-	int deduped;
+	loff_t deduped;
 
 	if (!(file->f_mode & FMODE_READ))
 		return -EINVAL;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 20314eb4677a0..38fde4e117148 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -919,20 +919,23 @@ xfs_file_fallocate(
 	return error;
 }
 
-STATIC int
+STATIC loff_t
 xfs_file_remap_range(
 	struct file	*file_in,
 	loff_t		pos_in,
 	struct file	*file_out,
 	loff_t		pos_out,
-	u64		len,
+	loff_t		len,
 	unsigned int	remap_flags)
 {
+	int		ret;
+
 	if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
 		return -EINVAL;
 
-	return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
+	ret = xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
 			len, remap_flags);
+	return ret < 0 ? ret : len;
 }
 
 STATIC int
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 2d7dd8b28d7c2..3dbe5fb7e9c06 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1296,7 +1296,7 @@ xfs_reflink_remap_prep(
 	loff_t			pos_in,
 	struct file		*file_out,
 	loff_t			pos_out,
-	u64			*len,
+	loff_t			*len,
 	unsigned int		remap_flags)
 {
 	struct inode		*inode_in = file_inode(file_in);
@@ -1387,7 +1387,7 @@ xfs_reflink_remap_range(
 	loff_t			pos_in,
 	struct file		*file_out,
 	loff_t			pos_out,
-	u64			len,
+	loff_t			len,
 	unsigned int		remap_flags)
 {
 	struct inode		*inode_in = file_inode(file_in);
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index 6f82d628bf175..c3c46c276fe18 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -28,7 +28,7 @@ extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset,
 		xfs_off_t count);
 extern int xfs_reflink_recover_cow(struct xfs_mount *mp);
 extern int xfs_reflink_remap_range(struct file *file_in, loff_t pos_in,
-		struct file *file_out, loff_t pos_out, u64 len,
+		struct file *file_out, loff_t pos_out, loff_t len,
 		unsigned int remap_flags);
 extern int xfs_reflink_inode_has_shared_extents(struct xfs_trans *tp,
 		struct xfs_inode *ip, bool *has_shared);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c5435ca811329..c72d8c3c065a8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1777,9 +1777,9 @@ struct file_operations {
 #endif
 	ssize_t (*copy_file_range)(struct file *, loff_t, struct file *,
 			loff_t, size_t, unsigned int);
-	int (*remap_file_range)(struct file *file_in, loff_t pos_in,
-				struct file *file_out, loff_t pos_out,
-				u64 len, unsigned int remap_flags);
+	loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in,
+				   struct file *file_out, loff_t pos_out,
+				   loff_t len, unsigned int remap_flags);
 	int (*fadvise)(struct file *, loff_t, loff_t, int);
 } __randomize_layout;
 
@@ -1844,19 +1844,22 @@ extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *,
 				   loff_t, size_t, unsigned int);
 extern int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
 					 struct file *file_out, loff_t pos_out,
-					 u64 *count, unsigned int remap_flags);
-extern int do_clone_file_range(struct file *file_in, loff_t pos_in,
-			       struct file *file_out, loff_t pos_out, u64 len);
-extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
-				struct file *file_out, loff_t pos_out, u64 len);
+					 loff_t *count,
+					 unsigned int remap_flags);
+extern loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
+				  struct file *file_out, loff_t pos_out,
+				  loff_t len);
+extern loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
+				   struct file *file_out, loff_t pos_out,
+				   loff_t len);
 extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
 					 struct inode *dest, loff_t destoff,
 					 loff_t len, bool *is_same);
 extern int vfs_dedupe_file_range(struct file *file,
 				 struct file_dedupe_range *same);
-extern int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
-				     struct file *dst_file, loff_t dst_pos,
-				     u64 len);
+extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
+					struct file *dst_file, loff_t dst_pos,
+					loff_t len);
 
 
 struct super_operations {
@@ -2986,7 +2989,7 @@ extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
 extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *);
 extern int generic_remap_checks(struct file *file_in, loff_t pos_in,
 				struct file *file_out, loff_t pos_out,
-				uint64_t *count, unsigned int remap_flags);
+				loff_t *count, unsigned int remap_flags);
 extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
diff --git a/mm/filemap.c b/mm/filemap.c
index 410dc58f7b166..e9091d731f846 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2994,7 +2994,7 @@ EXPORT_SYMBOL(generic_write_checks);
  */
 int generic_remap_checks(struct file *file_in, loff_t pos_in,
 			 struct file *file_out, loff_t pos_out,
-			 uint64_t *req_count, unsigned int remap_flags)
+			 loff_t *req_count, unsigned int remap_flags)
 {
 	struct inode *inode_in = file_in->f_mapping->host;
 	struct inode *inode_out = file_out->f_mapping->host;
-- 
GitLab


From 452ce65951a2f0719e4e119ecca134c06cfe22ee Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 30 Oct 2018 10:41:56 +1100
Subject: [PATCH 0289/1890] vfs: plumb remap flags through the vfs clone
 functions

Plumb a remap_flags argument through the {do,vfs}_clone_file_range
functions so that clone can take advantage of it.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/ioctl.c             |  2 +-
 fs/nfsd/vfs.c          |  2 +-
 fs/overlayfs/copy_up.c |  2 +-
 fs/overlayfs/file.c    |  6 +++---
 fs/read_write.c        | 13 +++++++++----
 include/linux/fs.h     |  4 ++--
 6 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/fs/ioctl.c b/fs/ioctl.c
index 72537b68c272d..505275ec55969 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -232,7 +232,7 @@ static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd,
 	if (src_file.file->f_path.mnt != dst_file->f_path.mnt)
 		goto fdput;
 	cloned = vfs_clone_file_range(src_file.file, off, dst_file, destoff,
-				      olen);
+				      olen, 0);
 	if (cloned < 0)
 		ret = cloned;
 	else if (olen && cloned != olen)
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index ac6cb6101cbec..726fc5b2b27a5 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -543,7 +543,7 @@ __be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst,
 {
 	loff_t cloned;
 
-	cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count);
+	cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count, 0);
 	if (count && cloned != count)
 		cloned = -EINVAL;
 	return nfserrno(cloned < 0 ? cloned : 0);
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 8750b72355169..5f82fece64a06 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -142,7 +142,7 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
 	}
 
 	/* Try to use clone_file_range to clone up within the same fs */
-	cloned = do_clone_file_range(old_file, 0, new_file, 0, len);
+	cloned = do_clone_file_range(old_file, 0, new_file, 0, len, 0);
 	if (cloned == len)
 		goto out;
 	/* Couldn't clone, so now we try to copy the data */
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 6c3fec6168e9a..0393815c89717 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -462,7 +462,7 @@ static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in,
 
 	case OVL_CLONE:
 		ret = vfs_clone_file_range(real_in.file, pos_in,
-					   real_out.file, pos_out, len);
+					   real_out.file, pos_out, len, flags);
 		break;
 
 	case OVL_DEDUPE:
@@ -512,8 +512,8 @@ static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in,
 	     !ovl_inode_upper(file_inode(file_out))))
 		return -EPERM;
 
-	return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, 0,
-			    op);
+	return ovl_copyfile(file_in, pos_in, file_out, pos_out, len,
+			    remap_flags, op);
 }
 
 const struct file_operations ovl_file_operations = {
diff --git a/fs/read_write.c b/fs/read_write.c
index 356641afa4874..0d1ac1b9bc223 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1848,12 +1848,15 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
 EXPORT_SYMBOL(generic_remap_file_range_prep);
 
 loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
-			   struct file *file_out, loff_t pos_out, loff_t len)
+			   struct file *file_out, loff_t pos_out,
+			   loff_t len, unsigned int remap_flags)
 {
 	struct inode *inode_in = file_inode(file_in);
 	struct inode *inode_out = file_inode(file_out);
 	loff_t ret;
 
+	WARN_ON_ONCE(remap_flags);
+
 	if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
 		return -EISDIR;
 	if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
@@ -1884,7 +1887,7 @@ loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
 		return ret;
 
 	ret = file_in->f_op->remap_file_range(file_in, pos_in,
-			file_out, pos_out, len, 0);
+			file_out, pos_out, len, remap_flags);
 	if (ret < 0)
 		return ret;
 
@@ -1895,12 +1898,14 @@ loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
 EXPORT_SYMBOL(do_clone_file_range);
 
 loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
-			    struct file *file_out, loff_t pos_out, loff_t len)
+			    struct file *file_out, loff_t pos_out,
+			    loff_t len, unsigned int remap_flags)
 {
 	loff_t ret;
 
 	file_start_write(file_out);
-	ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len);
+	ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len,
+				  remap_flags);
 	file_end_write(file_out);
 
 	return ret;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c72d8c3c065a8..1c5e55d2a67d3 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1848,10 +1848,10 @@ extern int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
 					 unsigned int remap_flags);
 extern loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
 				  struct file *file_out, loff_t pos_out,
-				  loff_t len);
+				  loff_t len, unsigned int remap_flags);
 extern loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
 				   struct file *file_out, loff_t pos_out,
-				   loff_t len);
+				   loff_t len, unsigned int remap_flags);
 extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
 					 struct inode *dest, loff_t destoff,
 					 loff_t len, bool *is_same);
-- 
GitLab


From df3658361951e17364f1e1c3fa92862a990ad8bd Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 30 Oct 2018 10:42:03 +1100
Subject: [PATCH 0290/1890] vfs: plumb remap flags through the vfs dedupe
 functions

Plumb a remap_flags argument through the vfs_dedupe_file_range_one
functions so that dedupe can take advantage of it.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/overlayfs/file.c | 3 ++-
 fs/read_write.c     | 9 ++++++---
 include/linux/fs.h  | 2 +-
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 0393815c89717..84dd957efa24a 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -467,7 +467,8 @@ static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in,
 
 	case OVL_DEDUPE:
 		ret = vfs_dedupe_file_range_one(real_in.file, pos_in,
-						real_out.file, pos_out, len);
+						real_out.file, pos_out, len,
+						flags);
 		break;
 	}
 	revert_creds(old_cred);
diff --git a/fs/read_write.c b/fs/read_write.c
index 0d1ac1b9bc223..ea30666013b0d 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -2010,10 +2010,12 @@ EXPORT_SYMBOL(vfs_dedupe_file_range_compare);
 
 loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
 				 struct file *dst_file, loff_t dst_pos,
-				 loff_t len)
+				 loff_t len, unsigned int remap_flags)
 {
 	loff_t ret;
 
+	WARN_ON_ONCE(remap_flags & ~(REMAP_FILE_DEDUP));
+
 	ret = mnt_want_write_file(dst_file);
 	if (ret)
 		return ret;
@@ -2044,7 +2046,7 @@ loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
 	}
 
 	ret = dst_file->f_op->remap_file_range(src_file, src_pos, dst_file,
-			dst_pos, len, REMAP_FILE_DEDUP);
+			dst_pos, len, remap_flags | REMAP_FILE_DEDUP);
 out_drop_write:
 	mnt_drop_write_file(dst_file);
 
@@ -2112,7 +2114,8 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
 		}
 
 		deduped = vfs_dedupe_file_range_one(file, off, dst_file,
-						    info->dest_offset, len);
+						    info->dest_offset, len,
+						    0);
 		if (deduped == -EBADE)
 			info->status = FILE_DEDUPE_RANGE_DIFFERS;
 		else if (deduped < 0)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1c5e55d2a67d3..544ab5083b488 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1859,7 +1859,7 @@ extern int vfs_dedupe_file_range(struct file *file,
 				 struct file_dedupe_range *same);
 extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
 					struct file *dst_file, loff_t dst_pos,
-					loff_t len);
+					loff_t len, unsigned int remap_flags);
 
 
 struct super_operations {
-- 
GitLab


From eca3654e3cc7d93e9734d0fa96cfb15c7f356244 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 30 Oct 2018 10:42:10 +1100
Subject: [PATCH 0291/1890] vfs: enable remap callers that can handle short
 operations

Plumb in a remap flag that enables the filesystem remap handler to
shorten remapping requests for callers that can handle it.  Now
copy_file_range can report partial success (in case we run up against
alignment problems, resource limits, etc.).

We also enable CAN_SHORTEN for fideduperange to maintain existing
userspace-visible behavior where xfs/btrfs shorten the dedupe range to
avoid stale post-eof data exposure.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 Documentation/filesystems/vfs.txt |  4 +++-
 fs/read_write.c                   | 28 ++++++++++++++++++++--------
 include/linux/fs.h                |  5 +++--
 mm/filemap.c                      | 11 +++++++----
 4 files changed, 33 insertions(+), 15 deletions(-)

diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 1bd2919deaca0..5f71a252e2e0f 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -970,7 +970,9 @@ otherwise noted.
 	negative error code if errors occurred before any bytes were remapped.
 	The remap_flags parameter accepts REMAP_FILE_* flags.  If
 	REMAP_FILE_DEDUP is set then the implementation must only remap if the
-	requested file ranges have identical contents.
+	requested file ranges have identical contents.  If REMAP_CAN_SHORTEN is
+	set, the caller is ok with the implementation shortening the request
+	length to satisfy alignment or EOF requirements (or any other reason).
 
   fadvise: possibly called by the fadvise64() system call.
 
diff --git a/fs/read_write.c b/fs/read_write.c
index ea30666013b0d..c0bcc1a206502 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1593,7 +1593,8 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
 
 		cloned = file_in->f_op->remap_file_range(file_in, pos_in,
 				file_out, pos_out,
-				min_t(loff_t, MAX_RW_COUNT, len), 0);
+				min_t(loff_t, MAX_RW_COUNT, len),
+				REMAP_FILE_CAN_SHORTEN);
 		if (cloned > 0) {
 			ret = cloned;
 			goto done;
@@ -1721,6 +1722,8 @@ static int remap_verify_area(struct file *file, loff_t pos, loff_t len,
  * can't meaningfully compare post-EOF contents.
  *
  * For clone we only link a partial EOF block above the destination file's EOF.
+ *
+ * Shorten the request if possible.
  */
 static int generic_remap_check_len(struct inode *inode_in,
 				   struct inode *inode_out,
@@ -1729,16 +1732,24 @@ static int generic_remap_check_len(struct inode *inode_in,
 				   unsigned int remap_flags)
 {
 	u64 blkmask = i_blocksize(inode_in) - 1;
+	loff_t new_len = *len;
 
 	if ((*len & blkmask) == 0)
 		return 0;
 
-	if (remap_flags & REMAP_FILE_DEDUP)
-		*len &= ~blkmask;
-	else if (pos_out + *len < i_size_read(inode_out))
-		return -EINVAL;
+	if ((remap_flags & REMAP_FILE_DEDUP) ||
+	    pos_out + *len < i_size_read(inode_out))
+		new_len &= ~blkmask;
 
-	return 0;
+	if (new_len == *len)
+		return 0;
+
+	if (remap_flags & REMAP_FILE_CAN_SHORTEN) {
+		*len = new_len;
+		return 0;
+	}
+
+	return (remap_flags & REMAP_FILE_DEDUP) ? -EBADE : -EINVAL;
 }
 
 /*
@@ -2014,7 +2025,8 @@ loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
 {
 	loff_t ret;
 
-	WARN_ON_ONCE(remap_flags & ~(REMAP_FILE_DEDUP));
+	WARN_ON_ONCE(remap_flags & ~(REMAP_FILE_DEDUP |
+				     REMAP_FILE_CAN_SHORTEN));
 
 	ret = mnt_want_write_file(dst_file);
 	if (ret)
@@ -2115,7 +2127,7 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
 
 		deduped = vfs_dedupe_file_range_one(file, off, dst_file,
 						    info->dest_offset, len,
-						    0);
+						    REMAP_FILE_CAN_SHORTEN);
 		if (deduped == -EBADE)
 			info->status = FILE_DEDUPE_RANGE_DIFFERS;
 		else if (deduped < 0)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 544ab5083b488..34c22d6950113 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1727,8 +1727,10 @@ struct block_device_operations;
  * See Documentation/filesystems/vfs.txt for more details about this call.
  *
  * REMAP_FILE_DEDUP: only remap if contents identical (i.e. deduplicate)
+ * REMAP_FILE_CAN_SHORTEN: caller can handle a shortened request
  */
 #define REMAP_FILE_DEDUP		(1 << 0)
+#define REMAP_FILE_CAN_SHORTEN		(1 << 1)
 
 /*
  * These flags signal that the caller is ok with altering various aspects of
@@ -1736,9 +1738,8 @@ struct block_device_operations;
  * implementation; the vfs remap helper functions can take advantage of them.
  * Flags in this category exist to preserve the quirky behavior of the hoisted
  * btrfs clone/dedupe ioctls.
- * There are no flags yet, but subsequent commits will add some.
  */
-#define REMAP_FILE_ADVISORY		(0)
+#define REMAP_FILE_ADVISORY		(REMAP_FILE_CAN_SHORTEN)
 
 struct iov_iter;
 
diff --git a/mm/filemap.c b/mm/filemap.c
index e9091d731f846..1775d4ad3317c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3045,8 +3045,7 @@ int generic_remap_checks(struct file *file_in, loff_t pos_in,
 		bcount = ALIGN(size_in, bs) - pos_in;
 	} else {
 		if (!IS_ALIGNED(count, bs))
-			return -EINVAL;
-
+			count = ALIGN_DOWN(count, bs);
 		bcount = count;
 	}
 
@@ -3056,10 +3055,14 @@ int generic_remap_checks(struct file *file_in, loff_t pos_in,
 	    pos_out < pos_in + bcount)
 		return -EINVAL;
 
-	/* For now we don't support changing the length. */
-	if (*req_count != count)
+	/*
+	 * We shortened the request but the caller can't deal with that, so
+	 * bounce the request back to userspace.
+	 */
+	if (*req_count != count && !(remap_flags & REMAP_FILE_CAN_SHORTEN))
 		return -EINVAL;
 
+	*req_count = count;
 	return 0;
 }
 
-- 
GitLab


From c32e5f39953fa6bbff35c655bdcb7b3128f1e79f Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 30 Oct 2018 10:42:17 +1100
Subject: [PATCH 0292/1890] vfs: hide file range comparison function

There are no callers of vfs_dedupe_file_range_compare, so we might as
well make it a static helper and remove the export.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/read_write.c    | 187 ++++++++++++++++++++++-----------------------
 include/linux/fs.h |   3 -
 2 files changed, 91 insertions(+), 99 deletions(-)

diff --git a/fs/read_write.c b/fs/read_write.c
index c0bcc1a206502..e4d295d0d2361 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1752,6 +1752,97 @@ static int generic_remap_check_len(struct inode *inode_in,
 	return (remap_flags & REMAP_FILE_DEDUP) ? -EBADE : -EINVAL;
 }
 
+/*
+ * Read a page's worth of file data into the page cache.  Return the page
+ * locked.
+ */
+static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset)
+{
+	struct page *page;
+
+	page = read_mapping_page(inode->i_mapping, offset >> PAGE_SHIFT, NULL);
+	if (IS_ERR(page))
+		return page;
+	if (!PageUptodate(page)) {
+		put_page(page);
+		return ERR_PTR(-EIO);
+	}
+	lock_page(page);
+	return page;
+}
+
+/*
+ * Compare extents of two files to see if they are the same.
+ * Caller must have locked both inodes to prevent write races.
+ */
+static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
+					 struct inode *dest, loff_t destoff,
+					 loff_t len, bool *is_same)
+{
+	loff_t src_poff;
+	loff_t dest_poff;
+	void *src_addr;
+	void *dest_addr;
+	struct page *src_page;
+	struct page *dest_page;
+	loff_t cmp_len;
+	bool same;
+	int error;
+
+	error = -EINVAL;
+	same = true;
+	while (len) {
+		src_poff = srcoff & (PAGE_SIZE - 1);
+		dest_poff = destoff & (PAGE_SIZE - 1);
+		cmp_len = min(PAGE_SIZE - src_poff,
+			      PAGE_SIZE - dest_poff);
+		cmp_len = min(cmp_len, len);
+		if (cmp_len <= 0)
+			goto out_error;
+
+		src_page = vfs_dedupe_get_page(src, srcoff);
+		if (IS_ERR(src_page)) {
+			error = PTR_ERR(src_page);
+			goto out_error;
+		}
+		dest_page = vfs_dedupe_get_page(dest, destoff);
+		if (IS_ERR(dest_page)) {
+			error = PTR_ERR(dest_page);
+			unlock_page(src_page);
+			put_page(src_page);
+			goto out_error;
+		}
+		src_addr = kmap_atomic(src_page);
+		dest_addr = kmap_atomic(dest_page);
+
+		flush_dcache_page(src_page);
+		flush_dcache_page(dest_page);
+
+		if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len))
+			same = false;
+
+		kunmap_atomic(dest_addr);
+		kunmap_atomic(src_addr);
+		unlock_page(dest_page);
+		unlock_page(src_page);
+		put_page(dest_page);
+		put_page(src_page);
+
+		if (!same)
+			break;
+
+		srcoff += cmp_len;
+		destoff += cmp_len;
+		len -= cmp_len;
+	}
+
+	*is_same = same;
+	return 0;
+
+out_error:
+	return error;
+}
+
 /*
  * Check that the two inodes are eligible for cloning, the ranges make
  * sense, and then flush all dirty data.  Caller must ensure that the
@@ -1923,102 +2014,6 @@ loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
 }
 EXPORT_SYMBOL(vfs_clone_file_range);
 
-/*
- * Read a page's worth of file data into the page cache.  Return the page
- * locked.
- */
-static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset)
-{
-	struct address_space *mapping;
-	struct page *page;
-	pgoff_t n;
-
-	n = offset >> PAGE_SHIFT;
-	mapping = inode->i_mapping;
-	page = read_mapping_page(mapping, n, NULL);
-	if (IS_ERR(page))
-		return page;
-	if (!PageUptodate(page)) {
-		put_page(page);
-		return ERR_PTR(-EIO);
-	}
-	lock_page(page);
-	return page;
-}
-
-/*
- * Compare extents of two files to see if they are the same.
- * Caller must have locked both inodes to prevent write races.
- */
-int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
-				  struct inode *dest, loff_t destoff,
-				  loff_t len, bool *is_same)
-{
-	loff_t src_poff;
-	loff_t dest_poff;
-	void *src_addr;
-	void *dest_addr;
-	struct page *src_page;
-	struct page *dest_page;
-	loff_t cmp_len;
-	bool same;
-	int error;
-
-	error = -EINVAL;
-	same = true;
-	while (len) {
-		src_poff = srcoff & (PAGE_SIZE - 1);
-		dest_poff = destoff & (PAGE_SIZE - 1);
-		cmp_len = min(PAGE_SIZE - src_poff,
-			      PAGE_SIZE - dest_poff);
-		cmp_len = min(cmp_len, len);
-		if (cmp_len <= 0)
-			goto out_error;
-
-		src_page = vfs_dedupe_get_page(src, srcoff);
-		if (IS_ERR(src_page)) {
-			error = PTR_ERR(src_page);
-			goto out_error;
-		}
-		dest_page = vfs_dedupe_get_page(dest, destoff);
-		if (IS_ERR(dest_page)) {
-			error = PTR_ERR(dest_page);
-			unlock_page(src_page);
-			put_page(src_page);
-			goto out_error;
-		}
-		src_addr = kmap_atomic(src_page);
-		dest_addr = kmap_atomic(dest_page);
-
-		flush_dcache_page(src_page);
-		flush_dcache_page(dest_page);
-
-		if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len))
-			same = false;
-
-		kunmap_atomic(dest_addr);
-		kunmap_atomic(src_addr);
-		unlock_page(dest_page);
-		unlock_page(src_page);
-		put_page(dest_page);
-		put_page(src_page);
-
-		if (!same)
-			break;
-
-		srcoff += cmp_len;
-		destoff += cmp_len;
-		len -= cmp_len;
-	}
-
-	*is_same = same;
-	return 0;
-
-out_error:
-	return error;
-}
-EXPORT_SYMBOL(vfs_dedupe_file_range_compare);
-
 loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
 				 struct file *dst_file, loff_t dst_pos,
 				 loff_t len, unsigned int remap_flags)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 34c22d6950113..346036a84f184 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1853,9 +1853,6 @@ extern loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
 extern loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
 				   struct file *file_out, loff_t pos_out,
 				   loff_t len, unsigned int remap_flags);
-extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
-					 struct inode *dest, loff_t destoff,
-					 loff_t len, bool *is_same);
 extern int vfs_dedupe_file_range(struct file *file,
 				 struct file_dedupe_range *same);
 extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
-- 
GitLab


From 8c5c836bd6c3b9f9fc1c5a210d630b8c42f4f7df Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 30 Oct 2018 10:42:24 +1100
Subject: [PATCH 0293/1890] vfs: clean up generic_remap_file_range_prep return
 value

Since the remap prep function can update the length of the remap
request, we can change this function to return the usual return status
instead of the odd behavior it has now.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/ocfs2/refcounttree.c | 2 +-
 fs/read_write.c         | 6 +++---
 fs/xfs/xfs_reflink.c    | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 6a42c04ac0ab0..46bbd315c39f4 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4852,7 +4852,7 @@ int ocfs2_reflink_remap_range(struct file *file_in,
 
 	ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out,
 			&len, remap_flags);
-	if (ret <= 0)
+	if (ret < 0 || len == 0)
 		goto out_unlock;
 
 	/* Lock out changes to the allocation maps and remap. */
diff --git a/fs/read_write.c b/fs/read_write.c
index e4d295d0d2361..6b40a43edf18c 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1848,8 +1848,8 @@ static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
  * sense, and then flush all dirty data.  Caller must ensure that the
  * inodes have been locked against any other modifications.
  *
- * Returns: 0 for "nothing to clone", 1 for "something to clone", or
- * the usual negative error code.
+ * If there's an error, then the usual negative error code is returned.
+ * Otherwise returns 0 with *len set to the request length.
  */
 int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
 				  struct file *file_out, loff_t pos_out,
@@ -1945,7 +1945,7 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
 			return ret;
 	}
 
-	return 1;
+	return 0;
 }
 EXPORT_SYMBOL(generic_remap_file_range_prep);
 
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 3dbe5fb7e9c06..9b1ea42c81d13 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1329,7 +1329,7 @@ xfs_reflink_remap_prep(
 
 	ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out,
 			len, remap_flags);
-	if (ret <= 0)
+	if (ret < 0 || *len == 0)
 		goto out_unlock;
 
 	/*
@@ -1409,7 +1409,7 @@ xfs_reflink_remap_range(
 	/* Prepare and then clone file data. */
 	ret = xfs_reflink_remap_prep(file_in, pos_in, file_out, pos_out,
 			&len, remap_flags);
-	if (ret <= 0)
+	if (ret < 0 || len == 0)
 		return ret;
 
 	trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
-- 
GitLab


From 2587b1f1fae2d992de67e57b519f36e7c9ba70cd Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 30 Oct 2018 10:42:56 +1100
Subject: [PATCH 0294/1890] ocfs2: truncate page cache for clone destination
 file before remapping

When cloning blocks into another file, truncate the page cache before we
start remapping blocks so that concurrent reads wait for us to finish.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/ocfs2/refcounttree.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 46bbd315c39f4..2a5c96bc96774 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4861,14 +4861,12 @@ int ocfs2_reflink_remap_range(struct file *file_in,
 		down_write_nested(&OCFS2_I(inode_out)->ip_alloc_sem,
 				  SINGLE_DEPTH_NESTING);
 
-	ret = ocfs2_reflink_remap_blocks(inode_in, in_bh, pos_in, inode_out,
-					 out_bh, pos_out, len);
-
 	/* Zap any page cache for the destination file's range. */
-	if (!ret)
-		truncate_inode_pages_range(&inode_out->i_data, pos_out,
-					   PAGE_ALIGN(pos_out + len) - 1);
+	truncate_inode_pages_range(&inode_out->i_data, pos_out,
+				   PAGE_ALIGN(pos_out + len) - 1);
 
+	ret = ocfs2_reflink_remap_blocks(inode_in, in_bh, pos_in, inode_out,
+					 out_bh, pos_out, len);
 	up_write(&OCFS2_I(inode_in)->ip_alloc_sem);
 	if (!same_inode)
 		up_write(&OCFS2_I(inode_out)->ip_alloc_sem);
-- 
GitLab


From a8a94302c9754d05069ee149cf01df8aa5116aaa Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 30 Oct 2018 10:43:16 +1100
Subject: [PATCH 0295/1890] ocfs2: fix pagecache truncation prior to reflink

Prior to remapping blocks, it is necessary to remove pages from the
destination file's page cache.  Unfortunately, the truncation is not
aggressive enough -- if page size > block size, we'll end up zeroing
subpage blocks instead of removing them.  So, round the start offset
down and the end offset up to page boundaries.  We already wrote all
the dirty data so the larger range should be fine.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/ocfs2/refcounttree.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 2a5c96bc96774..7c709229e1084 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4862,8 +4862,9 @@ int ocfs2_reflink_remap_range(struct file *file_in,
 				  SINGLE_DEPTH_NESTING);
 
 	/* Zap any page cache for the destination file's range. */
-	truncate_inode_pages_range(&inode_out->i_data, pos_out,
-				   PAGE_ALIGN(pos_out + len) - 1);
+	truncate_inode_pages_range(&inode_out->i_data,
+				   round_down(pos_out, PAGE_SIZE),
+				   round_up(pos_out + len, PAGE_SIZE) - 1);
 
 	ret = ocfs2_reflink_remap_blocks(inode_in, in_bh, pos_in, inode_out,
 					 out_bh, pos_out, len);
-- 
GitLab


From 900611a1bd06ef4a79980e58babc61ef056c81ab Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 30 Oct 2018 10:44:45 +1100
Subject: [PATCH 0296/1890] ocfs2: support partial clone range and dedupe range

Change the ocfs2 remap code to allow for returning partial results.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/ocfs2/file.c         |  7 ++--
 fs/ocfs2/refcounttree.c | 72 ++++++++++++++++++++++-------------------
 fs/ocfs2/refcounttree.h | 12 +++----
 3 files changed, 46 insertions(+), 45 deletions(-)

diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index fbaeafe44b5fa..8125c5ccf821a 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2531,14 +2531,11 @@ static loff_t ocfs2_remap_file_range(struct file *file_in, loff_t pos_in,
 				     struct file *file_out, loff_t pos_out,
 				     loff_t len, unsigned int remap_flags)
 {
-	int ret;
-
 	if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
 		return -EINVAL;
 
-	ret = ocfs2_reflink_remap_range(file_in, pos_in, file_out, pos_out,
-					len, remap_flags);
-	return ret < 0 ? ret : len;
+	return ocfs2_reflink_remap_range(file_in, pos_in, file_out, pos_out,
+			len, remap_flags);
 }
 
 const struct inode_operations ocfs2_file_iops = {
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 7c709229e1084..c7409578657b7 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4507,14 +4507,14 @@ static int ocfs2_reflink_update_dest(struct inode *dest,
 }
 
 /* Remap the range pos_in:len in s_inode to pos_out:len in t_inode. */
-static int ocfs2_reflink_remap_extent(struct inode *s_inode,
-				      struct buffer_head *s_bh,
-				      loff_t pos_in,
-				      struct inode *t_inode,
-				      struct buffer_head *t_bh,
-				      loff_t pos_out,
-				      loff_t len,
-				      struct ocfs2_cached_dealloc_ctxt *dealloc)
+static loff_t ocfs2_reflink_remap_extent(struct inode *s_inode,
+					 struct buffer_head *s_bh,
+					 loff_t pos_in,
+					 struct inode *t_inode,
+					 struct buffer_head *t_bh,
+					 loff_t pos_out,
+					 loff_t len,
+					 struct ocfs2_cached_dealloc_ctxt *dealloc)
 {
 	struct ocfs2_extent_tree s_et;
 	struct ocfs2_extent_tree t_et;
@@ -4522,8 +4522,9 @@ static int ocfs2_reflink_remap_extent(struct inode *s_inode,
 	struct buffer_head *ref_root_bh = NULL;
 	struct ocfs2_refcount_tree *ref_tree;
 	struct ocfs2_super *osb;
+	loff_t remapped_bytes = 0;
 	loff_t pstart, plen;
-	u32 p_cluster, num_clusters, slast, spos, tpos;
+	u32 p_cluster, num_clusters, slast, spos, tpos, remapped_clus = 0;
 	unsigned int ext_flags;
 	int ret = 0;
 
@@ -4605,30 +4606,34 @@ static int ocfs2_reflink_remap_extent(struct inode *s_inode,
 next_loop:
 		spos += num_clusters;
 		tpos += num_clusters;
+		remapped_clus += num_clusters;
 	}
 
-out:
-	return ret;
+	goto out;
 out_unlock_refcount:
 	ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
 	brelse(ref_root_bh);
-	return ret;
+out:
+	remapped_bytes = ocfs2_clusters_to_bytes(t_inode->i_sb, remapped_clus);
+	remapped_bytes = min_t(loff_t, len, remapped_bytes);
+
+	return remapped_bytes > 0 ? remapped_bytes : ret;
 }
 
 /* Set up refcount tree and remap s_inode to t_inode. */
-static int ocfs2_reflink_remap_blocks(struct inode *s_inode,
-				      struct buffer_head *s_bh,
-				      loff_t pos_in,
-				      struct inode *t_inode,
-				      struct buffer_head *t_bh,
-				      loff_t pos_out,
-				      loff_t len)
+static loff_t ocfs2_reflink_remap_blocks(struct inode *s_inode,
+					 struct buffer_head *s_bh,
+					 loff_t pos_in,
+					 struct inode *t_inode,
+					 struct buffer_head *t_bh,
+					 loff_t pos_out,
+					 loff_t len)
 {
 	struct ocfs2_cached_dealloc_ctxt dealloc;
 	struct ocfs2_super *osb;
 	struct ocfs2_dinode *dis;
 	struct ocfs2_dinode *dit;
-	int ret;
+	loff_t ret;
 
 	osb = OCFS2_SB(s_inode->i_sb);
 	dis = (struct ocfs2_dinode *)s_bh->b_data;
@@ -4700,7 +4705,7 @@ static int ocfs2_reflink_remap_blocks(struct inode *s_inode,
 	/* Actually remap extents now. */
 	ret = ocfs2_reflink_remap_extent(s_inode, s_bh, pos_in, t_inode, t_bh,
 					 pos_out, len, &dealloc);
-	if (ret) {
+	if (ret < 0) {
 		mlog_errno(ret);
 		goto out;
 	}
@@ -4820,18 +4825,19 @@ static void ocfs2_reflink_inodes_unlock(struct inode *s_inode,
 }
 
 /* Link a range of blocks from one file to another. */
-int ocfs2_reflink_remap_range(struct file *file_in,
-			      loff_t pos_in,
-			      struct file *file_out,
-			      loff_t pos_out,
-			      loff_t len,
-			      unsigned int remap_flags)
+loff_t ocfs2_reflink_remap_range(struct file *file_in,
+				 loff_t pos_in,
+				 struct file *file_out,
+				 loff_t pos_out,
+				 loff_t len,
+				 unsigned int remap_flags)
 {
 	struct inode *inode_in = file_inode(file_in);
 	struct inode *inode_out = file_inode(file_out);
 	struct ocfs2_super *osb = OCFS2_SB(inode_in->i_sb);
 	struct buffer_head *in_bh = NULL, *out_bh = NULL;
 	bool same_inode = (inode_in == inode_out);
+	loff_t remapped = 0;
 	ssize_t ret;
 
 	if (!ocfs2_refcount_tree(osb))
@@ -4866,12 +4872,13 @@ int ocfs2_reflink_remap_range(struct file *file_in,
 				   round_down(pos_out, PAGE_SIZE),
 				   round_up(pos_out + len, PAGE_SIZE) - 1);
 
-	ret = ocfs2_reflink_remap_blocks(inode_in, in_bh, pos_in, inode_out,
-					 out_bh, pos_out, len);
+	remapped = ocfs2_reflink_remap_blocks(inode_in, in_bh, pos_in,
+			inode_out, out_bh, pos_out, len);
 	up_write(&OCFS2_I(inode_in)->ip_alloc_sem);
 	if (!same_inode)
 		up_write(&OCFS2_I(inode_out)->ip_alloc_sem);
-	if (ret) {
+	if (remapped < 0) {
+		ret = remapped;
 		mlog_errno(ret);
 		goto out_unlock;
 	}
@@ -4889,10 +4896,7 @@ int ocfs2_reflink_remap_range(struct file *file_in,
 		goto out_unlock;
 	}
 
-	ocfs2_reflink_inodes_unlock(inode_in, in_bh, inode_out, out_bh);
-	return 0;
-
 out_unlock:
 	ocfs2_reflink_inodes_unlock(inode_in, in_bh, inode_out, out_bh);
-	return ret;
+	return remapped > 0 ? remapped : ret;
 }
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index eb65c1d0843c2..9e64daba395de 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -115,11 +115,11 @@ int ocfs2_reflink_ioctl(struct inode *inode,
 			const char __user *oldname,
 			const char __user *newname,
 			bool preserve);
-int ocfs2_reflink_remap_range(struct file *file_in,
-			      loff_t pos_in,
-			      struct file *file_out,
-			      loff_t pos_out,
-			      loff_t len,
-			      unsigned int remap_flags);
+loff_t ocfs2_reflink_remap_range(struct file *file_in,
+				 loff_t pos_in,
+				 struct file *file_out,
+				 loff_t pos_out,
+				 loff_t len,
+				 unsigned int remap_flags);
 
 #endif /* OCFS2_REFCOUNTTREE_H */
-- 
GitLab


From 65f098e91ffbb64d7ca2ed93b6ab2428a2e34452 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 30 Oct 2018 10:45:48 +1100
Subject: [PATCH 0297/1890] ocfs2: remove ocfs2_reflink_remap_range

Since ocfs2_remap_file_range is a thin shell around
ocfs2_remap_remap_range, move everything from the latter into the
former.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/ocfs2/file.c         |  68 +++++++++++++++++++++++-
 fs/ocfs2/refcounttree.c | 113 +++++++---------------------------------
 fs/ocfs2/refcounttree.h |  24 ++++++---
 3 files changed, 102 insertions(+), 103 deletions(-)

diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 8125c5ccf821a..fe570824b9913 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2531,11 +2531,75 @@ static loff_t ocfs2_remap_file_range(struct file *file_in, loff_t pos_in,
 				     struct file *file_out, loff_t pos_out,
 				     loff_t len, unsigned int remap_flags)
 {
+	struct inode *inode_in = file_inode(file_in);
+	struct inode *inode_out = file_inode(file_out);
+	struct ocfs2_super *osb = OCFS2_SB(inode_in->i_sb);
+	struct buffer_head *in_bh = NULL, *out_bh = NULL;
+	bool same_inode = (inode_in == inode_out);
+	loff_t remapped = 0;
+	ssize_t ret;
+
 	if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
 		return -EINVAL;
+	if (!ocfs2_refcount_tree(osb))
+		return -EOPNOTSUPP;
+	if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
+		return -EROFS;
 
-	return ocfs2_reflink_remap_range(file_in, pos_in, file_out, pos_out,
-			len, remap_flags);
+	/* Lock both files against IO */
+	ret = ocfs2_reflink_inodes_lock(inode_in, &in_bh, inode_out, &out_bh);
+	if (ret)
+		return ret;
+
+	/* Check file eligibility and prepare for block sharing. */
+	ret = -EINVAL;
+	if ((OCFS2_I(inode_in)->ip_flags & OCFS2_INODE_SYSTEM_FILE) ||
+	    (OCFS2_I(inode_out)->ip_flags & OCFS2_INODE_SYSTEM_FILE))
+		goto out_unlock;
+
+	ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out,
+			&len, remap_flags);
+	if (ret < 0 || len == 0)
+		goto out_unlock;
+
+	/* Lock out changes to the allocation maps and remap. */
+	down_write(&OCFS2_I(inode_in)->ip_alloc_sem);
+	if (!same_inode)
+		down_write_nested(&OCFS2_I(inode_out)->ip_alloc_sem,
+				  SINGLE_DEPTH_NESTING);
+
+	/* Zap any page cache for the destination file's range. */
+	truncate_inode_pages_range(&inode_out->i_data,
+				   round_down(pos_out, PAGE_SIZE),
+				   round_up(pos_out + len, PAGE_SIZE) - 1);
+
+	remapped = ocfs2_reflink_remap_blocks(inode_in, in_bh, pos_in,
+			inode_out, out_bh, pos_out, len);
+	up_write(&OCFS2_I(inode_in)->ip_alloc_sem);
+	if (!same_inode)
+		up_write(&OCFS2_I(inode_out)->ip_alloc_sem);
+	if (remapped < 0) {
+		ret = remapped;
+		mlog_errno(ret);
+		goto out_unlock;
+	}
+
+	/*
+	 * Empty the extent map so that we may get the right extent
+	 * record from the disk.
+	 */
+	ocfs2_extent_map_trunc(inode_in, 0);
+	ocfs2_extent_map_trunc(inode_out, 0);
+
+	ret = ocfs2_reflink_update_dest(inode_out, out_bh, pos_out + len);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_unlock;
+	}
+
+out_unlock:
+	ocfs2_reflink_inodes_unlock(inode_in, in_bh, inode_out, out_bh);
+	return remapped > 0 ? remapped : ret;
 }
 
 const struct inode_operations ocfs2_file_iops = {
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index c7409578657b7..dc66b80585ec8 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4468,9 +4468,9 @@ int ocfs2_reflink_ioctl(struct inode *inode,
 }
 
 /* Update destination inode size, if necessary. */
-static int ocfs2_reflink_update_dest(struct inode *dest,
-				     struct buffer_head *d_bh,
-				     loff_t newlen)
+int ocfs2_reflink_update_dest(struct inode *dest,
+			      struct buffer_head *d_bh,
+			      loff_t newlen)
 {
 	handle_t *handle;
 	int ret;
@@ -4621,13 +4621,13 @@ static loff_t ocfs2_reflink_remap_extent(struct inode *s_inode,
 }
 
 /* Set up refcount tree and remap s_inode to t_inode. */
-static loff_t ocfs2_reflink_remap_blocks(struct inode *s_inode,
-					 struct buffer_head *s_bh,
-					 loff_t pos_in,
-					 struct inode *t_inode,
-					 struct buffer_head *t_bh,
-					 loff_t pos_out,
-					 loff_t len)
+loff_t ocfs2_reflink_remap_blocks(struct inode *s_inode,
+				  struct buffer_head *s_bh,
+				  loff_t pos_in,
+				  struct inode *t_inode,
+				  struct buffer_head *t_bh,
+				  loff_t pos_out,
+				  loff_t len)
 {
 	struct ocfs2_cached_dealloc_ctxt dealloc;
 	struct ocfs2_super *osb;
@@ -4720,10 +4720,10 @@ static loff_t ocfs2_reflink_remap_blocks(struct inode *s_inode,
 }
 
 /* Lock an inode and grab a bh pointing to the inode. */
-static int ocfs2_reflink_inodes_lock(struct inode *s_inode,
-				     struct buffer_head **bh1,
-				     struct inode *t_inode,
-				     struct buffer_head **bh2)
+int ocfs2_reflink_inodes_lock(struct inode *s_inode,
+			      struct buffer_head **bh1,
+			      struct inode *t_inode,
+			      struct buffer_head **bh2)
 {
 	struct inode *inode1;
 	struct inode *inode2;
@@ -4808,10 +4808,10 @@ static int ocfs2_reflink_inodes_lock(struct inode *s_inode,
 }
 
 /* Unlock both inodes and release buffers. */
-static void ocfs2_reflink_inodes_unlock(struct inode *s_inode,
-					struct buffer_head *s_bh,
-					struct inode *t_inode,
-					struct buffer_head *t_bh)
+void ocfs2_reflink_inodes_unlock(struct inode *s_inode,
+				 struct buffer_head *s_bh,
+				 struct inode *t_inode,
+				 struct buffer_head *t_bh)
 {
 	ocfs2_inode_unlock(s_inode, 1);
 	ocfs2_rw_unlock(s_inode, 1);
@@ -4823,80 +4823,3 @@ static void ocfs2_reflink_inodes_unlock(struct inode *s_inode,
 	}
 	unlock_two_nondirectories(s_inode, t_inode);
 }
-
-/* Link a range of blocks from one file to another. */
-loff_t ocfs2_reflink_remap_range(struct file *file_in,
-				 loff_t pos_in,
-				 struct file *file_out,
-				 loff_t pos_out,
-				 loff_t len,
-				 unsigned int remap_flags)
-{
-	struct inode *inode_in = file_inode(file_in);
-	struct inode *inode_out = file_inode(file_out);
-	struct ocfs2_super *osb = OCFS2_SB(inode_in->i_sb);
-	struct buffer_head *in_bh = NULL, *out_bh = NULL;
-	bool same_inode = (inode_in == inode_out);
-	loff_t remapped = 0;
-	ssize_t ret;
-
-	if (!ocfs2_refcount_tree(osb))
-		return -EOPNOTSUPP;
-	if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
-		return -EROFS;
-
-	/* Lock both files against IO */
-	ret = ocfs2_reflink_inodes_lock(inode_in, &in_bh, inode_out, &out_bh);
-	if (ret)
-		return ret;
-
-	/* Check file eligibility and prepare for block sharing. */
-	ret = -EINVAL;
-	if ((OCFS2_I(inode_in)->ip_flags & OCFS2_INODE_SYSTEM_FILE) ||
-	    (OCFS2_I(inode_out)->ip_flags & OCFS2_INODE_SYSTEM_FILE))
-		goto out_unlock;
-
-	ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out,
-			&len, remap_flags);
-	if (ret < 0 || len == 0)
-		goto out_unlock;
-
-	/* Lock out changes to the allocation maps and remap. */
-	down_write(&OCFS2_I(inode_in)->ip_alloc_sem);
-	if (!same_inode)
-		down_write_nested(&OCFS2_I(inode_out)->ip_alloc_sem,
-				  SINGLE_DEPTH_NESTING);
-
-	/* Zap any page cache for the destination file's range. */
-	truncate_inode_pages_range(&inode_out->i_data,
-				   round_down(pos_out, PAGE_SIZE),
-				   round_up(pos_out + len, PAGE_SIZE) - 1);
-
-	remapped = ocfs2_reflink_remap_blocks(inode_in, in_bh, pos_in,
-			inode_out, out_bh, pos_out, len);
-	up_write(&OCFS2_I(inode_in)->ip_alloc_sem);
-	if (!same_inode)
-		up_write(&OCFS2_I(inode_out)->ip_alloc_sem);
-	if (remapped < 0) {
-		ret = remapped;
-		mlog_errno(ret);
-		goto out_unlock;
-	}
-
-	/*
-	 * Empty the extent map so that we may get the right extent
-	 * record from the disk.
-	 */
-	ocfs2_extent_map_trunc(inode_in, 0);
-	ocfs2_extent_map_trunc(inode_out, 0);
-
-	ret = ocfs2_reflink_update_dest(inode_out, out_bh, pos_out + len);
-	if (ret) {
-		mlog_errno(ret);
-		goto out_unlock;
-	}
-
-out_unlock:
-	ocfs2_reflink_inodes_unlock(inode_in, in_bh, inode_out, out_bh);
-	return remapped > 0 ? remapped : ret;
-}
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index 9e64daba395de..e9e862be4a1e5 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -115,11 +115,23 @@ int ocfs2_reflink_ioctl(struct inode *inode,
 			const char __user *oldname,
 			const char __user *newname,
 			bool preserve);
-loff_t ocfs2_reflink_remap_range(struct file *file_in,
-				 loff_t pos_in,
-				 struct file *file_out,
-				 loff_t pos_out,
-				 loff_t len,
-				 unsigned int remap_flags);
+loff_t ocfs2_reflink_remap_blocks(struct inode *s_inode,
+				  struct buffer_head *s_bh,
+				  loff_t pos_in,
+				  struct inode *t_inode,
+				  struct buffer_head *t_bh,
+				  loff_t pos_out,
+				  loff_t len);
+int ocfs2_reflink_inodes_lock(struct inode *s_inode,
+			      struct buffer_head **bh1,
+			      struct inode *t_inode,
+			      struct buffer_head **bh2);
+void ocfs2_reflink_inodes_unlock(struct inode *s_inode,
+				 struct buffer_head *s_bh,
+				 struct inode *t_inode,
+				 struct buffer_head *t_bh);
+int ocfs2_reflink_update_dest(struct inode *dest,
+			      struct buffer_head *d_bh,
+			      loff_t newlen);
 
 #endif /* OCFS2_REFCOUNTTREE_H */
-- 
GitLab


From 4918ef4ea008cd2ff47eb852894e3f9b9047f4f3 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 30 Oct 2018 10:46:33 +1100
Subject: [PATCH 0298/1890] xfs: fix pagecache truncation prior to reflink

Prior to remapping blocks, it is necessary to remove pages from the
destination file's page cache.  Unfortunately, the truncation is not
aggressive enough -- if page size > block size, we'll end up zeroing
subpage blocks instead of removing them.  So, round the start offset
down and the end offset up to page boundaries.  We already wrote all
the dirty data so the larger range shouldn't be a problem.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/xfs/xfs_reflink.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 9b1ea42c81d13..e8e86646bb4b1 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1369,8 +1369,9 @@ xfs_reflink_remap_prep(
 		goto out_unlock;
 
 	/* Zap any page cache for the destination file's range. */
-	truncate_inode_pages_range(&inode_out->i_data, pos_out,
-				   PAGE_ALIGN(pos_out + *len) - 1);
+	truncate_inode_pages_range(&inode_out->i_data,
+			round_down(pos_out, PAGE_SIZE),
+			round_up(pos_out + *len, PAGE_SIZE) - 1);
 
 	return 1;
 out_unlock:
-- 
GitLab


From 9f04aaffddb3e487f3eda1945f1a9531d6cc7628 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 30 Oct 2018 10:46:50 +1100
Subject: [PATCH 0299/1890] xfs: clean up xfs_reflink_remap_blocks call site

Move the offset <-> blocks unit conversions into
xfs_reflink_remap_blocks to make the call site less ugly.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/xfs/xfs_reflink.c | 37 ++++++++++++++++++-------------------
 1 file changed, 18 insertions(+), 19 deletions(-)

diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index e8e86646bb4b1..79dec457f7fb3 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1119,16 +1119,23 @@ xfs_reflink_remap_extent(
 STATIC int
 xfs_reflink_remap_blocks(
 	struct xfs_inode	*src,
-	xfs_fileoff_t		srcoff,
+	loff_t			pos_in,
 	struct xfs_inode	*dest,
-	xfs_fileoff_t		destoff,
-	xfs_filblks_t		len,
-	xfs_off_t		new_isize)
+	loff_t			pos_out,
+	loff_t			remap_len)
 {
 	struct xfs_bmbt_irec	imap;
+	xfs_fileoff_t		srcoff;
+	xfs_fileoff_t		destoff;
+	xfs_filblks_t		len;
+	xfs_filblks_t		range_len;
+	xfs_off_t		new_isize = pos_out + remap_len;
 	int			nimaps;
 	int			error = 0;
-	xfs_filblks_t		range_len;
+
+	destoff = XFS_B_TO_FSBT(src->i_mount, pos_out);
+	srcoff = XFS_B_TO_FSBT(src->i_mount, pos_in);
+	len = XFS_B_TO_FSB(src->i_mount, remap_len);
 
 	/* drange = (destoff, destoff + len); srange = (srcoff, srcoff + len) */
 	while (len) {
@@ -1143,7 +1150,7 @@ xfs_reflink_remap_blocks(
 		error = xfs_bmapi_read(src, srcoff, len, &imap, &nimaps, 0);
 		xfs_iunlock(src, lock_mode);
 		if (error)
-			goto err;
+			break;
 		ASSERT(nimaps == 1);
 
 		trace_xfs_reflink_remap_imap(src, srcoff, len, XFS_IO_OVERWRITE,
@@ -1157,11 +1164,11 @@ xfs_reflink_remap_blocks(
 		error = xfs_reflink_remap_extent(dest, &imap, destoff,
 				new_isize);
 		if (error)
-			goto err;
+			break;
 
 		if (fatal_signal_pending(current)) {
 			error = -EINTR;
-			goto err;
+			break;
 		}
 
 		/* Advance drange/srange */
@@ -1170,10 +1177,8 @@ xfs_reflink_remap_blocks(
 		len -= range_len;
 	}
 
-	return 0;
-
-err:
-	trace_xfs_reflink_remap_blocks_error(dest, error, _RET_IP_);
+	if (error)
+		trace_xfs_reflink_remap_blocks_error(dest, error, _RET_IP_);
 	return error;
 }
 
@@ -1396,8 +1401,6 @@ xfs_reflink_remap_range(
 	struct inode		*inode_out = file_inode(file_out);
 	struct xfs_inode	*dest = XFS_I(inode_out);
 	struct xfs_mount	*mp = src->i_mount;
-	xfs_fileoff_t		sfsbno, dfsbno;
-	xfs_filblks_t		fsblen;
 	xfs_extlen_t		cowextsize;
 	ssize_t			ret;
 
@@ -1415,11 +1418,7 @@ xfs_reflink_remap_range(
 
 	trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
 
-	dfsbno = XFS_B_TO_FSBT(mp, pos_out);
-	sfsbno = XFS_B_TO_FSBT(mp, pos_in);
-	fsblen = XFS_B_TO_FSB(mp, len);
-	ret = xfs_reflink_remap_blocks(src, sfsbno, dest, dfsbno, fsblen,
-			pos_out + len);
+	ret = xfs_reflink_remap_blocks(src, pos_in, dest, pos_out, len);
 	if (ret)
 		goto out_unlock;
 
-- 
GitLab


From 3f68c1f562f1e4c5e1a515b392a2e0a509a342d5 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 30 Oct 2018 10:47:06 +1100
Subject: [PATCH 0300/1890] xfs: support returning partial reflink results

Back when the XFS reflink code only supported clone_file_range, we were
only able to return zero or negative error codes to userspace.  However,
now that copy_file_range (which returns bytes copied) can use XFS'
clone_file_range, we have the opportunity to return partial results.
For example, if userspace sends a 1GB clone request and we run out of
space halfway through, we at least can tell userspace that we completed
512M of that request like a regular write.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/xfs/xfs_file.c    |  5 +----
 fs/xfs/xfs_reflink.c | 17 ++++++++++++-----
 fs/xfs/xfs_reflink.h |  2 +-
 3 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 38fde4e117148..7d42ab8fe6e1e 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -928,14 +928,11 @@ xfs_file_remap_range(
 	loff_t		len,
 	unsigned int	remap_flags)
 {
-	int		ret;
-
 	if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
 		return -EINVAL;
 
-	ret = xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
+	return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
 			len, remap_flags);
-	return ret < 0 ? ret : len;
 }
 
 STATIC int
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 79dec457f7fb3..4abb2aea8f314 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1122,13 +1122,15 @@ xfs_reflink_remap_blocks(
 	loff_t			pos_in,
 	struct xfs_inode	*dest,
 	loff_t			pos_out,
-	loff_t			remap_len)
+	loff_t			remap_len,
+	loff_t			*remapped)
 {
 	struct xfs_bmbt_irec	imap;
 	xfs_fileoff_t		srcoff;
 	xfs_fileoff_t		destoff;
 	xfs_filblks_t		len;
 	xfs_filblks_t		range_len;
+	xfs_filblks_t		remapped_len = 0;
 	xfs_off_t		new_isize = pos_out + remap_len;
 	int			nimaps;
 	int			error = 0;
@@ -1175,10 +1177,13 @@ xfs_reflink_remap_blocks(
 		srcoff += range_len;
 		destoff += range_len;
 		len -= range_len;
+		remapped_len += range_len;
 	}
 
 	if (error)
 		trace_xfs_reflink_remap_blocks_error(dest, error, _RET_IP_);
+	*remapped = min_t(loff_t, remap_len,
+			  XFS_FSB_TO_B(src->i_mount, remapped_len));
 	return error;
 }
 
@@ -1387,7 +1392,7 @@ xfs_reflink_remap_prep(
 /*
  * Link a range of blocks from one file to another.
  */
-int
+loff_t
 xfs_reflink_remap_range(
 	struct file		*file_in,
 	loff_t			pos_in,
@@ -1401,8 +1406,9 @@ xfs_reflink_remap_range(
 	struct inode		*inode_out = file_inode(file_out);
 	struct xfs_inode	*dest = XFS_I(inode_out);
 	struct xfs_mount	*mp = src->i_mount;
+	loff_t			remapped = 0;
 	xfs_extlen_t		cowextsize;
-	ssize_t			ret;
+	int			ret;
 
 	if (!xfs_sb_version_hasreflink(&mp->m_sb))
 		return -EOPNOTSUPP;
@@ -1418,7 +1424,8 @@ xfs_reflink_remap_range(
 
 	trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
 
-	ret = xfs_reflink_remap_blocks(src, pos_in, dest, pos_out, len);
+	ret = xfs_reflink_remap_blocks(src, pos_in, dest, pos_out, len,
+			&remapped);
 	if (ret)
 		goto out_unlock;
 
@@ -1441,7 +1448,7 @@ xfs_reflink_remap_range(
 	xfs_reflink_remap_unlock(file_in, file_out);
 	if (ret)
 		trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
-	return ret;
+	return remapped > 0 ? remapped : ret;
 }
 
 /*
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index c3c46c276fe18..cbc26ff79a8f3 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -27,7 +27,7 @@ extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset,
 extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset,
 		xfs_off_t count);
 extern int xfs_reflink_recover_cow(struct xfs_mount *mp);
-extern int xfs_reflink_remap_range(struct file *file_in, loff_t pos_in,
+extern loff_t xfs_reflink_remap_range(struct file *file_in, loff_t pos_in,
 		struct file *file_out, loff_t pos_out, loff_t len,
 		unsigned int remap_flags);
 extern int xfs_reflink_inode_has_shared_extents(struct xfs_trans *tp,
-- 
GitLab


From 7a6ccf004e234c01fb2a11771de9837c9ff3d56d Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 30 Oct 2018 10:47:16 +1100
Subject: [PATCH 0301/1890] xfs: remove redundant remap partial EOF block
 checks

Now that we've moved the partial EOF block checks to the VFS helpers, we
can remove the redundant functionality from XFS.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/xfs/xfs_reflink.c | 19 -------------------
 1 file changed, 19 deletions(-)

diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 4abb2aea8f314..bccc66316cc48 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1314,7 +1314,6 @@ xfs_reflink_remap_prep(
 	struct inode		*inode_out = file_inode(file_out);
 	struct xfs_inode	*dest = XFS_I(inode_out);
 	bool			same_inode = (inode_in == inode_out);
-	u64			blkmask = i_blocksize(inode_in) - 1;
 	ssize_t			ret;
 
 	/* Lock both files against IO */
@@ -1342,24 +1341,6 @@ xfs_reflink_remap_prep(
 	if (ret < 0 || *len == 0)
 		goto out_unlock;
 
-	/*
-	 * If the dedupe data matches, chop off the partial EOF block
-	 * from the source file so we don't try to dedupe the partial
-	 * EOF block.
-	 */
-	if (remap_flags & REMAP_FILE_DEDUP) {
-		*len &= ~blkmask;
-	} else if (*len & blkmask) {
-		/*
-		 * The user is attempting to share a partial EOF block,
-		 * if it's inside the destination EOF then reject it.
-		 */
-		if (pos_out + *len < i_size_read(inode_out)) {
-			ret = -EINVAL;
-			goto out_unlock;
-		}
-	}
-
 	/* Attach dquots to dest inode before changing block map */
 	ret = xfs_qm_dqattach(dest);
 	if (ret)
-- 
GitLab


From 3fc9f5e409319e994d113cf1327ba6ab147423c2 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 30 Oct 2018 10:47:26 +1100
Subject: [PATCH 0302/1890] xfs: remove xfs_reflink_remap_range

Since xfs_file_remap_range is a thin wrapper, move the contents of
xfs_reflink_remap_range into the shell.  This cuts down on the vfs
calls being made from internal xfs code.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/xfs/xfs_file.c    | 65 ++++++++++++++++++++++++++++++++++------
 fs/xfs/xfs_reflink.c | 70 +++-----------------------------------------
 fs/xfs/xfs_reflink.h | 10 +++++++
 3 files changed, 70 insertions(+), 75 deletions(-)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 7d42ab8fe6e1e..53c9ab8fb777f 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -919,20 +919,67 @@ xfs_file_fallocate(
 	return error;
 }
 
-STATIC loff_t
+
+loff_t
 xfs_file_remap_range(
-	struct file	*file_in,
-	loff_t		pos_in,
-	struct file	*file_out,
-	loff_t		pos_out,
-	loff_t		len,
-	unsigned int	remap_flags)
+	struct file		*file_in,
+	loff_t			pos_in,
+	struct file		*file_out,
+	loff_t			pos_out,
+	loff_t			len,
+	unsigned int		remap_flags)
 {
+	struct inode		*inode_in = file_inode(file_in);
+	struct xfs_inode	*src = XFS_I(inode_in);
+	struct inode		*inode_out = file_inode(file_out);
+	struct xfs_inode	*dest = XFS_I(inode_out);
+	struct xfs_mount	*mp = src->i_mount;
+	loff_t			remapped = 0;
+	xfs_extlen_t		cowextsize;
+	int			ret;
+
 	if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
 		return -EINVAL;
 
-	return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
-			len, remap_flags);
+	if (!xfs_sb_version_hasreflink(&mp->m_sb))
+		return -EOPNOTSUPP;
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return -EIO;
+
+	/* Prepare and then clone file data. */
+	ret = xfs_reflink_remap_prep(file_in, pos_in, file_out, pos_out,
+			&len, remap_flags);
+	if (ret < 0 || len == 0)
+		return ret;
+
+	trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
+
+	ret = xfs_reflink_remap_blocks(src, pos_in, dest, pos_out, len,
+			&remapped);
+	if (ret)
+		goto out_unlock;
+
+	/*
+	 * Carry the cowextsize hint from src to dest if we're sharing the
+	 * entire source file to the entire destination file, the source file
+	 * has a cowextsize hint, and the destination file does not.
+	 */
+	cowextsize = 0;
+	if (pos_in == 0 && len == i_size_read(inode_in) &&
+	    (src->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) &&
+	    pos_out == 0 && len >= i_size_read(inode_out) &&
+	    !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE))
+		cowextsize = src->i_d.di_cowextsize;
+
+	ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize,
+			remap_flags);
+
+out_unlock:
+	xfs_reflink_remap_unlock(file_in, file_out);
+	if (ret)
+		trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
+	return remapped > 0 ? remapped : ret;
 }
 
 STATIC int
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index bccc66316cc48..84f372f7ea044 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -916,7 +916,7 @@ xfs_reflink_set_inode_flag(
 /*
  * Update destination inode size & cowextsize hint, if necessary.
  */
-STATIC int
+int
 xfs_reflink_update_dest(
 	struct xfs_inode	*dest,
 	xfs_off_t		newlen,
@@ -1116,7 +1116,7 @@ xfs_reflink_remap_extent(
 /*
  * Iteratively remap one file's extents (and holes) to another's.
  */
-STATIC int
+int
 xfs_reflink_remap_blocks(
 	struct xfs_inode	*src,
 	loff_t			pos_in,
@@ -1232,7 +1232,7 @@ xfs_iolock_two_inodes_and_break_layout(
 }
 
 /* Unlock both inodes after they've been prepped for a range clone. */
-STATIC void
+void
 xfs_reflink_remap_unlock(
 	struct file		*file_in,
 	struct file		*file_out)
@@ -1300,7 +1300,7 @@ xfs_reflink_zero_posteof(
  * stale data in the destination file. Hence we reject these clone attempts with
  * -EINVAL in this case.
  */
-STATIC int
+int
 xfs_reflink_remap_prep(
 	struct file		*file_in,
 	loff_t			pos_in,
@@ -1370,68 +1370,6 @@ xfs_reflink_remap_prep(
 	return ret;
 }
 
-/*
- * Link a range of blocks from one file to another.
- */
-loff_t
-xfs_reflink_remap_range(
-	struct file		*file_in,
-	loff_t			pos_in,
-	struct file		*file_out,
-	loff_t			pos_out,
-	loff_t			len,
-	unsigned int		remap_flags)
-{
-	struct inode		*inode_in = file_inode(file_in);
-	struct xfs_inode	*src = XFS_I(inode_in);
-	struct inode		*inode_out = file_inode(file_out);
-	struct xfs_inode	*dest = XFS_I(inode_out);
-	struct xfs_mount	*mp = src->i_mount;
-	loff_t			remapped = 0;
-	xfs_extlen_t		cowextsize;
-	int			ret;
-
-	if (!xfs_sb_version_hasreflink(&mp->m_sb))
-		return -EOPNOTSUPP;
-
-	if (XFS_FORCED_SHUTDOWN(mp))
-		return -EIO;
-
-	/* Prepare and then clone file data. */
-	ret = xfs_reflink_remap_prep(file_in, pos_in, file_out, pos_out,
-			&len, remap_flags);
-	if (ret < 0 || len == 0)
-		return ret;
-
-	trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
-
-	ret = xfs_reflink_remap_blocks(src, pos_in, dest, pos_out, len,
-			&remapped);
-	if (ret)
-		goto out_unlock;
-
-	/*
-	 * Carry the cowextsize hint from src to dest if we're sharing the
-	 * entire source file to the entire destination file, the source file
-	 * has a cowextsize hint, and the destination file does not.
-	 */
-	cowextsize = 0;
-	if (pos_in == 0 && len == i_size_read(inode_in) &&
-	    (src->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) &&
-	    pos_out == 0 && len >= i_size_read(inode_out) &&
-	    !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE))
-		cowextsize = src->i_d.di_cowextsize;
-
-	ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize,
-			remap_flags);
-
-out_unlock:
-	xfs_reflink_remap_unlock(file_in, file_out);
-	if (ret)
-		trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
-	return remapped > 0 ? remapped : ret;
-}
-
 /*
  * The user wants to preemptively CoW all shared blocks in this file,
  * which enables us to turn off the reflink flag.  Iterate all
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index cbc26ff79a8f3..28a84edda8899 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -36,5 +36,15 @@ extern int xfs_reflink_clear_inode_flag(struct xfs_inode *ip,
 		struct xfs_trans **tpp);
 extern int xfs_reflink_unshare(struct xfs_inode *ip, xfs_off_t offset,
 		xfs_off_t len);
+extern int xfs_reflink_remap_prep(struct file *file_in, loff_t pos_in,
+		struct file *file_out, loff_t pos_out, loff_t *len,
+		unsigned int remap_flags);
+extern int xfs_reflink_remap_blocks(struct xfs_inode *src, loff_t pos_in,
+		struct xfs_inode *dest, loff_t pos_out, loff_t remap_len,
+		loff_t *remapped);
+extern int xfs_reflink_update_dest(struct xfs_inode *dest, xfs_off_t newlen,
+		xfs_extlen_t cowextsize, unsigned int remap_flags);
+extern void xfs_reflink_remap_unlock(struct file *file_in,
+		struct file *file_out);
 
 #endif /* __XFS_REFLINK_H */
-- 
GitLab


From bf4a1fcf0bc18d52cf0fce6571d6f327ab5eaf22 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 30 Oct 2018 10:47:48 +1100
Subject: [PATCH 0303/1890] xfs: remove [cm]time update from reflink calls

Now that the vfs remap helper dirties the inode [cm]time for us, xfs no
longer needs to do that on its own.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/xfs/xfs_reflink.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 84f372f7ea044..e72218477bf2d 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -927,8 +927,7 @@ xfs_reflink_update_dest(
 	struct xfs_trans	*tp;
 	int			error;
 
-	if ((remap_flags & REMAP_FILE_DEDUP) &&
-	    newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0)
+	if (newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0)
 		return 0;
 
 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
@@ -949,10 +948,6 @@ xfs_reflink_update_dest(
 		dest->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE;
 	}
 
-	if (!(remap_flags & REMAP_FILE_DEDUP)) {
-		xfs_trans_ichgtime(tp, dest,
-				   XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
-	}
 	xfs_trans_log_inode(tp, dest, XFS_ILOG_CORE);
 
 	error = xfs_trans_commit(tp);
-- 
GitLab


From 4119ba211bc4f1bf638f41e50b7a0f329f58aa16 Mon Sep 17 00:00:00 2001
From: Max Filippov <jcmvbkbc@gmail.com>
Date: Mon, 29 Oct 2018 18:30:13 -0700
Subject: [PATCH 0304/1890] xtensa: add NOTES section to the linker script

This section collects all source .note.* sections together in the
vmlinux image. Without it .note.Linux section may be placed at address
0, while the rest of the kernel is at its normal address, resulting in a
huge vmlinux.bin image that may not be linked into the xtensa Image.elf.

Cc: stable@vger.kernel.org
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 arch/xtensa/boot/Makefile        | 2 +-
 arch/xtensa/kernel/vmlinux.lds.S | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/xtensa/boot/Makefile b/arch/xtensa/boot/Makefile
index dc9e0ba7122ca..294846117fc2c 100644
--- a/arch/xtensa/boot/Makefile
+++ b/arch/xtensa/boot/Makefile
@@ -33,7 +33,7 @@ uImage: $(obj)/uImage
 boot-elf boot-redboot: $(addprefix $(obj)/,$(subdir-y))
 	$(Q)$(MAKE) $(build)=$(obj)/$@ $(MAKECMDGOALS)
 
-OBJCOPYFLAGS = --strip-all -R .comment -R .note.gnu.build-id -O binary
+OBJCOPYFLAGS = --strip-all -R .comment -R .notes -O binary
 
 vmlinux.bin: vmlinux FORCE
 	$(call if_changed,objcopy)
diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S
index a1c3edb8ad56c..fa926995d2a37 100644
--- a/arch/xtensa/kernel/vmlinux.lds.S
+++ b/arch/xtensa/kernel/vmlinux.lds.S
@@ -131,6 +131,7 @@ SECTIONS
   .fixup   : { *(.fixup) }
 
   EXCEPTION_TABLE(16)
+  NOTES
   /* Data section */
 
   _sdata = .;
-- 
GitLab


From 966c37f2d77eb44d47af8e919267b1ba675b2eca Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Fri, 26 Oct 2018 11:30:35 +0800
Subject: [PATCH 0305/1890] ipv4/igmp: fix v1/v2 switchback timeout based on
 rfc3376, 8.12

Similiar with ipv6 mcast commit 89225d1ce6af3 ("net: ipv6: mld: fix v1/v2
switchback timeout to rfc3810, 9.12.")

i) RFC3376 8.12. Older Version Querier Present Timeout says:

   The Older Version Querier Interval is the time-out for transitioning
   a host back to IGMPv3 mode once an older version query is heard.
   When an older version query is received, hosts set their Older
   Version Querier Present Timer to Older Version Querier Interval.

   This value MUST be ((the Robustness Variable) times (the Query
   Interval in the last Query received)) plus (one Query Response
   Interval).

Currently we only use a hardcode value IGMP_V1/v2_ROUTER_PRESENT_TIMEOUT.
Fix it by adding two new items mr_qi(Query Interval) and mr_qri(Query Response
Interval) in struct in_device.

Now we can calculate the switchback time via (mr_qrv * mr_qi) + mr_qri.
We need update these values when receive IGMPv3 queries.

Reported-by: Ying Xu <yinxu@redhat.com>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inetdevice.h |  4 ++-
 net/ipv4/igmp.c            | 53 ++++++++++++++++++++++++++------------
 2 files changed, 39 insertions(+), 18 deletions(-)

diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index c759d1cbcedd8..a64f21a97369a 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -37,7 +37,9 @@ struct in_device {
 	unsigned long		mr_v1_seen;
 	unsigned long		mr_v2_seen;
 	unsigned long		mr_maxdelay;
-	unsigned char		mr_qrv;
+	unsigned long		mr_qi;		/* Query Interval */
+	unsigned long		mr_qri;		/* Query Response Interval */
+	unsigned char		mr_qrv;		/* Query Robustness Variable */
 	unsigned char		mr_gq_running;
 	unsigned char		mr_ifc_count;
 	struct timer_list	mr_gq_timer;	/* general query timer */
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 4da39446da2d8..765b2b32c4a42 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -111,13 +111,10 @@
 #ifdef CONFIG_IP_MULTICAST
 /* Parameter names and values are taken from igmp-v2-06 draft */
 
-#define IGMP_V1_ROUTER_PRESENT_TIMEOUT		(400*HZ)
-#define IGMP_V2_ROUTER_PRESENT_TIMEOUT		(400*HZ)
 #define IGMP_V2_UNSOLICITED_REPORT_INTERVAL	(10*HZ)
 #define IGMP_V3_UNSOLICITED_REPORT_INTERVAL	(1*HZ)
+#define IGMP_QUERY_INTERVAL			(125*HZ)
 #define IGMP_QUERY_RESPONSE_INTERVAL		(10*HZ)
-#define IGMP_QUERY_ROBUSTNESS_VARIABLE		2
-
 
 #define IGMP_INITIAL_REPORT_DELAY		(1)
 
@@ -935,13 +932,15 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
 
 			max_delay = IGMP_QUERY_RESPONSE_INTERVAL;
 			in_dev->mr_v1_seen = jiffies +
-				IGMP_V1_ROUTER_PRESENT_TIMEOUT;
+				(in_dev->mr_qrv * in_dev->mr_qi) +
+				in_dev->mr_qri;
 			group = 0;
 		} else {
 			/* v2 router present */
 			max_delay = ih->code*(HZ/IGMP_TIMER_SCALE);
 			in_dev->mr_v2_seen = jiffies +
-				IGMP_V2_ROUTER_PRESENT_TIMEOUT;
+				(in_dev->mr_qrv * in_dev->mr_qi) +
+				in_dev->mr_qri;
 		}
 		/* cancel the interface change timer */
 		in_dev->mr_ifc_count = 0;
@@ -981,8 +980,21 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
 		if (!max_delay)
 			max_delay = 1;	/* can't mod w/ 0 */
 		in_dev->mr_maxdelay = max_delay;
-		if (ih3->qrv)
-			in_dev->mr_qrv = ih3->qrv;
+
+		/* RFC3376, 4.1.6. QRV and 4.1.7. QQIC, when the most recently
+		 * received value was zero, use the default or statically
+		 * configured value.
+		 */
+		in_dev->mr_qrv = ih3->qrv ?: net->ipv4.sysctl_igmp_qrv;
+		in_dev->mr_qi = IGMPV3_QQIC(ih3->qqic)*HZ ?: IGMP_QUERY_INTERVAL;
+
+		/* RFC3376, 8.3. Query Response Interval:
+		 * The number of seconds represented by the [Query Response
+		 * Interval] must be less than the [Query Interval].
+		 */
+		if (in_dev->mr_qri >= in_dev->mr_qi)
+			in_dev->mr_qri = (in_dev->mr_qi/HZ - 1)*HZ;
+
 		if (!group) { /* general query */
 			if (ih3->nsrcs)
 				return true;	/* no sources allowed */
@@ -1723,18 +1735,30 @@ void ip_mc_down(struct in_device *in_dev)
 	ip_mc_dec_group(in_dev, IGMP_ALL_HOSTS);
 }
 
-void ip_mc_init_dev(struct in_device *in_dev)
-{
 #ifdef CONFIG_IP_MULTICAST
+static void ip_mc_reset(struct in_device *in_dev)
+{
 	struct net *net = dev_net(in_dev->dev);
+
+	in_dev->mr_qi = IGMP_QUERY_INTERVAL;
+	in_dev->mr_qri = IGMP_QUERY_RESPONSE_INTERVAL;
+	in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
+}
+#else
+static void ip_mc_reset(struct in_device *in_dev)
+{
+}
 #endif
+
+void ip_mc_init_dev(struct in_device *in_dev)
+{
 	ASSERT_RTNL();
 
 #ifdef CONFIG_IP_MULTICAST
 	timer_setup(&in_dev->mr_gq_timer, igmp_gq_timer_expire, 0);
 	timer_setup(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire, 0);
-	in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
 #endif
+	ip_mc_reset(in_dev);
 
 	spin_lock_init(&in_dev->mc_tomb_lock);
 }
@@ -1744,15 +1768,10 @@ void ip_mc_init_dev(struct in_device *in_dev)
 void ip_mc_up(struct in_device *in_dev)
 {
 	struct ip_mc_list *pmc;
-#ifdef CONFIG_IP_MULTICAST
-	struct net *net = dev_net(in_dev->dev);
-#endif
 
 	ASSERT_RTNL();
 
-#ifdef CONFIG_IP_MULTICAST
-	in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
-#endif
+	ip_mc_reset(in_dev);
 	ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS);
 
 	for_each_pmc_rtnl(in_dev, pmc) {
-- 
GitLab


From 414dd6fb9a1a1b59983aea7bf0f79f0085ecc5b8 Mon Sep 17 00:00:00 2001
From: Tobias Jungel <tobias.jungel@gmail.com>
Date: Sun, 28 Oct 2018 12:54:10 +0100
Subject: [PATCH 0306/1890] bonding: fix length of actor system

The attribute IFLA_BOND_AD_ACTOR_SYSTEM is sent to user space having the
length of sizeof(bond->params.ad_actor_system) which is 8 byte. This
patch aligns the length to ETH_ALEN to have the same MAC address exposed
as using sysfs.

Fixes: f87fda00b6ed2 ("bonding: prevent out of bound accesses")
Signed-off-by: Tobias Jungel <tobias.jungel@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_netlink.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c
index 9697977b80f04..6b9ad86732188 100644
--- a/drivers/net/bonding/bond_netlink.c
+++ b/drivers/net/bonding/bond_netlink.c
@@ -638,8 +638,7 @@ static int bond_fill_info(struct sk_buff *skb,
 				goto nla_put_failure;
 
 			if (nla_put(skb, IFLA_BOND_AD_ACTOR_SYSTEM,
-				    sizeof(bond->params.ad_actor_system),
-				    &bond->params.ad_actor_system))
+				    ETH_ALEN, &bond->params.ad_actor_system))
 				goto nla_put_failure;
 		}
 		if (!bond_3ad_get_active_agg_info(bond, &info)) {
-- 
GitLab


From e2d00e62f24b907d49b83802ae91a6fa6254a48c Mon Sep 17 00:00:00 2001
From: Lorenzo Colitti <lorenzo@google.com>
Date: Mon, 29 Oct 2018 09:30:29 +0900
Subject: [PATCH 0307/1890] Documentation: ip-sysctl.txt: Document
 tcp_fwmark_accept

This patch documents the tcp_fwmark_accept sysctl that was
added in 3.15.

Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 163b5ff1073cd..32b21571adfeb 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -316,6 +316,17 @@ tcp_frto - INTEGER
 
 	By default it's enabled with a non-zero value. 0 disables F-RTO.
 
+tcp_fwmark_accept - BOOLEAN
+	If set, incoming connections to listening sockets that do not have a
+	socket mark will set the mark of the accepting socket to the fwmark of
+	the incoming SYN packet. This will cause all packets on that connection
+	(starting from the first SYNACK) to be sent with that fwmark. The
+	listening socket's mark is unchanged. Listening sockets that already
+	have a fwmark set via setsockopt(SOL_SOCKET, SO_MARK, ...) are
+	unaffected.
+
+	Default: 0
+
 tcp_invalid_ratelimit - INTEGER
 	Limit the maximal rate for sending duplicate acknowledgments
 	in response to incoming TCP packets that are for an existing
-- 
GitLab


From 6e29464b8a72e74ec7c3f816f53bfe46a43601bc Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <natechancellor@gmail.com>
Date: Mon, 29 Oct 2018 03:51:58 -0700
Subject: [PATCH 0308/1890] hinic: Fix l4_type parameter in
 hinic_task_set_tunnel_l4

Clang warns:

drivers/net/ethernet/huawei/hinic/hinic_tx.c:392:34: error: implicit
conversion from enumeration type 'enum hinic_l4_tunnel_type' to
different enumeration type 'enum hinic_l4_offload_type'
[-Werror,-Wenum-conversion]
                hinic_task_set_tunnel_l4(task, TUNNEL_UDP_NO_CSUM,
                ~~~~~~~~~~~~~~~~~~~~~~~~       ^~~~~~~~~~~~~~~~~~
1 error generated.

It seems that hinic_task_set_tunnel_l4 was meant to take an enum of type
hinic_l4_tunnel_type, not hinic_l4_offload_type, given both the name of
the functions and the values used.

Fixes: cc18a7543d2f ("net-next/hinic: add checksum offload and TSO support")
Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c | 2 +-
 drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c
index 967c993d5303a..bbf9bdd0ee3e7 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c
@@ -532,7 +532,7 @@ void hinic_task_set_inner_l3(struct hinic_sq_task *task,
 }
 
 void hinic_task_set_tunnel_l4(struct hinic_sq_task *task,
-			      enum hinic_l4_offload_type l4_type,
+			      enum hinic_l4_tunnel_type l4_type,
 			      u32 tunnel_len)
 {
 	task->pkt_info2 |= HINIC_SQ_TASK_INFO2_SET(l4_type, TUNNEL_L4TYPE) |
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h
index a0dc63a4bfc7a..038522e202b6f 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h
@@ -160,7 +160,7 @@ void hinic_task_set_inner_l3(struct hinic_sq_task *task,
 			     u32 network_len);
 
 void hinic_task_set_tunnel_l4(struct hinic_sq_task *task,
-			      enum hinic_l4_offload_type l4_type,
+			      enum hinic_l4_tunnel_type l4_type,
 			      u32 tunnel_len);
 
 void hinic_set_cs_inner_l4(struct hinic_sq_task *task,
-- 
GitLab


From ad0b9d94182be8356978d220c82f9837cffeb7a9 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Mon, 29 Oct 2018 14:26:14 +0000
Subject: [PATCH 0309/1890] mlxsw: spectrum_switchdev: Don't ignore deletions
 of learned MACs

Demands to remove FDB entries should be honored even if the FDB entry in
question was originally learned, and not added by the user. Therefore
ignore the added_by_user datum for SWITCHDEV_FDB_DEL_TO_DEVICE.

Fixes: 816a3bed9549 ("switchdev: Add fdb.added_by_user to switchdev notifications")
Signed-off-by: Petr Machata <petrm@mellanox.com>
Suggested-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index bc60d7a8b49d7..739a51f0a366f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -2661,8 +2661,6 @@ static void mlxsw_sp_switchdev_bridge_fdb_event_work(struct work_struct *work)
 		break;
 	case SWITCHDEV_FDB_DEL_TO_DEVICE:
 		fdb_info = &switchdev_work->fdb_info;
-		if (!fdb_info->added_by_user)
-			break;
 		mlxsw_sp_port_fdb_set(mlxsw_sp_port, fdb_info, false);
 		break;
 	case SWITCHDEV_FDB_ADD_TO_BRIDGE: /* fall through */
-- 
GitLab


From a22712a962912faf257e857ab6857f56a93cfb34 Mon Sep 17 00:00:00 2001
From: Shalom Toledo <shalomt@mellanox.com>
Date: Mon, 29 Oct 2018 14:26:16 +0000
Subject: [PATCH 0310/1890] mlxsw: core: Fix devlink unregister flow

After a failed reload, the driver is still registered to devlink, its
devlink instance is still allocated and the 'reload_fail' flag is set.
Then, in the next reload try, the driver's allocated devlink instance will
be freed without unregistering from devlink and its components (e.g,
resources). This scenario can cause a use-after-free if the user tries to
execute command via devlink user-space tool.

Fix by not freeing the devlink instance during reload (failed or not).

Fixes: 24cc68ad6c46 ("mlxsw: core: Add support for reload")
Signed-off-by: Shalom Toledo <shalomt@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/core.c | 24 +++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index 937d0ace699a7..30f751e696980 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -943,8 +943,8 @@ static int mlxsw_devlink_core_bus_device_reload(struct devlink *devlink,
 					     mlxsw_core->bus,
 					     mlxsw_core->bus_priv, true,
 					     devlink);
-	if (err)
-		mlxsw_core->reload_fail = true;
+	mlxsw_core->reload_fail = !!err;
+
 	return err;
 }
 
@@ -1083,8 +1083,15 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core,
 {
 	struct devlink *devlink = priv_to_devlink(mlxsw_core);
 
-	if (mlxsw_core->reload_fail)
-		goto reload_fail;
+	if (mlxsw_core->reload_fail) {
+		if (!reload)
+			/* Only the parts that were not de-initialized in the
+			 * failed reload attempt need to be de-initialized.
+			 */
+			goto reload_fail_deinit;
+		else
+			return;
+	}
 
 	if (mlxsw_core->driver->fini)
 		mlxsw_core->driver->fini(mlxsw_core);
@@ -1098,9 +1105,12 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core,
 	if (!reload)
 		devlink_resources_unregister(devlink, NULL);
 	mlxsw_core->bus->fini(mlxsw_core->bus_priv);
-	if (reload)
-		return;
-reload_fail:
+
+	return;
+
+reload_fail_deinit:
+	devlink_unregister(devlink);
+	devlink_resources_unregister(devlink, NULL);
 	devlink_free(devlink);
 }
 EXPORT_SYMBOL(mlxsw_core_bus_device_unregister);
-- 
GitLab


From df132eff463873e14e019a07f387b4d577d6d1f9 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 29 Oct 2018 23:10:29 +0800
Subject: [PATCH 0311/1890] sctp: clear the transport of some out_chunk_list
 chunks in sctp_assoc_rm_peer

If a transport is removed by asconf but there still are some chunks with
this transport queuing on out_chunk_list, later an use-after-free issue
will be caused when accessing this transport from these chunks in
sctp_outq_flush().

This is an old bug, we fix it by clearing the transport of these chunks
in out_chunk_list when removing a transport in sctp_assoc_rm_peer().

Reported-by: syzbot+56a40ceee5fb35932f4d@syzkaller.appspotmail.com
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/associola.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index a827a1f562bf3..6a28b96e779e6 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -499,8 +499,9 @@ void sctp_assoc_set_primary(struct sctp_association *asoc,
 void sctp_assoc_rm_peer(struct sctp_association *asoc,
 			struct sctp_transport *peer)
 {
-	struct list_head	*pos;
-	struct sctp_transport	*transport;
+	struct sctp_transport *transport;
+	struct list_head *pos;
+	struct sctp_chunk *ch;
 
 	pr_debug("%s: association:%p addr:%pISpc\n",
 		 __func__, asoc, &peer->ipaddr.sa);
@@ -564,7 +565,6 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc,
 	 */
 	if (!list_empty(&peer->transmitted)) {
 		struct sctp_transport *active = asoc->peer.active_path;
-		struct sctp_chunk *ch;
 
 		/* Reset the transport of each chunk on this list */
 		list_for_each_entry(ch, &peer->transmitted,
@@ -586,6 +586,10 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc,
 				sctp_transport_hold(active);
 	}
 
+	list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list)
+		if (ch->transport == peer)
+			ch->transport = NULL;
+
 	asoc->peer.transport_count--;
 
 	sctp_transport_free(peer);
-- 
GitLab


From 713358369382cebf92f6e98ce2005f94e7344931 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 29 Oct 2018 23:13:11 +0800
Subject: [PATCH 0312/1890] sctp: check policy more carefully when getting pr
 status

When getting pr_assocstatus and pr_streamstatus by sctp_getsockopt,
it doesn't correctly process the case when policy is set with
SCTP_PR_SCTP_ALL | SCTP_PR_SCTP_MASK. It even causes a
slab-out-of-bounds in sctp_getsockopt_pr_streamstatus().

This patch fixes it by return -EINVAL for this case.

Fixes: 0ac1077e3a54 ("sctp: get pr_assoc and pr_stream all status with SCTP_PR_SCTP_ALL")
Reported-by: syzbot+5da0d0a72a9e7d791748@syzkaller.appspotmail.com
Suggested-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index fc0386e8ff239..739f3e50120dd 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -7083,14 +7083,15 @@ static int sctp_getsockopt_pr_assocstatus(struct sock *sk, int len,
 	}
 
 	policy = params.sprstat_policy;
-	if (!policy || (policy & ~(SCTP_PR_SCTP_MASK | SCTP_PR_SCTP_ALL)))
+	if (!policy || (policy & ~(SCTP_PR_SCTP_MASK | SCTP_PR_SCTP_ALL)) ||
+	    ((policy & SCTP_PR_SCTP_ALL) && (policy & SCTP_PR_SCTP_MASK)))
 		goto out;
 
 	asoc = sctp_id2assoc(sk, params.sprstat_assoc_id);
 	if (!asoc)
 		goto out;
 
-	if (policy & SCTP_PR_SCTP_ALL) {
+	if (policy == SCTP_PR_SCTP_ALL) {
 		params.sprstat_abandoned_unsent = 0;
 		params.sprstat_abandoned_sent = 0;
 		for (policy = 0; policy <= SCTP_PR_INDEX(MAX); policy++) {
@@ -7142,7 +7143,8 @@ static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len,
 	}
 
 	policy = params.sprstat_policy;
-	if (!policy || (policy & ~(SCTP_PR_SCTP_MASK | SCTP_PR_SCTP_ALL)))
+	if (!policy || (policy & ~(SCTP_PR_SCTP_MASK | SCTP_PR_SCTP_ALL)) ||
+	    ((policy & SCTP_PR_SCTP_ALL) && (policy & SCTP_PR_SCTP_MASK)))
 		goto out;
 
 	asoc = sctp_id2assoc(sk, params.sprstat_assoc_id);
-- 
GitLab


From da71577545a52be3e0e9225a946e5fd79cfab015 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Mon, 29 Oct 2018 20:36:43 +0000
Subject: [PATCH 0313/1890] rtnetlink: Disallow FDB configuration for
 non-Ethernet device

When an FDB entry is configured, the address is validated to have the
length of an Ethernet address, but the device for which the address is
configured can be of any type.

The above can result in the use of uninitialized memory when the address
is later compared against existing addresses since 'dev->addr_len' is
used and it may be greater than ETH_ALEN, as with ip6tnl devices.

Fix this by making sure that FDB entries are only configured for
Ethernet devices.

BUG: KMSAN: uninit-value in memcmp+0x11d/0x180 lib/string.c:863
CPU: 1 PID: 4318 Comm: syz-executor998 Not tainted 4.19.0-rc3+ #49
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
Google 01/01/2011
Call Trace:
  __dump_stack lib/dump_stack.c:77 [inline]
  dump_stack+0x14b/0x190 lib/dump_stack.c:113
  kmsan_report+0x183/0x2b0 mm/kmsan/kmsan.c:956
  __msan_warning+0x70/0xc0 mm/kmsan/kmsan_instr.c:645
  memcmp+0x11d/0x180 lib/string.c:863
  dev_uc_add_excl+0x165/0x7b0 net/core/dev_addr_lists.c:464
  ndo_dflt_fdb_add net/core/rtnetlink.c:3463 [inline]
  rtnl_fdb_add+0x1081/0x1270 net/core/rtnetlink.c:3558
  rtnetlink_rcv_msg+0xa0b/0x1530 net/core/rtnetlink.c:4715
  netlink_rcv_skb+0x36e/0x5f0 net/netlink/af_netlink.c:2454
  rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:4733
  netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline]
  netlink_unicast+0x1638/0x1720 net/netlink/af_netlink.c:1343
  netlink_sendmsg+0x1205/0x1290 net/netlink/af_netlink.c:1908
  sock_sendmsg_nosec net/socket.c:621 [inline]
  sock_sendmsg net/socket.c:631 [inline]
  ___sys_sendmsg+0xe70/0x1290 net/socket.c:2114
  __sys_sendmsg net/socket.c:2152 [inline]
  __do_sys_sendmsg net/socket.c:2161 [inline]
  __se_sys_sendmsg+0x2a3/0x3d0 net/socket.c:2159
  __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2159
  do_syscall_64+0xb8/0x100 arch/x86/entry/common.c:291
  entry_SYSCALL_64_after_hwframe+0x63/0xe7
RIP: 0033:0x440ee9
Code: e8 cc ab 02 00 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7
48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff
ff 0f 83 bb 0a fc ff c3 66 2e 0f 1f 84 00 00 00 00
RSP: 002b:00007fff6a93b518 EFLAGS: 00000213 ORIG_RAX: 000000000000002e
RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 0000000000440ee9
RDX: 0000000000000000 RSI: 0000000020000240 RDI: 0000000000000003
RBP: 0000000000000000 R08: 00000000004002c8 R09: 00000000004002c8
R10: 00000000004002c8 R11: 0000000000000213 R12: 000000000000b4b0
R13: 0000000000401ec0 R14: 0000000000000000 R15: 0000000000000000

Uninit was created at:
  kmsan_save_stack_with_flags mm/kmsan/kmsan.c:256 [inline]
  kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:181
  kmsan_kmalloc+0x98/0x100 mm/kmsan/kmsan_hooks.c:91
  kmsan_slab_alloc+0x10/0x20 mm/kmsan/kmsan_hooks.c:100
  slab_post_alloc_hook mm/slab.h:446 [inline]
  slab_alloc_node mm/slub.c:2718 [inline]
  __kmalloc_node_track_caller+0x9e7/0x1160 mm/slub.c:4351
  __kmalloc_reserve net/core/skbuff.c:138 [inline]
  __alloc_skb+0x2f5/0x9e0 net/core/skbuff.c:206
  alloc_skb include/linux/skbuff.h:996 [inline]
  netlink_alloc_large_skb net/netlink/af_netlink.c:1189 [inline]
  netlink_sendmsg+0xb49/0x1290 net/netlink/af_netlink.c:1883
  sock_sendmsg_nosec net/socket.c:621 [inline]
  sock_sendmsg net/socket.c:631 [inline]
  ___sys_sendmsg+0xe70/0x1290 net/socket.c:2114
  __sys_sendmsg net/socket.c:2152 [inline]
  __do_sys_sendmsg net/socket.c:2161 [inline]
  __se_sys_sendmsg+0x2a3/0x3d0 net/socket.c:2159
  __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2159
  do_syscall_64+0xb8/0x100 arch/x86/entry/common.c:291
  entry_SYSCALL_64_after_hwframe+0x63/0xe7

v2:
* Make error message more specific (David)

Fixes: 090096bf3db1 ("net: generic fdb support for drivers without ndo_fdb_<op>")
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reported-and-tested-by: syzbot+3a288d5f5530b901310e@syzkaller.appspotmail.com
Reported-and-tested-by: syzbot+d53ab4e92a1db04110ff@syzkaller.appspotmail.com
Cc: Vlad Yasevich <vyasevich@gmail.com>
Cc: David Ahern <dsahern@gmail.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index f679c7a7d761a..e01274bd5e3e2 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3600,6 +3600,11 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
 		return -EINVAL;
 	}
 
+	if (dev->type != ARPHRD_ETHER) {
+		NL_SET_ERR_MSG(extack, "FDB add only supported for Ethernet devices");
+		return -EINVAL;
+	}
+
 	addr = nla_data(tb[NDA_LLADDR]);
 
 	err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack);
@@ -3704,6 +3709,11 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
 		return -EINVAL;
 	}
 
+	if (dev->type != ARPHRD_ETHER) {
+		NL_SET_ERR_MSG(extack, "FDB delete only supported for Ethernet devices");
+		return -EINVAL;
+	}
+
 	addr = nla_data(tb[NDA_LLADDR]);
 
 	err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack);
-- 
GitLab


From 0f0a691f1ef9af0f8943f601b325a219d1ba8184 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 29 Oct 2018 22:17:12 -0700
Subject: [PATCH 0314/1890] sparc64: Remvoe set_fs() from
 perf_callchain_user().

Ever since commit 88b0193d9418 ("perf/callchain: Force USER_DS when
invoking perf_callchain_user()") the caller does this for us.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/kernel/perf_event.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 67b3e6b3ce5d7..47c871394ccb1 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -1849,16 +1849,12 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs
 {
 	u64 saved_fault_address = current_thread_info()->fault_address;
 	u8 saved_fault_code = get_thread_fault_code();
-	mm_segment_t old_fs;
 
 	perf_callchain_store(entry, regs->tpc);
 
 	if (!current->mm)
 		return;
 
-	old_fs = get_fs();
-	set_fs(USER_DS);
-
 	flushw_user();
 
 	pagefault_disable();
@@ -1870,7 +1866,6 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs
 
 	pagefault_enable();
 
-	set_fs(old_fs);
 	set_thread_fault_code(saved_fault_code);
 	current_thread_info()->fault_address = saved_fault_address;
 }
-- 
GitLab


From 1026ded6936f0f9caba5759e2e7438f4dee67867 Mon Sep 17 00:00:00 2001
From: Max Filippov <jcmvbkbc@gmail.com>
Date: Mon, 29 Oct 2018 18:30:46 -0700
Subject: [PATCH 0315/1890] xtensa: use DWARF_DEBUG in the vmlinux.lds.S

Xtensa doesn't have anything custom in its debug sections list. Use
macro DWARF_DEBUG instead of opencoding it.

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 arch/xtensa/kernel/vmlinux.lds.S | 18 +-----------------
 1 file changed, 1 insertion(+), 17 deletions(-)

diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S
index fa926995d2a37..ba66b5bfefe89 100644
--- a/arch/xtensa/kernel/vmlinux.lds.S
+++ b/arch/xtensa/kernel/vmlinux.lds.S
@@ -301,23 +301,7 @@ SECTIONS
   .xt.lit : { *(.xt.lit) }
   .xt.prop : { *(.xt.prop) }
 
-  .debug  0 :  { *(.debug) }
-  .line  0 :  { *(.line) }
-  .debug_srcinfo  0 :  { *(.debug_srcinfo) }
-  .debug_sfnames  0 :  { *(.debug_sfnames) }
-  .debug_aranges  0 :  { *(.debug_aranges) }
-  .debug_pubnames  0 :  { *(.debug_pubnames) }
-  .debug_info  0 :  { *(.debug_info) }
-  .debug_abbrev  0 :  { *(.debug_abbrev) }
-  .debug_line  0 :  { *(.debug_line) }
-  .debug_frame  0 :  { *(.debug_frame) }
-  .debug_str  0 :  { *(.debug_str) }
-  .debug_loc  0 :  { *(.debug_loc) }
-  .debug_macinfo  0 :  { *(.debug_macinfo) }
-  .debug_weaknames  0 :  { *(.debug_weaknames) }
-  .debug_funcnames  0 :  { *(.debug_funcnames) }
-  .debug_typenames  0 :  { *(.debug_typenames) }
-  .debug_varnames  0 :  { *(.debug_varnames) }
+  DWARF_DEBUG
 
   .xt.insn 0 :
   {
-- 
GitLab


From 960b82c383d38fea4439e4182b5be4352a6048e2 Mon Sep 17 00:00:00 2001
From: Max Filippov <jcmvbkbc@gmail.com>
Date: Tue, 30 Oct 2018 01:28:56 -0700
Subject: [PATCH 0316/1890] xtensa: clean up xtensa-specific property sections

xtensa-specific property sections may be section-specific. They should
be collected in the order of appearance. .gnu.linkonce.prop.* input
sections should be collected into the .xt.prop output section.

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 arch/xtensa/kernel/vmlinux.lds.S | 17 +++--------------
 1 file changed, 3 insertions(+), 14 deletions(-)

diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S
index ba66b5bfefe89..67a4f7f8a38d1 100644
--- a/arch/xtensa/kernel/vmlinux.lds.S
+++ b/arch/xtensa/kernel/vmlinux.lds.S
@@ -298,22 +298,11 @@ SECTIONS
 
   _end = .;
 
-  .xt.lit : { *(.xt.lit) }
-  .xt.prop : { *(.xt.prop) }
-
   DWARF_DEBUG
 
-  .xt.insn 0 :
-  {
-    *(.xt.insn)
-    *(.gnu.linkonce.x*)
-  }
-
-  .xt.lit 0 :
-  {
-    *(.xt.lit)
-    *(.gnu.linkonce.p*)
-  }
+  .xt.prop 0 : { KEEP(*(.xt.prop .xt.prop.* .gnu.linkonce.prop.*)) }
+  .xt.insn 0 : { KEEP(*(.xt.insn .xt.insn.* .gnu.linkonce.x*)) }
+  .xt.lit  0 : { KEEP(*(.xt.lit  .xt.lit.*  .gnu.linkonce.p*)) }
 
   /* Sections to be discarded */
   DISCARDS
-- 
GitLab


From 28fa741c27e6d57f6bf594ba3c444ce79e671e09 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Mon, 29 Oct 2018 23:32:11 +0000
Subject: [PATCH 0317/1890] perf/core: Clean up inconsisent indentation

Replace a bunch of spaces with tab, cleans up indentation

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: kernel-janitors@vger.kernel.org
Link: http://lkml.kernel.org/r/20181029233211.21475-1-colin.king@canonical.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 5a97f34bc14c8..65e90c752a91e 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -750,7 +750,7 @@ static inline void update_cgrp_time_from_event(struct perf_event *event)
 	/*
 	 * Do not update time when cgroup is not active
 	 */
-       if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup))
+	if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup))
 		__update_cgrp_time(event->cgrp);
 }
 
-- 
GitLab


From 7847c7be0481558f17e3ef3b03f573677fd30d29 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Tue, 30 Oct 2018 07:33:01 +0100
Subject: [PATCH 0318/1890] x86/paravirt: Remove unused _paravirt_ident_32

There is no user of _paravirt_ident_32 left in the tree. Remove it
together with the related paravirt_patch_ident_32().

paravirt_patch_ident_64() can be moved inside CONFIG_PARAVIRT_XXL=y.

Signed-off-by: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: akataria@vmware.com
Cc: boris.ostrovsky@oracle.com
Cc: rusty@rustcorp.com.au
Cc: virtualization@lists.linux-foundation.org
Cc: xen-devel@lists.xenproject.org
Link: http://lkml.kernel.org/r/20181030063301.15054-1-jgross@suse.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/paravirt_types.h |  2 --
 arch/x86/kernel/paravirt.c            | 26 +++++++-------------------
 arch/x86/kernel/paravirt_patch_32.c   | 18 ++++++------------
 arch/x86/kernel/paravirt_patch_64.c   | 20 ++++++--------------
 4 files changed, 19 insertions(+), 47 deletions(-)

diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index fba54ca23b2a9..26942ad638304 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -361,7 +361,6 @@ extern struct paravirt_patch_template pv_ops;
 	__visible extern const char start_##ops##_##name[], end_##ops##_##name[];	\
 	asm(NATIVE_LABEL("start_", ops, name) code NATIVE_LABEL("end_", ops, name))
 
-unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len);
 unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len);
 unsigned paravirt_patch_default(u8 type, void *insnbuf,
 				unsigned long addr, unsigned len);
@@ -651,7 +650,6 @@ void paravirt_leave_lazy_mmu(void);
 void paravirt_flush_lazy_mmu(void);
 
 void _paravirt_nop(void);
-u32 _paravirt_ident_32(u32);
 u64 _paravirt_ident_64(u64);
 
 #define paravirt_nop	((void *)_paravirt_nop)
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 45123b116c056..c0e0101133f35 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -56,17 +56,6 @@ asm (".pushsection .entry.text, \"ax\"\n"
      ".type _paravirt_nop, @function\n\t"
      ".popsection");
 
-/* identity function, which can be inlined */
-u32 notrace _paravirt_ident_32(u32 x)
-{
-	return x;
-}
-
-u64 notrace _paravirt_ident_64(u64 x)
-{
-	return x;
-}
-
 void __init default_banner(void)
 {
 	printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
@@ -102,6 +91,12 @@ static unsigned paravirt_patch_call(void *insnbuf, const void *target,
 }
 
 #ifdef CONFIG_PARAVIRT_XXL
+/* identity function, which can be inlined */
+u64 notrace _paravirt_ident_64(u64 x)
+{
+	return x;
+}
+
 static unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
 				   unsigned long addr, unsigned len)
 {
@@ -146,13 +141,11 @@ unsigned paravirt_patch_default(u8 type, void *insnbuf,
 	else if (opfunc == _paravirt_nop)
 		ret = 0;
 
+#ifdef CONFIG_PARAVIRT_XXL
 	/* identity functions just return their single argument */
-	else if (opfunc == _paravirt_ident_32)
-		ret = paravirt_patch_ident_32(insnbuf, len);
 	else if (opfunc == _paravirt_ident_64)
 		ret = paravirt_patch_ident_64(insnbuf, len);
 
-#ifdef CONFIG_PARAVIRT_XXL
 	else if (type == PARAVIRT_PATCH(cpu.iret) ||
 		 type == PARAVIRT_PATCH(cpu.usergs_sysret64))
 		/* If operation requires a jmp, then jmp */
@@ -309,13 +302,8 @@ struct pv_info pv_info = {
 #endif
 };
 
-#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)
-/* 32-bit pagetable entries */
-#define PTE_IDENT	__PV_IS_CALLEE_SAVE(_paravirt_ident_32)
-#else
 /* 64-bit pagetable entries */
 #define PTE_IDENT	__PV_IS_CALLEE_SAVE(_paravirt_ident_64)
-#endif
 
 struct paravirt_patch_template pv_ops = {
 	/* Init ops. */
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
index 6368c22fa1fa3..de138d3912e45 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -10,24 +10,18 @@ DEF_NATIVE(cpu, iret, "iret");
 DEF_NATIVE(mmu, read_cr2, "mov %cr2, %eax");
 DEF_NATIVE(mmu, write_cr3, "mov %eax, %cr3");
 DEF_NATIVE(mmu, read_cr3, "mov %cr3, %eax");
-#endif
-
-#if defined(CONFIG_PARAVIRT_SPINLOCKS)
-DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%eax)");
-DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax");
-#endif
-
-unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
-{
-	/* arg in %eax, return in %eax */
-	return 0;
-}
 
 unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
 {
 	/* arg in %edx:%eax, return in %edx:%eax */
 	return 0;
 }
+#endif
+
+#if defined(CONFIG_PARAVIRT_SPINLOCKS)
+DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%eax)");
+DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax");
+#endif
 
 extern bool pv_is_native_spin_unlock(void);
 extern bool pv_is_native_vcpu_is_preempted(void);
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index 7ca9cb726f4d6..9d9e04b310773 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -15,27 +15,19 @@ DEF_NATIVE(cpu, wbinvd, "wbinvd");
 
 DEF_NATIVE(cpu, usergs_sysret64, "swapgs; sysretq");
 DEF_NATIVE(cpu, swapgs, "swapgs");
-#endif
-
-DEF_NATIVE(, mov32, "mov %edi, %eax");
 DEF_NATIVE(, mov64, "mov %rdi, %rax");
 
-#if defined(CONFIG_PARAVIRT_SPINLOCKS)
-DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%rdi)");
-DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax");
-#endif
-
-unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
-{
-	return paravirt_patch_insns(insnbuf, len,
-				    start__mov32, end__mov32);
-}
-
 unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
 {
 	return paravirt_patch_insns(insnbuf, len,
 				    start__mov64, end__mov64);
 }
+#endif
+
+#if defined(CONFIG_PARAVIRT_SPINLOCKS)
+DEF_NATIVE(lock, queued_spin_unlock, "movb $0, (%rdi)");
+DEF_NATIVE(lock, vcpu_is_preempted, "xor %eax, %eax");
+#endif
 
 extern bool pv_is_native_spin_unlock(void);
 extern bool pv_is_native_vcpu_is_preempted(void);
-- 
GitLab


From c6ee7a548e2c291398b4f32c1f741c66b9f98e1c Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Thu, 25 Oct 2018 13:26:45 -0700
Subject: [PATCH 0319/1890] x86/numa_emulation: Fix uniform-split numa
 emulation

The numa_emulation() routine in the 'uniform' case walks through all the
physical 'memblk' instances and divides them into N emulated nodes with
split_nodes_size_interleave_uniform(). As each physical node is consumed it
is removed from the physical memblk array in the numa_remove_memblk_from()
helper.

Since split_nodes_size_interleave_uniform() handles advancing the array as
the 'memblk' is consumed it is expected that the base of the array is
always specified as the argument.

Otherwise, on multi-socket (> 2) configurations the uniform-split
capability can generate an invalid numa configuration leading to boot
failures with signatures like the following:

    rcu: INFO: rcu_sched detected stalls on CPUs/tasks:
    Sending NMI from CPU 0 to CPUs 2:
    NMI backtrace for cpu 2
    CPU: 2 PID: 1332 Comm: pgdatinit0 Not tainted 4.19.0-rc8-next-20181019-baseline #59
    RIP: 0010:__init_single_page.isra.74+0x81/0x90
    [..]
    Call Trace:
     deferred_init_pages+0xaa/0xe3
     deferred_init_memmap+0x18f/0x318
     kthread+0xf8/0x130
     ? deferred_free_pages.isra.105+0xc9/0xc9
     ? kthread_stop+0x110/0x110
     ret_from_fork+0x35/0x40

Fixes: 1f6a2c6d9f121 ("x86/numa_emulation: Introduce uniform split capability")
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>
Reviewed-by: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/154049911459.2685845.9210186007479774286.stgit@dwillia2-desk3.amr.corp.intel.com
---
 arch/x86/mm/numa_emulation.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index b54d52a2d00a8..d71d72cf6c666 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -400,9 +400,17 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
 		n = simple_strtoul(emu_cmdline, &emu_cmdline, 0);
 		ret = -1;
 		for_each_node_mask(i, physnode_mask) {
+			/*
+			 * The reason we pass in blk[0] is due to
+			 * numa_remove_memblk_from() called by
+			 * emu_setup_memblk() will delete entry 0
+			 * and then move everything else up in the pi.blk
+			 * array. Therefore we should always be looking
+			 * at blk[0].
+			 */
 			ret = split_nodes_size_interleave_uniform(&ei, &pi,
-					pi.blk[i].start, pi.blk[i].end, 0,
-					n, &pi.blk[i], nid);
+					pi.blk[0].start, pi.blk[0].end, 0,
+					n, &pi.blk[0], nid);
 			if (ret < 0)
 				break;
 			if (ret < n) {
-- 
GitLab


From 826b5de90c0bca4e9de6231da9e1730480621588 Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Tue, 30 Oct 2018 15:31:15 +0900
Subject: [PATCH 0320/1890] ALSA: firewire-lib: fix insufficient PCM rule for
 period/buffer size

In a former commit, PCM constraint based on LCM of SYT_INTERVAL was
obsoleted with PCM rule. However, the new PCM rule brings -EINVAL in
some cases that max/min values of size of buffer/period is not
multiples of one of values of SYT_INTERVAL. For example, pulseaudio
always fail to configure PCM substream.

This commit changes strategy for the PCM rule. Although the buggy rules
had a single dependency (rate from period, period from rate, rate from
buffer, buffer from rate), a revised rule has double dependencies
(period from period/rate, buffer from buffer/rate). A step of value is
calculated with table of SYT_INTERVAL and list of available rates. This
prevents interval template which brings -EINVAL to a call of
snd_interval_refine().

Fixes: 5950229582bc('ALSA: firewire-lib: add PCM rules to obsolete PCM constraints based on LCM of SYT_INTERVAL')
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/firewire/amdtp-stream.c | 57 ++++++-----------------------------
 1 file changed, 9 insertions(+), 48 deletions(-)

diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c
index fcd965f1d69e8..9be76c808fccf 100644
--- a/sound/firewire/amdtp-stream.c
+++ b/sound/firewire/amdtp-stream.c
@@ -146,53 +146,22 @@ static int apply_constraint_to_size(struct snd_pcm_hw_params *params,
 	struct snd_interval *s = hw_param_interval(params, rule->var);
 	const struct snd_interval *r =
 		hw_param_interval_c(params, SNDRV_PCM_HW_PARAM_RATE);
-	struct snd_interval t = {
-		.min = s->min, .max = s->max, .integer = 1,
-	};
+	struct snd_interval t = {0};
+	unsigned int step = 0;
 	int i;
 
 	for (i = 0; i < CIP_SFC_COUNT; ++i) {
-		unsigned int rate = amdtp_rate_table[i];
-		unsigned int step = amdtp_syt_intervals[i];
-
-		if (!snd_interval_test(r, rate))
-			continue;
-
-		t.min = roundup(t.min, step);
-		t.max = rounddown(t.max, step);
+		if (snd_interval_test(r, amdtp_rate_table[i]))
+			step = max(step, amdtp_syt_intervals[i]);
 	}
 
-	if (snd_interval_checkempty(&t))
-		return -EINVAL;
+	t.min = roundup(s->min, step);
+	t.max = rounddown(s->max, step);
+	t.integer = 1;
 
 	return snd_interval_refine(s, &t);
 }
 
-static int apply_constraint_to_rate(struct snd_pcm_hw_params *params,
-				    struct snd_pcm_hw_rule *rule)
-{
-	struct snd_interval *r =
-			hw_param_interval(params, SNDRV_PCM_HW_PARAM_RATE);
-	const struct snd_interval *s = hw_param_interval_c(params, rule->deps[0]);
-	struct snd_interval t = {
-		.min = UINT_MAX, .max = 0, .integer = 1,
-	};
-	int i;
-
-	for (i = 0; i < CIP_SFC_COUNT; ++i) {
-		unsigned int step = amdtp_syt_intervals[i];
-		unsigned int rate = amdtp_rate_table[i];
-
-		if (s->min % step || s->max % step)
-			continue;
-
-		t.min = min(t.min, rate);
-		t.max = max(t.max, rate);
-	}
-
-	return snd_interval_refine(r, &t);
-}
-
 /**
  * amdtp_stream_add_pcm_hw_constraints - add hw constraints for PCM substream
  * @s:		the AMDTP stream, which must be initialized.
@@ -250,24 +219,16 @@ int amdtp_stream_add_pcm_hw_constraints(struct amdtp_stream *s,
 	 */
 	err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_PERIOD_SIZE,
 				  apply_constraint_to_size, NULL,
+				  SNDRV_PCM_HW_PARAM_PERIOD_SIZE,
 				  SNDRV_PCM_HW_PARAM_RATE, -1);
 	if (err < 0)
 		goto end;
-	err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_RATE,
-				  apply_constraint_to_rate, NULL,
-				  SNDRV_PCM_HW_PARAM_PERIOD_SIZE, -1);
-	if (err < 0)
-		goto end;
 	err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_BUFFER_SIZE,
 				  apply_constraint_to_size, NULL,
+				  SNDRV_PCM_HW_PARAM_BUFFER_SIZE,
 				  SNDRV_PCM_HW_PARAM_RATE, -1);
 	if (err < 0)
 		goto end;
-	err = snd_pcm_hw_rule_add(runtime, 0, SNDRV_PCM_HW_PARAM_RATE,
-				  apply_constraint_to_rate, NULL,
-				  SNDRV_PCM_HW_PARAM_BUFFER_SIZE, -1);
-	if (err < 0)
-		goto end;
 end:
 	return err;
 }
-- 
GitLab


From a95ecac5cb2fc8a8ee606991384d33ee121df00c Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Tue, 23 Oct 2018 13:34:56 +0530
Subject: [PATCH 0321/1890] selftests/powerpc: Relax L1d miss targets for
 rfi_flush test

When running the rfi_flush test, if the system is loaded, we see two
issues:
1. The L1d misses when rfi_flush is disabled increase significantly due
to other workloads interfering with the cache.
2. The L1d misses when rfi_flush is enabled sometimes goes slightly
below the expected number of misses.

To address these, let's relax the expected number of L1d misses:
1. When rfi_flush is disabled, we allow upto half the expected number of
the misses for when rfi_flush is enabled.
2. When rfi_flush is enabled, we allow ~1% lower number of cache misses.

Reported-by: Joel Stanley <joel@jms.id.au>
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Tested-by: Joel Stanley <joel@jms.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 .../selftests/powerpc/security/rfi_flush.c     | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/powerpc/security/rfi_flush.c b/tools/testing/selftests/powerpc/security/rfi_flush.c
index 564ed45bbf731..0a7d0afb26b88 100644
--- a/tools/testing/selftests/powerpc/security/rfi_flush.c
+++ b/tools/testing/selftests/powerpc/security/rfi_flush.c
@@ -49,6 +49,7 @@ int rfi_flush_test(void)
 	struct perf_event_read v;
 	__u64 l1d_misses_total = 0;
 	unsigned long iterations = 100000, zero_size = 24 * 1024;
+	unsigned long l1d_misses_expected;
 	int rfi_flush_org, rfi_flush;
 
 	SKIP_IF(geteuid() != 0);
@@ -71,6 +72,12 @@ int rfi_flush_test(void)
 
 	iter = repetitions;
 
+	/*
+	 * We expect to see l1d miss for each cacheline access when rfi_flush
+	 * is set. Allow a small variation on this.
+	 */
+	l1d_misses_expected = iterations * (zero_size / CACHELINE_SIZE - 2);
+
 again:
 	FAIL_IF(perf_event_reset(fd));
 
@@ -78,10 +85,9 @@ int rfi_flush_test(void)
 
 	FAIL_IF(read(fd, &v, sizeof(v)) != sizeof(v));
 
-	/* Expect at least zero_size/CACHELINE_SIZE misses per iteration */
-	if (v.l1d_misses >= (iterations * zero_size / CACHELINE_SIZE) && rfi_flush)
+	if (rfi_flush && v.l1d_misses >= l1d_misses_expected)
 		passes++;
-	else if (v.l1d_misses < iterations && !rfi_flush)
+	else if (!rfi_flush && v.l1d_misses < (l1d_misses_expected / 2))
 		passes++;
 
 	l1d_misses_total += v.l1d_misses;
@@ -92,13 +98,15 @@ int rfi_flush_test(void)
 	if (passes < repetitions) {
 		printf("FAIL (L1D misses with rfi_flush=%d: %llu %c %lu) [%d/%d failures]\n",
 		       rfi_flush, l1d_misses_total, rfi_flush ? '<' : '>',
-		       rfi_flush ? (repetitions * iterations * zero_size / CACHELINE_SIZE) : iterations,
+		       rfi_flush ? repetitions * l1d_misses_expected :
+		       repetitions * l1d_misses_expected / 2,
 		       repetitions - passes, repetitions);
 		rc = 1;
 	} else
 		printf("PASS (L1D misses with rfi_flush=%d: %llu %c %lu) [%d/%d pass]\n",
 		       rfi_flush, l1d_misses_total, rfi_flush ? '>' : '<',
-		       rfi_flush ? (repetitions * iterations * zero_size / CACHELINE_SIZE) : iterations,
+		       rfi_flush ? repetitions * l1d_misses_expected :
+		       repetitions * l1d_misses_expected / 2,
 		       passes, repetitions);
 
 	if (rfi_flush == rfi_flush_org) {
-- 
GitLab


From f443f38c5789ece6ebe59ae21c27bf861e61c4e2 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 24 Oct 2018 14:31:12 -0300
Subject: [PATCH 0322/1890] tools include uapi: Grab a copy of linux/fs.h

We'll use it to create tables for the 'flags' argument to the 'mount'
and 'umount' syscalls.

Add it to check_headers.sh so that when a new protocol gets added we get
a notification during the build process.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Benjamin Peterson <benjamin@python.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-yacf9jvkwfwg2g95r2us3xb3@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/uapi/linux/fs.h | 393 ++++++++++++++++++++++++++++++++++
 tools/perf/check-headers.sh   |   1 +
 2 files changed, 394 insertions(+)
 create mode 100644 tools/include/uapi/linux/fs.h

diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h
new file mode 100644
index 0000000000000..73e01918f9963
--- /dev/null
+++ b/tools/include/uapi/linux/fs.h
@@ -0,0 +1,393 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_FS_H
+#define _UAPI_LINUX_FS_H
+
+/*
+ * This file has definitions for some important file table structures
+ * and constants and structures used by various generic file system
+ * ioctl's.  Please do not make any changes in this file before
+ * sending patches for review to linux-fsdevel@vger.kernel.org and
+ * linux-api@vger.kernel.org.
+ */
+
+#include <linux/limits.h>
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+/*
+ * It's silly to have NR_OPEN bigger than NR_FILE, but you can change
+ * the file limit at runtime and only root can increase the per-process
+ * nr_file rlimit, so it's safe to set up a ridiculously high absolute
+ * upper limit on files-per-process.
+ *
+ * Some programs (notably those using select()) may have to be 
+ * recompiled to take full advantage of the new limits..  
+ */
+
+/* Fixed constants first: */
+#undef NR_OPEN
+#define INR_OPEN_CUR 1024	/* Initial setting for nfile rlimits */
+#define INR_OPEN_MAX 4096	/* Hard limit for nfile rlimits */
+
+#define BLOCK_SIZE_BITS 10
+#define BLOCK_SIZE (1<<BLOCK_SIZE_BITS)
+
+#define SEEK_SET	0	/* seek relative to beginning of file */
+#define SEEK_CUR	1	/* seek relative to current file position */
+#define SEEK_END	2	/* seek relative to end of file */
+#define SEEK_DATA	3	/* seek to the next data */
+#define SEEK_HOLE	4	/* seek to the next hole */
+#define SEEK_MAX	SEEK_HOLE
+
+#define RENAME_NOREPLACE	(1 << 0)	/* Don't overwrite target */
+#define RENAME_EXCHANGE		(1 << 1)	/* Exchange source and dest */
+#define RENAME_WHITEOUT		(1 << 2)	/* Whiteout source */
+
+struct file_clone_range {
+	__s64 src_fd;
+	__u64 src_offset;
+	__u64 src_length;
+	__u64 dest_offset;
+};
+
+struct fstrim_range {
+	__u64 start;
+	__u64 len;
+	__u64 minlen;
+};
+
+/* extent-same (dedupe) ioctls; these MUST match the btrfs ioctl definitions */
+#define FILE_DEDUPE_RANGE_SAME		0
+#define FILE_DEDUPE_RANGE_DIFFERS	1
+
+/* from struct btrfs_ioctl_file_extent_same_info */
+struct file_dedupe_range_info {
+	__s64 dest_fd;		/* in - destination file */
+	__u64 dest_offset;	/* in - start of extent in destination */
+	__u64 bytes_deduped;	/* out - total # of bytes we were able
+				 * to dedupe from this file. */
+	/* status of this dedupe operation:
+	 * < 0 for error
+	 * == FILE_DEDUPE_RANGE_SAME if dedupe succeeds
+	 * == FILE_DEDUPE_RANGE_DIFFERS if data differs
+	 */
+	__s32 status;		/* out - see above description */
+	__u32 reserved;		/* must be zero */
+};
+
+/* from struct btrfs_ioctl_file_extent_same_args */
+struct file_dedupe_range {
+	__u64 src_offset;	/* in - start of extent in source */
+	__u64 src_length;	/* in - length of extent */
+	__u16 dest_count;	/* in - total elements in info array */
+	__u16 reserved1;	/* must be zero */
+	__u32 reserved2;	/* must be zero */
+	struct file_dedupe_range_info info[0];
+};
+
+/* And dynamically-tunable limits and defaults: */
+struct files_stat_struct {
+	unsigned long nr_files;		/* read only */
+	unsigned long nr_free_files;	/* read only */
+	unsigned long max_files;		/* tunable */
+};
+
+struct inodes_stat_t {
+	long nr_inodes;
+	long nr_unused;
+	long dummy[5];		/* padding for sysctl ABI compatibility */
+};
+
+
+#define NR_FILE  8192	/* this can well be larger on a larger system */
+
+
+/*
+ * These are the fs-independent mount-flags: up to 32 flags are supported
+ */
+#define MS_RDONLY	 1	/* Mount read-only */
+#define MS_NOSUID	 2	/* Ignore suid and sgid bits */
+#define MS_NODEV	 4	/* Disallow access to device special files */
+#define MS_NOEXEC	 8	/* Disallow program execution */
+#define MS_SYNCHRONOUS	16	/* Writes are synced at once */
+#define MS_REMOUNT	32	/* Alter flags of a mounted FS */
+#define MS_MANDLOCK	64	/* Allow mandatory locks on an FS */
+#define MS_DIRSYNC	128	/* Directory modifications are synchronous */
+#define MS_NOATIME	1024	/* Do not update access times. */
+#define MS_NODIRATIME	2048	/* Do not update directory access times */
+#define MS_BIND		4096
+#define MS_MOVE		8192
+#define MS_REC		16384
+#define MS_VERBOSE	32768	/* War is peace. Verbosity is silence.
+				   MS_VERBOSE is deprecated. */
+#define MS_SILENT	32768
+#define MS_POSIXACL	(1<<16)	/* VFS does not apply the umask */
+#define MS_UNBINDABLE	(1<<17)	/* change to unbindable */
+#define MS_PRIVATE	(1<<18)	/* change to private */
+#define MS_SLAVE	(1<<19)	/* change to slave */
+#define MS_SHARED	(1<<20)	/* change to shared */
+#define MS_RELATIME	(1<<21)	/* Update atime relative to mtime/ctime. */
+#define MS_KERNMOUNT	(1<<22) /* this is a kern_mount call */
+#define MS_I_VERSION	(1<<23) /* Update inode I_version field */
+#define MS_STRICTATIME	(1<<24) /* Always perform atime updates */
+#define MS_LAZYTIME	(1<<25) /* Update the on-disk [acm]times lazily */
+
+/* These sb flags are internal to the kernel */
+#define MS_SUBMOUNT     (1<<26)
+#define MS_NOREMOTELOCK	(1<<27)
+#define MS_NOSEC	(1<<28)
+#define MS_BORN		(1<<29)
+#define MS_ACTIVE	(1<<30)
+#define MS_NOUSER	(1<<31)
+
+/*
+ * Superblock flags that can be altered by MS_REMOUNT
+ */
+#define MS_RMT_MASK	(MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION|\
+			 MS_LAZYTIME)
+
+/*
+ * Old magic mount flag and mask
+ */
+#define MS_MGC_VAL 0xC0ED0000
+#define MS_MGC_MSK 0xffff0000
+
+/*
+ * Structure for FS_IOC_FSGETXATTR[A] and FS_IOC_FSSETXATTR.
+ */
+struct fsxattr {
+	__u32		fsx_xflags;	/* xflags field value (get/set) */
+	__u32		fsx_extsize;	/* extsize field value (get/set)*/
+	__u32		fsx_nextents;	/* nextents field value (get)	*/
+	__u32		fsx_projid;	/* project identifier (get/set) */
+	__u32		fsx_cowextsize;	/* CoW extsize field value (get/set)*/
+	unsigned char	fsx_pad[8];
+};
+
+/*
+ * Flags for the fsx_xflags field
+ */
+#define FS_XFLAG_REALTIME	0x00000001	/* data in realtime volume */
+#define FS_XFLAG_PREALLOC	0x00000002	/* preallocated file extents */
+#define FS_XFLAG_IMMUTABLE	0x00000008	/* file cannot be modified */
+#define FS_XFLAG_APPEND		0x00000010	/* all writes append */
+#define FS_XFLAG_SYNC		0x00000020	/* all writes synchronous */
+#define FS_XFLAG_NOATIME	0x00000040	/* do not update access time */
+#define FS_XFLAG_NODUMP		0x00000080	/* do not include in backups */
+#define FS_XFLAG_RTINHERIT	0x00000100	/* create with rt bit set */
+#define FS_XFLAG_PROJINHERIT	0x00000200	/* create with parents projid */
+#define FS_XFLAG_NOSYMLINKS	0x00000400	/* disallow symlink creation */
+#define FS_XFLAG_EXTSIZE	0x00000800	/* extent size allocator hint */
+#define FS_XFLAG_EXTSZINHERIT	0x00001000	/* inherit inode extent size */
+#define FS_XFLAG_NODEFRAG	0x00002000	/* do not defragment */
+#define FS_XFLAG_FILESTREAM	0x00004000	/* use filestream allocator */
+#define FS_XFLAG_DAX		0x00008000	/* use DAX for IO */
+#define FS_XFLAG_COWEXTSIZE	0x00010000	/* CoW extent size allocator hint */
+#define FS_XFLAG_HASATTR	0x80000000	/* no DIFLAG for this	*/
+
+/* the read-only stuff doesn't really belong here, but any other place is
+   probably as bad and I don't want to create yet another include file. */
+
+#define BLKROSET   _IO(0x12,93)	/* set device read-only (0 = read-write) */
+#define BLKROGET   _IO(0x12,94)	/* get read-only status (0 = read_write) */
+#define BLKRRPART  _IO(0x12,95)	/* re-read partition table */
+#define BLKGETSIZE _IO(0x12,96)	/* return device size /512 (long *arg) */
+#define BLKFLSBUF  _IO(0x12,97)	/* flush buffer cache */
+#define BLKRASET   _IO(0x12,98)	/* set read ahead for block device */
+#define BLKRAGET   _IO(0x12,99)	/* get current read ahead setting */
+#define BLKFRASET  _IO(0x12,100)/* set filesystem (mm/filemap.c) read-ahead */
+#define BLKFRAGET  _IO(0x12,101)/* get filesystem (mm/filemap.c) read-ahead */
+#define BLKSECTSET _IO(0x12,102)/* set max sectors per request (ll_rw_blk.c) */
+#define BLKSECTGET _IO(0x12,103)/* get max sectors per request (ll_rw_blk.c) */
+#define BLKSSZGET  _IO(0x12,104)/* get block device sector size */
+#if 0
+#define BLKPG      _IO(0x12,105)/* See blkpg.h */
+
+/* Some people are morons.  Do not use sizeof! */
+
+#define BLKELVGET  _IOR(0x12,106,size_t)/* elevator get */
+#define BLKELVSET  _IOW(0x12,107,size_t)/* elevator set */
+/* This was here just to show that the number is taken -
+   probably all these _IO(0x12,*) ioctls should be moved to blkpg.h. */
+#endif
+/* A jump here: 108-111 have been used for various private purposes. */
+#define BLKBSZGET  _IOR(0x12,112,size_t)
+#define BLKBSZSET  _IOW(0x12,113,size_t)
+#define BLKGETSIZE64 _IOR(0x12,114,size_t)	/* return device size in bytes (u64 *arg) */
+#define BLKTRACESETUP _IOWR(0x12,115,struct blk_user_trace_setup)
+#define BLKTRACESTART _IO(0x12,116)
+#define BLKTRACESTOP _IO(0x12,117)
+#define BLKTRACETEARDOWN _IO(0x12,118)
+#define BLKDISCARD _IO(0x12,119)
+#define BLKIOMIN _IO(0x12,120)
+#define BLKIOOPT _IO(0x12,121)
+#define BLKALIGNOFF _IO(0x12,122)
+#define BLKPBSZGET _IO(0x12,123)
+#define BLKDISCARDZEROES _IO(0x12,124)
+#define BLKSECDISCARD _IO(0x12,125)
+#define BLKROTATIONAL _IO(0x12,126)
+#define BLKZEROOUT _IO(0x12,127)
+/*
+ * A jump here: 130-131 are reserved for zoned block devices
+ * (see uapi/linux/blkzoned.h)
+ */
+
+#define BMAP_IOCTL 1		/* obsolete - kept for compatibility */
+#define FIBMAP	   _IO(0x00,1)	/* bmap access */
+#define FIGETBSZ   _IO(0x00,2)	/* get the block size used for bmap */
+#define FIFREEZE	_IOWR('X', 119, int)	/* Freeze */
+#define FITHAW		_IOWR('X', 120, int)	/* Thaw */
+#define FITRIM		_IOWR('X', 121, struct fstrim_range)	/* Trim */
+#define FICLONE		_IOW(0x94, 9, int)
+#define FICLONERANGE	_IOW(0x94, 13, struct file_clone_range)
+#define FIDEDUPERANGE	_IOWR(0x94, 54, struct file_dedupe_range)
+
+#define FSLABEL_MAX 256	/* Max chars for the interface; each fs may differ */
+
+#define	FS_IOC_GETFLAGS			_IOR('f', 1, long)
+#define	FS_IOC_SETFLAGS			_IOW('f', 2, long)
+#define	FS_IOC_GETVERSION		_IOR('v', 1, long)
+#define	FS_IOC_SETVERSION		_IOW('v', 2, long)
+#define FS_IOC_FIEMAP			_IOWR('f', 11, struct fiemap)
+#define FS_IOC32_GETFLAGS		_IOR('f', 1, int)
+#define FS_IOC32_SETFLAGS		_IOW('f', 2, int)
+#define FS_IOC32_GETVERSION		_IOR('v', 1, int)
+#define FS_IOC32_SETVERSION		_IOW('v', 2, int)
+#define FS_IOC_FSGETXATTR		_IOR('X', 31, struct fsxattr)
+#define FS_IOC_FSSETXATTR		_IOW('X', 32, struct fsxattr)
+#define FS_IOC_GETFSLABEL		_IOR(0x94, 49, char[FSLABEL_MAX])
+#define FS_IOC_SETFSLABEL		_IOW(0x94, 50, char[FSLABEL_MAX])
+
+/*
+ * File system encryption support
+ */
+/* Policy provided via an ioctl on the topmost directory */
+#define FS_KEY_DESCRIPTOR_SIZE	8
+
+#define FS_POLICY_FLAGS_PAD_4		0x00
+#define FS_POLICY_FLAGS_PAD_8		0x01
+#define FS_POLICY_FLAGS_PAD_16		0x02
+#define FS_POLICY_FLAGS_PAD_32		0x03
+#define FS_POLICY_FLAGS_PAD_MASK	0x03
+#define FS_POLICY_FLAGS_VALID		0x03
+
+/* Encryption algorithms */
+#define FS_ENCRYPTION_MODE_INVALID		0
+#define FS_ENCRYPTION_MODE_AES_256_XTS		1
+#define FS_ENCRYPTION_MODE_AES_256_GCM		2
+#define FS_ENCRYPTION_MODE_AES_256_CBC		3
+#define FS_ENCRYPTION_MODE_AES_256_CTS		4
+#define FS_ENCRYPTION_MODE_AES_128_CBC		5
+#define FS_ENCRYPTION_MODE_AES_128_CTS		6
+#define FS_ENCRYPTION_MODE_SPECK128_256_XTS	7
+#define FS_ENCRYPTION_MODE_SPECK128_256_CTS	8
+
+struct fscrypt_policy {
+	__u8 version;
+	__u8 contents_encryption_mode;
+	__u8 filenames_encryption_mode;
+	__u8 flags;
+	__u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE];
+};
+
+#define FS_IOC_SET_ENCRYPTION_POLICY	_IOR('f', 19, struct fscrypt_policy)
+#define FS_IOC_GET_ENCRYPTION_PWSALT	_IOW('f', 20, __u8[16])
+#define FS_IOC_GET_ENCRYPTION_POLICY	_IOW('f', 21, struct fscrypt_policy)
+
+/* Parameters for passing an encryption key into the kernel keyring */
+#define FS_KEY_DESC_PREFIX		"fscrypt:"
+#define FS_KEY_DESC_PREFIX_SIZE		8
+
+/* Structure that userspace passes to the kernel keyring */
+#define FS_MAX_KEY_SIZE			64
+
+struct fscrypt_key {
+	__u32 mode;
+	__u8 raw[FS_MAX_KEY_SIZE];
+	__u32 size;
+};
+
+/*
+ * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS)
+ *
+ * Note: for historical reasons, these flags were originally used and
+ * defined for use by ext2/ext3, and then other file systems started
+ * using these flags so they wouldn't need to write their own version
+ * of chattr/lsattr (which was shipped as part of e2fsprogs).  You
+ * should think twice before trying to use these flags in new
+ * contexts, or trying to assign these flags, since they are used both
+ * as the UAPI and the on-disk encoding for ext2/3/4.  Also, we are
+ * almost out of 32-bit flags.  :-)
+ *
+ * We have recently hoisted FS_IOC_FSGETXATTR / FS_IOC_FSSETXATTR from
+ * XFS to the generic FS level interface.  This uses a structure that
+ * has padding and hence has more room to grow, so it may be more
+ * appropriate for many new use cases.
+ *
+ * Please do not change these flags or interfaces before checking with
+ * linux-fsdevel@vger.kernel.org and linux-api@vger.kernel.org.
+ */
+#define	FS_SECRM_FL			0x00000001 /* Secure deletion */
+#define	FS_UNRM_FL			0x00000002 /* Undelete */
+#define	FS_COMPR_FL			0x00000004 /* Compress file */
+#define FS_SYNC_FL			0x00000008 /* Synchronous updates */
+#define FS_IMMUTABLE_FL			0x00000010 /* Immutable file */
+#define FS_APPEND_FL			0x00000020 /* writes to file may only append */
+#define FS_NODUMP_FL			0x00000040 /* do not dump file */
+#define FS_NOATIME_FL			0x00000080 /* do not update atime */
+/* Reserved for compression usage... */
+#define FS_DIRTY_FL			0x00000100
+#define FS_COMPRBLK_FL			0x00000200 /* One or more compressed clusters */
+#define FS_NOCOMP_FL			0x00000400 /* Don't compress */
+/* End compression flags --- maybe not all used */
+#define FS_ENCRYPT_FL			0x00000800 /* Encrypted file */
+#define FS_BTREE_FL			0x00001000 /* btree format dir */
+#define FS_INDEX_FL			0x00001000 /* hash-indexed directory */
+#define FS_IMAGIC_FL			0x00002000 /* AFS directory */
+#define FS_JOURNAL_DATA_FL		0x00004000 /* Reserved for ext3 */
+#define FS_NOTAIL_FL			0x00008000 /* file tail should not be merged */
+#define FS_DIRSYNC_FL			0x00010000 /* dirsync behaviour (directories only) */
+#define FS_TOPDIR_FL			0x00020000 /* Top of directory hierarchies*/
+#define FS_HUGE_FILE_FL			0x00040000 /* Reserved for ext4 */
+#define FS_EXTENT_FL			0x00080000 /* Extents */
+#define FS_EA_INODE_FL			0x00200000 /* Inode used for large EA */
+#define FS_EOFBLOCKS_FL			0x00400000 /* Reserved for ext4 */
+#define FS_NOCOW_FL			0x00800000 /* Do not cow file */
+#define FS_INLINE_DATA_FL		0x10000000 /* Reserved for ext4 */
+#define FS_PROJINHERIT_FL		0x20000000 /* Create with parents projid */
+#define FS_RESERVED_FL			0x80000000 /* reserved for ext2 lib */
+
+#define FS_FL_USER_VISIBLE		0x0003DFFF /* User visible flags */
+#define FS_FL_USER_MODIFIABLE		0x000380FF /* User modifiable flags */
+
+
+#define SYNC_FILE_RANGE_WAIT_BEFORE	1
+#define SYNC_FILE_RANGE_WRITE		2
+#define SYNC_FILE_RANGE_WAIT_AFTER	4
+
+/*
+ * Flags for preadv2/pwritev2:
+ */
+
+typedef int __bitwise __kernel_rwf_t;
+
+/* high priority request, poll if possible */
+#define RWF_HIPRI	((__force __kernel_rwf_t)0x00000001)
+
+/* per-IO O_DSYNC */
+#define RWF_DSYNC	((__force __kernel_rwf_t)0x00000002)
+
+/* per-IO O_SYNC */
+#define RWF_SYNC	((__force __kernel_rwf_t)0x00000004)
+
+/* per-IO, return -EAGAIN if operation would block */
+#define RWF_NOWAIT	((__force __kernel_rwf_t)0x00000008)
+
+/* per-IO O_APPEND */
+#define RWF_APPEND	((__force __kernel_rwf_t)0x00000010)
+
+/* mask of flags supported by the kernel */
+#define RWF_SUPPORTED	(RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\
+			 RWF_APPEND)
+
+#endif /* _UAPI_LINUX_FS_H */
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index c72cc73a6b09a..9531f7bd7d9bd 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -5,6 +5,7 @@ HEADERS='
 include/uapi/drm/drm.h
 include/uapi/drm/i915_drm.h
 include/uapi/linux/fcntl.h
+include/uapi/linux/fs.h
 include/uapi/linux/kcmp.h
 include/uapi/linux/kvm.h
 include/uapi/linux/in.h
-- 
GitLab


From ceaf8e5b49ce18e9e58d292130e92150d01a34e6 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 24 Oct 2018 14:54:55 -0300
Subject: [PATCH 0323/1890] perf beauty: Add a generator for MS_ mount/umount's
 flag constants

It'll use tools/include copy of linux/fs.h to generate a table to be
used by tools, initially by the 'mount' and 'umount' beautifiers in
'perf trace', but that could also be used to translate from a string
constant to the integer value to be used in a eBPF or tracefs tracepoint
filter.

When used without any args it produces:

  $ tools/perf/trace/beauty/mount_flags.sh
  static const char *mount_flags[] = {
	[1 ? (ilog2(1) + 1) : 0] = "RDONLY",
	[2 ? (ilog2(2) + 1) : 0] = "NOSUID",
	[4 ? (ilog2(4) + 1) : 0] = "NODEV",
	[8 ? (ilog2(8) + 1) : 0] = "NOEXEC",
	[16 ? (ilog2(16) + 1) : 0] = "SYNCHRONOUS",
	[32 ? (ilog2(32) + 1) : 0] = "REMOUNT",
	[64 ? (ilog2(64) + 1) : 0] = "MANDLOCK",
	[128 ? (ilog2(128) + 1) : 0] = "DIRSYNC",
	[1024 ? (ilog2(1024) + 1) : 0] = "NOATIME",
	[2048 ? (ilog2(2048) + 1) : 0] = "NODIRATIME",
	[4096 ? (ilog2(4096) + 1) : 0] = "BIND",
	[8192 ? (ilog2(8192) + 1) : 0] = "MOVE",
	[16384 ? (ilog2(16384) + 1) : 0] = "REC",
	[32768 ? (ilog2(32768) + 1) : 0] = "SILENT",
	[16 + 1] = "POSIXACL",
	[17 + 1] = "UNBINDABLE",
	[18 + 1] = "PRIVATE",
	[19 + 1] = "SLAVE",
	[20 + 1] = "SHARED",
	[21 + 1] = "RELATIME",
	[22 + 1] = "KERNMOUNT",
	[23 + 1] = "I_VERSION",
	[24 + 1] = "STRICTATIME",
	[25 + 1] = "LAZYTIME",
	[26 + 1] = "SUBMOUNT",
	[27 + 1] = "NOREMOTELOCK",
	[28 + 1] = "NOSEC",
	[29 + 1] = "BORN",
	[30 + 1] = "ACTIVE",
	[31 + 1] = "NOUSER",
  };
  $

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Benjamin Peterson <benjamin@python.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-mgutbbkmip9gfnmd28ikg7xt@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/trace/beauty/mount_flags.sh | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100755 tools/perf/trace/beauty/mount_flags.sh

diff --git a/tools/perf/trace/beauty/mount_flags.sh b/tools/perf/trace/beauty/mount_flags.sh
new file mode 100755
index 0000000000000..45547573a1dbb
--- /dev/null
+++ b/tools/perf/trace/beauty/mount_flags.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
+
+[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
+
+printf "static const char *mount_flags[] = {\n"
+regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MS_([[:alnum:]_]+)[[:space:]]+([[:digit:]]+)[[:space:]]*.*'
+egrep $regex ${header_dir}/fs.h | egrep -v '(MSK|VERBOSE|MGC_VAL)\>' | \
+	sed -r "s/$regex/\2 \2 \1/g" | sort -n | \
+	xargs printf "\t[%s ? (ilog2(%s) + 1) : 0] = \"%s\",\n"
+regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MS_([[:alnum:]_]+)[[:space:]]+\(1<<([[:digit:]]+)\)[[:space:]]*.*'
+egrep $regex ${header_dir}/fs.h | \
+	sed -r "s/$regex/\2 \1/g" | \
+	xargs printf "\t[%s + 1] = \"%s\",\n"
+printf "};\n"
-- 
GitLab


From 794f594e0c3be6199e0b3e2324280e8785806fb6 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 24 Oct 2018 15:54:23 -0300
Subject: [PATCH 0324/1890] perf beauty: Switch from GPL v2.0 to LGPL v2.1

The intention is to have this as a library, since it is not perf
specific at all.

I did the switch for the files where I'm the only contributor, with the
exception of a few lines changed by Jiri Olsa.

Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-a04q6chdyjknm1hr305ulx8h@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/trace/beauty/clone.c                     | 3 +--
 tools/perf/trace/beauty/drm_ioctl.sh                | 1 +
 tools/perf/trace/beauty/eventfd.c                   | 2 +-
 tools/perf/trace/beauty/fcntl.c                     | 3 +--
 tools/perf/trace/beauty/flock.c                     | 2 +-
 tools/perf/trace/beauty/futex_op.c                  | 2 +-
 tools/perf/trace/beauty/futex_val3.c                | 2 +-
 tools/perf/trace/beauty/ioctl.c                     | 3 +--
 tools/perf/trace/beauty/kcmp.c                      | 3 +--
 tools/perf/trace/beauty/kcmp_type.sh                | 1 +
 tools/perf/trace/beauty/kvm_ioctl.sh                | 1 +
 tools/perf/trace/beauty/madvise_behavior.sh         | 1 +
 tools/perf/trace/beauty/mmap.c                      | 2 +-
 tools/perf/trace/beauty/mode_t.c                    | 2 +-
 tools/perf/trace/beauty/msg_flags.c                 | 2 +-
 tools/perf/trace/beauty/open_flags.c                | 2 +-
 tools/perf/trace/beauty/perf_event_open.c           | 2 +-
 tools/perf/trace/beauty/perf_ioctl.sh               | 1 +
 tools/perf/trace/beauty/pid.c                       | 3 ++-
 tools/perf/trace/beauty/pkey_alloc.c                | 3 +--
 tools/perf/trace/beauty/pkey_alloc_access_rights.sh | 1 +
 tools/perf/trace/beauty/prctl.c                     | 3 +--
 tools/perf/trace/beauty/prctl_option.sh             | 1 +
 tools/perf/trace/beauty/sched_policy.c              | 2 +-
 tools/perf/trace/beauty/seccomp.c                   | 2 +-
 tools/perf/trace/beauty/signum.c                    | 2 +-
 tools/perf/trace/beauty/sndrv_ctl_ioctl.sh          | 1 +
 tools/perf/trace/beauty/sndrv_pcm_ioctl.sh          | 1 +
 tools/perf/trace/beauty/sockaddr.c                  | 2 +-
 tools/perf/trace/beauty/socket.c                    | 2 +-
 tools/perf/trace/beauty/socket_ipproto.sh           | 1 +
 tools/perf/trace/beauty/socket_type.c               | 2 +-
 tools/perf/trace/beauty/statx.c                     | 3 +--
 tools/perf/trace/beauty/vhost_virtio_ioctl.sh       | 1 +
 tools/perf/trace/beauty/waitid_options.c            | 2 +-
 35 files changed, 36 insertions(+), 31 deletions(-)

diff --git a/tools/perf/trace/beauty/clone.c b/tools/perf/trace/beauty/clone.c
index d64d049ab9915..010406500c304 100644
--- a/tools/perf/trace/beauty/clone.c
+++ b/tools/perf/trace/beauty/clone.c
@@ -1,9 +1,8 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  * trace/beauty/cone.c
  *
  *  Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
- *
- * Released under the GPL v2. (and only v2, not any later version)
  */
 
 #include "trace/beauty/beauty.h"
diff --git a/tools/perf/trace/beauty/drm_ioctl.sh b/tools/perf/trace/beauty/drm_ioctl.sh
index 9d3816815e60f..9aa94fd523a9c 100755
--- a/tools/perf/trace/beauty/drm_ioctl.sh
+++ b/tools/perf/trace/beauty/drm_ioctl.sh
@@ -1,4 +1,5 @@
 #!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
 
 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/drm/
 
diff --git a/tools/perf/trace/beauty/eventfd.c b/tools/perf/trace/beauty/eventfd.c
index 5d6a477a64002..db5b9b4921137 100644
--- a/tools/perf/trace/beauty/eventfd.c
+++ b/tools/perf/trace/beauty/eventfd.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #ifndef EFD_SEMAPHORE
 #define EFD_SEMAPHORE		1
 #endif
diff --git a/tools/perf/trace/beauty/fcntl.c b/tools/perf/trace/beauty/fcntl.c
index 9e8900c13cb13..e6de31674e246 100644
--- a/tools/perf/trace/beauty/fcntl.c
+++ b/tools/perf/trace/beauty/fcntl.c
@@ -1,9 +1,8 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  * trace/beauty/fcntl.c
  *
  *  Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
- *
- * Released under the GPL v2. (and only v2, not any later version)
  */
 
 #include "trace/beauty/beauty.h"
diff --git a/tools/perf/trace/beauty/flock.c b/tools/perf/trace/beauty/flock.c
index c4ff6ad30b062..cf02ae5f0ba66 100644
--- a/tools/perf/trace/beauty/flock.c
+++ b/tools/perf/trace/beauty/flock.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 
 #include "trace/beauty/beauty.h"
 #include <linux/kernel.h>
diff --git a/tools/perf/trace/beauty/futex_op.c b/tools/perf/trace/beauty/futex_op.c
index 61850fbc85ff3..1136bde56406e 100644
--- a/tools/perf/trace/beauty/futex_op.c
+++ b/tools/perf/trace/beauty/futex_op.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #include <linux/futex.h>
 
 #ifndef FUTEX_WAIT_BITSET
diff --git a/tools/perf/trace/beauty/futex_val3.c b/tools/perf/trace/beauty/futex_val3.c
index 26f6b3253511e..138b7d588a708 100644
--- a/tools/perf/trace/beauty/futex_val3.c
+++ b/tools/perf/trace/beauty/futex_val3.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #include <linux/futex.h>
 
 #ifndef FUTEX_BITSET_MATCH_ANY
diff --git a/tools/perf/trace/beauty/ioctl.c b/tools/perf/trace/beauty/ioctl.c
index 1be3b4cf08270..5d2a7fd8d4077 100644
--- a/tools/perf/trace/beauty/ioctl.c
+++ b/tools/perf/trace/beauty/ioctl.c
@@ -1,9 +1,8 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  * trace/beauty/ioctl.c
  *
  *  Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
- *
- * Released under the GPL v2. (and only v2, not any later version)
  */
 
 #include "trace/beauty/beauty.h"
diff --git a/tools/perf/trace/beauty/kcmp.c b/tools/perf/trace/beauty/kcmp.c
index f62040eb9d5c5..b276a274f2030 100644
--- a/tools/perf/trace/beauty/kcmp.c
+++ b/tools/perf/trace/beauty/kcmp.c
@@ -1,9 +1,8 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  * trace/beauty/kcmp.c
  *
  *  Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
- *
- * Released under the GPL v2. (and only v2, not any later version)
  */
 
 #include "trace/beauty/beauty.h"
diff --git a/tools/perf/trace/beauty/kcmp_type.sh b/tools/perf/trace/beauty/kcmp_type.sh
index a3c304caa3365..df8b17486d575 100755
--- a/tools/perf/trace/beauty/kcmp_type.sh
+++ b/tools/perf/trace/beauty/kcmp_type.sh
@@ -1,4 +1,5 @@
 #!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
 
 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
 
diff --git a/tools/perf/trace/beauty/kvm_ioctl.sh b/tools/perf/trace/beauty/kvm_ioctl.sh
index c4699fd46bb64..4ce54f5bf7564 100755
--- a/tools/perf/trace/beauty/kvm_ioctl.sh
+++ b/tools/perf/trace/beauty/kvm_ioctl.sh
@@ -1,4 +1,5 @@
 #!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
 
 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
 
diff --git a/tools/perf/trace/beauty/madvise_behavior.sh b/tools/perf/trace/beauty/madvise_behavior.sh
index 431639eb4d29a..4527d290cdfc6 100755
--- a/tools/perf/trace/beauty/madvise_behavior.sh
+++ b/tools/perf/trace/beauty/madvise_behavior.sh
@@ -1,4 +1,5 @@
 #!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
 
 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/asm-generic/
 
diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c
index 9f68077b241b9..0605593552c6d 100644
--- a/tools/perf/trace/beauty/mmap.c
+++ b/tools/perf/trace/beauty/mmap.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #include <uapi/linux/mman.h>
 
 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
diff --git a/tools/perf/trace/beauty/mode_t.c b/tools/perf/trace/beauty/mode_t.c
index d929ad7dd97be..6879d36d30048 100644
--- a/tools/perf/trace/beauty/mode_t.c
+++ b/tools/perf/trace/beauty/mode_t.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <unistd.h>
diff --git a/tools/perf/trace/beauty/msg_flags.c b/tools/perf/trace/beauty/msg_flags.c
index c064d6aae6597..1b9d6306d2749 100644
--- a/tools/perf/trace/beauty/msg_flags.c
+++ b/tools/perf/trace/beauty/msg_flags.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #include <sys/types.h>
 #include <sys/socket.h>
 
diff --git a/tools/perf/trace/beauty/open_flags.c b/tools/perf/trace/beauty/open_flags.c
index 6aec6178a99dc..cc673fec9184d 100644
--- a/tools/perf/trace/beauty/open_flags.c
+++ b/tools/perf/trace/beauty/open_flags.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
diff --git a/tools/perf/trace/beauty/perf_event_open.c b/tools/perf/trace/beauty/perf_event_open.c
index 2bafd7c995fff..981185c1974ba 100644
--- a/tools/perf/trace/beauty/perf_event_open.c
+++ b/tools/perf/trace/beauty/perf_event_open.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #ifndef PERF_FLAG_FD_NO_GROUP
 # define PERF_FLAG_FD_NO_GROUP		(1UL << 0)
 #endif
diff --git a/tools/perf/trace/beauty/perf_ioctl.sh b/tools/perf/trace/beauty/perf_ioctl.sh
index 6492c74df928d..9aabd9743ef6e 100755
--- a/tools/perf/trace/beauty/perf_ioctl.sh
+++ b/tools/perf/trace/beauty/perf_ioctl.sh
@@ -1,4 +1,5 @@
 #!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
 
 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
 
diff --git a/tools/perf/trace/beauty/pid.c b/tools/perf/trace/beauty/pid.c
index 0313df3428304..1a6acc46807bc 100644
--- a/tools/perf/trace/beauty/pid.c
+++ b/tools/perf/trace/beauty/pid.c
@@ -1,4 +1,5 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
+
 size_t syscall_arg__scnprintf_pid(char *bf, size_t size, struct syscall_arg *arg)
 {
 	int pid = arg->val;
diff --git a/tools/perf/trace/beauty/pkey_alloc.c b/tools/perf/trace/beauty/pkey_alloc.c
index 2ba784a3734ad..d3e5188a9befa 100644
--- a/tools/perf/trace/beauty/pkey_alloc.c
+++ b/tools/perf/trace/beauty/pkey_alloc.c
@@ -1,9 +1,8 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  * trace/beauty/pkey_alloc.c
  *
  *  Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
- *
- * Released under the GPL v2. (and only v2, not any later version)
  */
 
 #include "trace/beauty/beauty.h"
diff --git a/tools/perf/trace/beauty/pkey_alloc_access_rights.sh b/tools/perf/trace/beauty/pkey_alloc_access_rights.sh
index e0a51aeb20b21..f8f1b560cf8a4 100755
--- a/tools/perf/trace/beauty/pkey_alloc_access_rights.sh
+++ b/tools/perf/trace/beauty/pkey_alloc_access_rights.sh
@@ -1,4 +1,5 @@
 #!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
 
 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/asm-generic/
 
diff --git a/tools/perf/trace/beauty/prctl.c b/tools/perf/trace/beauty/prctl.c
index 246130dad6c41..be7a5d3959757 100644
--- a/tools/perf/trace/beauty/prctl.c
+++ b/tools/perf/trace/beauty/prctl.c
@@ -1,9 +1,8 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  * trace/beauty/prctl.c
  *
  *  Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
- *
- * Released under the GPL v2. (and only v2, not any later version)
  */
 
 #include "trace/beauty/beauty.h"
diff --git a/tools/perf/trace/beauty/prctl_option.sh b/tools/perf/trace/beauty/prctl_option.sh
index f24722146ebef..d32f8f1124af0 100755
--- a/tools/perf/trace/beauty/prctl_option.sh
+++ b/tools/perf/trace/beauty/prctl_option.sh
@@ -1,4 +1,5 @@
 #!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
 
 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
 
diff --git a/tools/perf/trace/beauty/sched_policy.c b/tools/perf/trace/beauty/sched_policy.c
index ba5096ae76b60..48f2b5c9aa3ed 100644
--- a/tools/perf/trace/beauty/sched_policy.c
+++ b/tools/perf/trace/beauty/sched_policy.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #include <sched.h>
 
 /*
diff --git a/tools/perf/trace/beauty/seccomp.c b/tools/perf/trace/beauty/seccomp.c
index b7097fd5fed9e..e36156b19c708 100644
--- a/tools/perf/trace/beauty/seccomp.c
+++ b/tools/perf/trace/beauty/seccomp.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #ifndef SECCOMP_SET_MODE_STRICT
 #define SECCOMP_SET_MODE_STRICT 0
 #endif
diff --git a/tools/perf/trace/beauty/signum.c b/tools/perf/trace/beauty/signum.c
index bde18a53f0909..587fec545b8a6 100644
--- a/tools/perf/trace/beauty/signum.c
+++ b/tools/perf/trace/beauty/signum.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #include <signal.h>
 
 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
diff --git a/tools/perf/trace/beauty/sndrv_ctl_ioctl.sh b/tools/perf/trace/beauty/sndrv_ctl_ioctl.sh
index eb511bb5fbd32..e0803b9575932 100755
--- a/tools/perf/trace/beauty/sndrv_ctl_ioctl.sh
+++ b/tools/perf/trace/beauty/sndrv_ctl_ioctl.sh
@@ -1,4 +1,5 @@
 #!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
 
 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/sound/
 
diff --git a/tools/perf/trace/beauty/sndrv_pcm_ioctl.sh b/tools/perf/trace/beauty/sndrv_pcm_ioctl.sh
index 6818392968b24..7a464a7bf9139 100755
--- a/tools/perf/trace/beauty/sndrv_pcm_ioctl.sh
+++ b/tools/perf/trace/beauty/sndrv_pcm_ioctl.sh
@@ -1,4 +1,5 @@
 #!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
 
 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/sound/
 
diff --git a/tools/perf/trace/beauty/sockaddr.c b/tools/perf/trace/beauty/sockaddr.c
index 71a79f72d9d92..9410ad230f101 100644
--- a/tools/perf/trace/beauty/sockaddr.c
+++ b/tools/perf/trace/beauty/sockaddr.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 // Copyright (C) 2018, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 
 #include "trace/beauty/beauty.h"
diff --git a/tools/perf/trace/beauty/socket.c b/tools/perf/trace/beauty/socket.c
index 65227269384b1..d971a25964174 100644
--- a/tools/perf/trace/beauty/socket.c
+++ b/tools/perf/trace/beauty/socket.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 /*
  * trace/beauty/socket.c
  *
diff --git a/tools/perf/trace/beauty/socket_ipproto.sh b/tools/perf/trace/beauty/socket_ipproto.sh
index a3cc24633bec5..de0f2f29017f2 100755
--- a/tools/perf/trace/beauty/socket_ipproto.sh
+++ b/tools/perf/trace/beauty/socket_ipproto.sh
@@ -1,4 +1,5 @@
 #!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
 
 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
 
diff --git a/tools/perf/trace/beauty/socket_type.c b/tools/perf/trace/beauty/socket_type.c
index bca26aef4a77a..a63a9a332aa0f 100644
--- a/tools/perf/trace/beauty/socket_type.c
+++ b/tools/perf/trace/beauty/socket_type.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #include <sys/types.h>
 #include <sys/socket.h>
 
diff --git a/tools/perf/trace/beauty/statx.c b/tools/perf/trace/beauty/statx.c
index 5643b692af4cf..630f2760dd666 100644
--- a/tools/perf/trace/beauty/statx.c
+++ b/tools/perf/trace/beauty/statx.c
@@ -1,9 +1,8 @@
+// SPDX-License-Identifier: LGPL-2.1
 /*
  * trace/beauty/statx.c
  *
  *  Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
- *
- * Released under the GPL v2. (and only v2, not any later version)
  */
 
 #include "trace/beauty/beauty.h"
diff --git a/tools/perf/trace/beauty/vhost_virtio_ioctl.sh b/tools/perf/trace/beauty/vhost_virtio_ioctl.sh
index 0f6a5197d0bed..439773daaf77d 100755
--- a/tools/perf/trace/beauty/vhost_virtio_ioctl.sh
+++ b/tools/perf/trace/beauty/vhost_virtio_ioctl.sh
@@ -1,4 +1,5 @@
 #!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
 
 [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
 
diff --git a/tools/perf/trace/beauty/waitid_options.c b/tools/perf/trace/beauty/waitid_options.c
index 8465281a093de..42ff58ad613b8 100644
--- a/tools/perf/trace/beauty/waitid_options.c
+++ b/tools/perf/trace/beauty/waitid_options.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: LGPL-2.1
 #include <sys/types.h>
 #include <sys/wait.h>
 
-- 
GitLab


From 579e5ff629b17744f62473f314768de8b6f1e66c Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 25 Oct 2018 14:21:31 -0300
Subject: [PATCH 0325/1890] perf beauty: Introduce strarray__scnprintf_flags()

Generalizing pkey_alloc__scnprintf_access_rights(), so that we can use
it with other flags-like arguments, such as mount's mountflags argument.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Benjamin Peterson <benjamin@python.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-o3ymi3104m8moaz9865g09w9@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/trace/beauty/beauty.h     |  1 +
 tools/perf/trace/beauty/pkey_alloc.c | 27 ++++++++++++++++-----------
 2 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h
index 2570152d39097..a1806c4f3ccd8 100644
--- a/tools/perf/trace/beauty/beauty.h
+++ b/tools/perf/trace/beauty/beauty.h
@@ -24,6 +24,7 @@ struct strarray {
 }
 
 size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, int val);
+size_t strarray__scnprintf_flags(struct strarray *sa, char *bf, size_t size, unsigned long flags);
 
 struct trace;
 struct thread;
diff --git a/tools/perf/trace/beauty/pkey_alloc.c b/tools/perf/trace/beauty/pkey_alloc.c
index d3e5188a9befa..1b8ed4cac8153 100644
--- a/tools/perf/trace/beauty/pkey_alloc.c
+++ b/tools/perf/trace/beauty/pkey_alloc.c
@@ -9,31 +9,28 @@
 #include <linux/kernel.h>
 #include <linux/log2.h>
 
-static size_t pkey_alloc__scnprintf_access_rights(int access_rights, char *bf, size_t size)
+size_t strarray__scnprintf_flags(struct strarray *sa, char *bf, size_t size, unsigned long flags)
 {
 	int i, printed = 0;
 
-#include "trace/beauty/generated/pkey_alloc_access_rights_array.c"
-	static DEFINE_STRARRAY(pkey_alloc_access_rights);
-
-	if (access_rights == 0) {
-		const char *s = strarray__pkey_alloc_access_rights.entries[0];
+	if (flags == 0) {
+		const char *s = sa->entries[0];
 		if (s)
 			return scnprintf(bf, size, "%s", s);
 		return scnprintf(bf, size, "%d", 0);
 	}
 
-	for (i = 1; i < strarray__pkey_alloc_access_rights.nr_entries; ++i) {
-		int bit = 1 << (i - 1);
+	for (i = 1; i < sa->nr_entries; ++i) {
+		unsigned long bit = 1UL << (i - 1);
 
-		if (!(access_rights & bit))
+		if (!(flags & bit))
 			continue;
 
 		if (printed != 0)
 			printed += scnprintf(bf + printed, size - printed, "|");
 
-		if (strarray__pkey_alloc_access_rights.entries[i] != NULL)
-			printed += scnprintf(bf + printed, size - printed, "%s", strarray__pkey_alloc_access_rights.entries[i]);
+		if (sa->entries[i] != NULL)
+			printed += scnprintf(bf + printed, size - printed, "%s", sa->entries[i]);
 		else
 			printed += scnprintf(bf + printed, size - printed, "0x%#", bit);
 	}
@@ -41,6 +38,14 @@ static size_t pkey_alloc__scnprintf_access_rights(int access_rights, char *bf, s
 	return printed;
 }
 
+static size_t pkey_alloc__scnprintf_access_rights(int access_rights, char *bf, size_t size)
+{
+#include "trace/beauty/generated/pkey_alloc_access_rights_array.c"
+	static DEFINE_STRARRAY(pkey_alloc_access_rights);
+
+	return strarray__scnprintf_flags(&strarray__pkey_alloc_access_rights, bf, size, access_rights);
+}
+
 size_t syscall_arg__scnprintf_pkey_alloc_access_rights(char *bf, size_t size, struct syscall_arg *arg)
 {
 	unsigned long cmd = arg->val;
-- 
GitLab


From 496fd346b71ffa0ff35623e9ec79df0bad47bd66 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 25 Oct 2018 16:09:47 -0300
Subject: [PATCH 0326/1890] perf trace beauty: Allow syscalls to mask an
 argument before considering it

Take mount's 'flags' arg, to cope with this semantic, as defined in do_mount in fs/namespace.c:

  /*
   * Pre-0.97 versions of mount() didn't have a flags word.  When the
   * flags word was introduced its top half was required to have the
   * magic value 0xC0ED, and this remained so until 2.4.0-test9.
   * Therefore, if this magic number is present, it carries no
   * information and must be discarded.
   */

We need to mask this arg, and then see if it is zero, when we simply
don't print the arg name and value.

The next patch will use this for mount's 'flag' arg.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Benjamin Peterson <benjamin@python.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-btue14k5jemayuykfrwsnh85@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-trace.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 7081d7ea12e5d..14fb63e17de56 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -614,6 +614,7 @@ static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
 
 struct syscall_arg_fmt {
 	size_t	   (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
+	unsigned long (*mask_val)(struct syscall_arg *arg, unsigned long val);
 	void	   *parm;
 	const char *name;
 	bool	   show_zero;
@@ -1487,6 +1488,19 @@ static size_t syscall__scnprintf_name(struct syscall *sc, char *bf, size_t size,
 	return scnprintf(bf, size, "arg%d: ", arg->idx);
 }
 
+/*
+ * Check if the value is in fact zero, i.e. mask whatever needs masking, such
+ * as mount 'flags' argument that needs ignoring some magic flag, see comment
+ * in tools/perf/trace/beauty/mount_flags.c
+ */
+static unsigned long syscall__mask_val(struct syscall *sc, struct syscall_arg *arg, unsigned long val)
+{
+	if (sc->arg_fmt && sc->arg_fmt[arg->idx].mask_val)
+		return sc->arg_fmt[arg->idx].mask_val(arg, val);
+
+	return val;
+}
+
 static size_t syscall__scnprintf_val(struct syscall *sc, char *bf, size_t size,
 				     struct syscall_arg *arg, unsigned long val)
 {
@@ -1535,6 +1549,11 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
 				continue;
 
 			val = syscall_arg__val(&arg, arg.idx);
+			/*
+			 * Some syscall args need some mask, most don't and
+			 * return val untouched.
+			 */
+			val = syscall__mask_val(sc, &arg, val);
 
 			/*
  			 * Suppress this argument if its value is zero and
-- 
GitLab


From 73d141adcea66de656d0c8336811f2b0bbd9700c Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 25 Oct 2018 15:18:06 -0300
Subject: [PATCH 0327/1890] perf trace beauty: Beautify mount/umount's 'flags'
 argument

  # trace -e mount mount -o ro -t debugfs nodev /mnt
     0.000 ( 1.040 ms): mount/27235 mount(dev_name: 0x5601cc8c64e0, dir_name: 0x5601cc8c6500, type: 0x5601cc8c6480, flags: RDONLY) = 0
  # trace -e mount mount -o remount,relatime -t debugfs nodev /mnt
     0.000 ( 2.946 ms): mount/27262 mount(dev_name: 0x55f4a73d64e0, dir_name: 0x55f4a73d6500, type: 0x55f4a73d6480, flags: REMOUNT|RELATIME) = 0
  # trace -e mount mount -o remount,strictatime -t debugfs nodev /mnt
     0.000 ( 2.934 ms): mount/27265 mount(dev_name: 0x5617f71d94e0, dir_name: 0x5617f71d9500, type: 0x5617f71d9480, flags: REMOUNT|STRICTATIME) = 0
  # trace -e mount mount -o remount,suid,silent -t debugfs nodev /mnt
     0.000 ( 0.049 ms): mount/27273 mount(dev_name: 0x55ad65df24e0, dir_name: 0x55ad65df2500, type: 0x55ad65df2480, flags: REMOUNT|SILENT) = 0
  # trace -e mount mount -o remount,rw,sync,lazytime -t debugfs nodev /mnt
     0.000 ( 2.684 ms): mount/27281 mount(dev_name: 0x561216055530, dir_name: 0x561216055550, type: 0x561216055510, flags: SYNCHRONOUS|REMOUNT|LAZYTIME) = 0
  # trace -e mount mount -o remount,dirsync -t debugfs nodev /mnt
     0.000 ( 3.512 ms): mount/27314 mount(dev_name: 0x55c4e7188480, dir_name: 0x55c4e7188530, type: 0x55c4e71884a0, flags: REMOUNT|DIRSYNC, data: 0x55c4e71884e0) = 0
  #

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Benjamin Peterson <benjamin@python.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-i5ncao73c0bd02qprgrq6wb9@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Makefile.perf              |  8 +++++
 tools/perf/builtin-trace.c            |  3 ++
 tools/perf/trace/beauty/Build         |  1 +
 tools/perf/trace/beauty/beauty.h      |  6 ++++
 tools/perf/trace/beauty/mount_flags.c | 43 +++++++++++++++++++++++++++
 5 files changed, 61 insertions(+)
 create mode 100644 tools/perf/trace/beauty/mount_flags.c

diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 2f3bf025e3050..a31c5c29d53ab 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -460,6 +460,12 @@ madvise_behavior_tbl := $(srctree)/tools/perf/trace/beauty/madvise_behavior.sh
 $(madvise_behavior_array): $(madvise_hdr_dir)/mman-common.h $(madvise_behavior_tbl)
 	$(Q)$(SHELL) '$(madvise_behavior_tbl)' $(madvise_hdr_dir) > $@
 
+mount_flags_array := $(beauty_outdir)/mount_flags_array.c
+mount_flags_tbl := $(srctree)/tools/perf/trace/beauty/mount_flags.sh
+
+$(mount_flags_array): $(linux_uapi_dir)/fs.h $(mount_flags_tbl)
+	$(Q)$(SHELL) '$(mount_flags_tbl)' $(linux_uapi_dir) > $@
+
 prctl_option_array := $(beauty_outdir)/prctl_option_array.c
 prctl_hdr_dir := $(srctree)/tools/include/uapi/linux/
 prctl_option_tbl := $(srctree)/tools/perf/trace/beauty/prctl_option.sh
@@ -577,6 +583,7 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc
 	$(socket_ipproto_array) \
 	$(vhost_virtio_ioctl_array) \
 	$(madvise_behavior_array) \
+	$(mount_flags_array) \
 	$(perf_ioctl_array) \
 	$(prctl_option_array) \
 	$(arch_errno_name_array)
@@ -863,6 +870,7 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea
 		$(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \
 		$(OUTPUT)pmu-events/pmu-events.c \
 		$(OUTPUT)$(madvise_behavior_array) \
+		$(OUTPUT)$(mount_flags_array) \
 		$(OUTPUT)$(drm_ioctl_array) \
 		$(OUTPUT)$(pkey_alloc_access_rights_array) \
 		$(OUTPUT)$(sndrv_ctl_ioctl_array) \
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 14fb63e17de56..76c14c0129fca 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -726,6 +726,9 @@ static struct syscall_fmt {
 	  .arg = { [0] = { .scnprintf = SCA_HEX,	/* addr */ },
 		   [2] = { .scnprintf = SCA_MMAP_PROT,	/* prot */ },
 		   [3] = { .scnprintf = SCA_MMAP_FLAGS,	/* flags */ }, }, },
+	{ .name	    = "mount",
+	  .arg = { [3] = { .scnprintf = SCA_MOUNT_FLAGS, /* flags */
+			   .mask_val  = SCAMV_MOUNT_FLAGS, /* flags */ }, }, },
 	{ .name	    = "mprotect",
 	  .arg = { [0] = { .scnprintf = SCA_HEX,	/* start */ },
 		   [2] = { .scnprintf = SCA_MMAP_PROT,	/* prot */ }, }, },
diff --git a/tools/perf/trace/beauty/Build b/tools/perf/trace/beauty/Build
index c3b0afd67760a..3043130732427 100644
--- a/tools/perf/trace/beauty/Build
+++ b/tools/perf/trace/beauty/Build
@@ -5,6 +5,7 @@ ifeq ($(SRCARCH),$(filter $(SRCARCH),x86))
 libperf-y += ioctl.o
 endif
 libperf-y += kcmp.o
+libperf-y += mount_flags.o
 libperf-y += pkey_alloc.o
 libperf-y += prctl.o
 libperf-y += sockaddr.o
diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h
index a1806c4f3ccd8..039c29039b2c4 100644
--- a/tools/perf/trace/beauty/beauty.h
+++ b/tools/perf/trace/beauty/beauty.h
@@ -123,6 +123,12 @@ size_t syscall_arg__scnprintf_kcmp_type(char *bf, size_t size, struct syscall_ar
 size_t syscall_arg__scnprintf_kcmp_idx(char *bf, size_t size, struct syscall_arg *arg);
 #define SCA_KCMP_IDX syscall_arg__scnprintf_kcmp_idx
 
+unsigned long syscall_arg__mask_val_mount_flags(struct syscall_arg *arg, unsigned long flags);
+#define SCAMV_MOUNT_FLAGS syscall_arg__mask_val_mount_flags
+
+size_t syscall_arg__scnprintf_mount_flags(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_MOUNT_FLAGS syscall_arg__scnprintf_mount_flags
+
 size_t syscall_arg__scnprintf_pkey_alloc_access_rights(char *bf, size_t size, struct syscall_arg *arg);
 #define SCA_PKEY_ALLOC_ACCESS_RIGHTS syscall_arg__scnprintf_pkey_alloc_access_rights
 
diff --git a/tools/perf/trace/beauty/mount_flags.c b/tools/perf/trace/beauty/mount_flags.c
new file mode 100644
index 0000000000000..712935c6620a5
--- /dev/null
+++ b/tools/perf/trace/beauty/mount_flags.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * trace/beauty/mount_flags.c
+ *
+ *  Copyright (C) 2018, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ */
+
+#include "trace/beauty/beauty.h"
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <sys/mount.h>
+
+static size_t mount__scnprintf_flags(unsigned long flags, char *bf, size_t size)
+{
+#include "trace/beauty/generated/mount_flags_array.c"
+	static DEFINE_STRARRAY(mount_flags);
+
+	return strarray__scnprintf_flags(&strarray__mount_flags, bf, size, flags);
+}
+
+unsigned long syscall_arg__mask_val_mount_flags(struct syscall_arg *arg __maybe_unused, unsigned long flags)
+{
+	// do_mount in fs/namespace.c:
+	/*
+	 * Pre-0.97 versions of mount() didn't have a flags word.  When the
+	 * flags word was introduced its top half was required to have the
+	 * magic value 0xC0ED, and this remained so until 2.4.0-test9.
+	 * Therefore, if this magic number is present, it carries no
+	 * information and must be discarded.
+	 */
+	if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
+		flags &= ~MS_MGC_MSK;
+
+	return flags;
+}
+
+size_t syscall_arg__scnprintf_mount_flags(char *bf, size_t size, struct syscall_arg *arg)
+{
+	unsigned long flags = arg->val;
+
+	return mount__scnprintf_flags(flags, bf, size);
+}
-- 
GitLab


From f932184e282f574cfd34afee917a10b782fd3e76 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 25 Oct 2018 17:24:45 -0300
Subject: [PATCH 0328/1890] perf trace: Consider syscall aliases too

When trying to trace the 'umount' syscall on x86_64 I noticed that it
was failing:

  # trace -e umount umount /mnt
  event syntax error: 'umount'
                       \___ parser error
  Run 'perf list' for a list of valid events

   Usage: perf trace [<options>] [<command>]
      or: perf trace [<options>] -- <command> [<options>]
      or: perf trace record [<options>] [<command>]
      or: perf trace record [<options>] -- <command> [<options>]

      -e, --event <event>   event/syscall selector. use 'perf list' to list available events
  #

This is because in the x86-64 we have it just as 'umount2':

  $ grep umount arch/x86/entry/syscalls/syscall_64.tbl
  166	common	umount2			__x64_sys_umount
  $

So if the syscall name fails, try fallbacking to looking at the aliases
we have in the syscall_fmts table to then re-lookup, now:

  # trace -e umount umount -f /mnt
  umount: /mnt: not mounted.
     1.759 ( 0.004 ms): umount/18365 umount2(name: 0x55fbfcbc4480, flags: 1) = -1 EINVAL Invalid argument
  #

Time to beautify the flags arg :-)

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Benjamin Peterson <benjamin@python.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-ukweodgzbmjd25lfkgryeft1@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-trace.c | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 76c14c0129fca..db8711061ca3b 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -862,6 +862,18 @@ static struct syscall_fmt *syscall_fmt__find(const char *name)
 	return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
 }
 
+static struct syscall_fmt *syscall_fmt__find_by_alias(const char *alias)
+{
+	int i, nmemb = ARRAY_SIZE(syscall_fmts);
+
+	for (i = 0; i < nmemb; ++i) {
+		if (syscall_fmts[i].alias && strcmp(syscall_fmts[i].alias, alias) == 0)
+			return &syscall_fmts[i];
+	}
+
+	return NULL;
+}
+
 /*
  * is_exit: is this "exit" or "exit_group"?
  * is_open: is this "open" or "openat"? To associate the fd returned in sys_exit with the pathname in sys_enter.
@@ -3195,6 +3207,7 @@ static int trace__parse_events_option(const struct option *opt, const char *str,
 	int len = strlen(str) + 1, err = -1, list, idx;
 	char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
 	char group_name[PATH_MAX];
+	struct syscall_fmt *fmt;
 
 	if (strace_groups_dir == NULL)
 		return -1;
@@ -3212,12 +3225,19 @@ static int trace__parse_events_option(const struct option *opt, const char *str,
 		if (syscalltbl__id(trace->sctbl, s) >= 0 ||
 		    syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) {
 			list = 1;
+			goto do_concat;
+		}
+
+		fmt = syscall_fmt__find_by_alias(s);
+		if (fmt != NULL) {
+			list = 1;
+			s = fmt->name;
 		} else {
 			path__join(group_name, sizeof(group_name), strace_groups_dir, s);
 			if (access(group_name, R_OK) == 0)
 				list = 1;
 		}
-
+do_concat:
 		if (lists[list]) {
 			sprintf(lists[list] + strlen(lists[list]), ",%s", s);
 		} else {
-- 
GitLab


From 476c92cacf383c83584ba02d06c88cf18f062afb Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 26 Oct 2018 13:23:25 -0300
Subject: [PATCH 0329/1890] perf trace: Beautify the umount's 'name' argument

By using the SCA_FILENAME beautifier, that works when either the
probe:vfs_getname probe is in place or with the eBPF program
tools/perf/examples/bpf/augmented_syscalls.c:

  # perf probe -l
  probe:vfs_getname (on getname_flags:73@acme/git/linux/fs/namei.c with pathname)
  # perf trace -e umount
  9630.332 ( 9.521 ms): umount/8082 umount2(name: /mnt) = 0
  #

The augmented syscalls one will be done in the next patch.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Benjamin Peterson <benjamin@python.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-hegbzlpd2nrn584l5jxn7sy2@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-trace.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index db8711061ca3b..d286f73ef2e5b 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -838,7 +838,8 @@ static struct syscall_fmt {
 	  .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
 	{ .name	    = "tkill",
 	  .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
-	{ .name     = "umount2", .alias = "umount", },
+	{ .name     = "umount2", .alias = "umount",
+	  .arg = { [0] = { .scnprintf = SCA_FILENAME, /* name */ }, }, },
 	{ .name	    = "uname", .alias = "newuname", },
 	{ .name	    = "unlinkat",
 	  .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
-- 
GitLab


From 23c07a23cbed389d0b9c7a06486574621df8d1a4 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 26 Oct 2018 13:51:45 -0300
Subject: [PATCH 0330/1890] perf trace: Beautify mount's first pathname arg

The pathname beautifiers so far support just one augmented pathname per
syscall, so do it just for mount's first arg, later this will get fixed.

With:

  # perf probe -l
  probe:vfs_getname    (on getname_flags:73@acme/git/linux/fs/namei.c with pathname)
  #

Later this will get added to augmented_syscalls.c (eBPF):

In one xterm:

  # perf trace -e mount,umount
  2687.331 ( 3.544 ms): mount/8892 mount(dev_name: /mnt, dir_name: 0x561f9ac184a0, type: 0x561f9ac1b170, flags: BIND) = 0
  3912.126 ( 8.807 ms): umount/8895 umount2(name: /mnt) = 0
  ^C#

In the other:

  $ sudo mount --bind /proc /mnt
  $ sudo umount /mnt

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Benjamin Peterson <benjamin@python.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-qsvhrm2es635cl4zicqjeth2@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-trace.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index d286f73ef2e5b..dc8a6c4986ce2 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -727,7 +727,8 @@ static struct syscall_fmt {
 		   [2] = { .scnprintf = SCA_MMAP_PROT,	/* prot */ },
 		   [3] = { .scnprintf = SCA_MMAP_FLAGS,	/* flags */ }, }, },
 	{ .name	    = "mount",
-	  .arg = { [3] = { .scnprintf = SCA_MOUNT_FLAGS, /* flags */
+	  .arg = { [0] = { .scnprintf = SCA_FILENAME, /* dev_name */ },
+		   [3] = { .scnprintf = SCA_MOUNT_FLAGS, /* flags */
 			   .mask_val  = SCAMV_MOUNT_FLAGS, /* flags */ }, }, },
 	{ .name	    = "mprotect",
 	  .arg = { [0] = { .scnprintf = SCA_HEX,	/* start */ },
-- 
GitLab


From 4e303fbe2d95806c875f5ebfcb3d980e20b4bd83 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 26 Oct 2018 15:55:23 -0300
Subject: [PATCH 0331/1890] perf top: Allow disabling the overwrite mode

In ebebbf082357 ("perf top: Switch default mode to overwrite mode") we
forgot to leave a way to disable that new default, add a --overwrite
option that can be disabled using --no-overwrite, since the code already
in such a way that we can readily disable this mode.

This is useful when investigating bugs with this mode like the recent
report from David Miller where lots of unknown symbols appear due to
disabling the events while processing them which disables all record
types, not just PERF_RECORD_SAMPLE, which makes it impossible to resolve
maps when we lose PERF_RECORD_MMAP records.

This can be easily seen while building a kernel, when there are lots of
short lived processes.

Reported-by: David Miller <davem@davemloft.net>
Acked-by: Kan Liang <kan.liang@intel.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wang Nan <wangnan0@huawei.com>
Fixes: ebebbf082357 ("perf top: Switch default mode to overwrite mode")
Link: https://lkml.kernel.org/n/tip-oqgsz2bq4kgrnnajrafcdhie@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-top.txt | 5 +++++
 tools/perf/builtin-top.c              | 2 ++
 2 files changed, 7 insertions(+)

diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 114fda12aa490..d4be6061fe1c1 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -242,6 +242,11 @@ Default is to monitor all CPUS.
 --hierarchy::
 	Enable hierarchy output.
 
+--overwrite::
+	This is the default, but for investigating problems with it or any other strange
+	behaviour like lots of unknown samples, we may want to disable this mode by using
+	--no-overwrite.
+
 --force::
 	Don't do ownership validation.
 
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index d21d8751e7491..214fad747b041 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1372,6 +1372,8 @@ int cmd_top(int argc, const char **argv)
 		    "Show raw trace event output (do not use print fmt or plugins)"),
 	OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy,
 		    "Show entries in a hierarchy"),
+	OPT_BOOLEAN(0, "overwrite", &top.record_opts.overwrite,
+		    "Use a backward ring buffer, default: yes"),
 	OPT_BOOLEAN(0, "force", &symbol_conf.force, "don't complain, do it"),
 	OPT_UINTEGER(0, "num-thread-synthesize", &top.nr_threads_synthesize,
 			"number of thread to run event synthesize"),
-- 
GitLab


From 23cb2d04db54535df65edbbebbca89f2590f08bd Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Fri, 26 Oct 2018 16:54:43 -0500
Subject: [PATCH 0332/1890] ASoC: fix oops w/ for_each_rtd_codec_dai_rollback()
 macro

A kernel oops happens on an error case (usual missing BE mixer
configuration required by Intel SST driver). Git bisect points to this
macro and an operator precedence issue.

	for (; ((i--) >= 0) && ((dai) = rtd->codec_dais[i]);)

The initial code replaced by this macro was
	while (--i >= 0) {
		codec_dai = rtd->codec_dais[i];

Fix the C operator precedence difference by reverting to pre-decrement

Fixes: 0b7990e3897 ('ASoC: add for_each_rtd_codec_dai() macro')
Cc: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/sound/soc.h b/include/sound/soc.h
index f1dab1f4b194d..70c10a8f3e90a 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -1192,7 +1192,7 @@ struct snd_soc_pcm_runtime {
 	     ((i) < rtd->num_codecs) && ((dai) = rtd->codec_dais[i]); \
 	     (i)++)
 #define for_each_rtd_codec_dai_rollback(rtd, i, dai)		\
-	for (; ((i--) >= 0) && ((dai) = rtd->codec_dais[i]);)
+	for (; ((--i) >= 0) && ((dai) = rtd->codec_dais[i]);)
 
 
 /* mixer control */
-- 
GitLab


From 3aa8029e1ac4faea2967e36281d93f5d099ed6a9 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 30 Oct 2018 00:18:12 -0700
Subject: [PATCH 0333/1890] net/mlx4_en: add a missing <net/ip.h> include

Abdul Haleem reported a build error on ppc :

drivers/net/ethernet/mellanox/mlx4/en_rx.c:582:18: warning: `struct
iphdr` declared inside parameter list [enabled by default]
           struct iphdr *iph)
                  ^
drivers/net/ethernet/mellanox/mlx4/en_rx.c:582:18: warning: its scope is
only this definition or declaration, which is probably not what you want
[enabled by default]
drivers/net/ethernet/mellanox/mlx4/en_rx.c: In function
get_fixed_ipv4_csum:
drivers/net/ethernet/mellanox/mlx4/en_rx.c:586:20: error: dereferencing
pointer to incomplete type
  __u8 ipproto = iph->protocol;
                    ^

Fixes: 55469bc6b577 ("drivers: net: remove <net/busy_poll.h> inclusion when not needed")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Abdul Haleem <abdhalee@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_rx.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 5a6d0919533d6..db00bf1c23f5a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -43,6 +43,7 @@
 #include <linux/vmalloc.h>
 #include <linux/irq.h>
 
+#include <net/ip.h>
 #if IS_ENABLED(CONFIG_IPV6)
 #include <net/ip6_checksum.h>
 #endif
-- 
GitLab


From a6b3a3fa042343e29ffaf9169f5ba3c819d4f9a2 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 30 Oct 2018 15:41:00 +0000
Subject: [PATCH 0334/1890] net: mvpp2: Fix affinity hint allocation

The mvpp2 driver has the curious behaviour of passing a stack variable
to irq_set_affinity_hint(), which results in the kernel exploding
the first time anyone accesses this information. News flash: userspace
does, and irqbalance will happily take the machine down. Great stuff.

An easy fix is to track the mask within the queue_vector structure,
and to make sure it has the same lifetime as the interrupt itself.

Fixes: e531f76757eb ("net: mvpp2: handle cases where more CPUs are available than s/w threads")
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvpp2/mvpp2.h     |  1 +
 .../net/ethernet/marvell/mvpp2/mvpp2_main.c    | 18 ++++++++++++++----
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
index 176c6b56fdccd..398328f107437 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
@@ -796,6 +796,7 @@ struct mvpp2_queue_vector {
 	int nrxqs;
 	u32 pending_cause_rx;
 	struct mvpp2_port *port;
+	struct cpumask *mask;
 };
 
 struct mvpp2_port {
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index 14f9679c957c6..7a37a37e3fb34 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -3298,24 +3298,30 @@ static int mvpp2_irqs_init(struct mvpp2_port *port)
 	for (i = 0; i < port->nqvecs; i++) {
 		struct mvpp2_queue_vector *qv = port->qvecs + i;
 
-		if (qv->type == MVPP2_QUEUE_VECTOR_PRIVATE)
+		if (qv->type == MVPP2_QUEUE_VECTOR_PRIVATE) {
+			qv->mask = kzalloc(cpumask_size(), GFP_KERNEL);
+			if (!qv->mask) {
+				err = -ENOMEM;
+				goto err;
+			}
+
 			irq_set_status_flags(qv->irq, IRQ_NO_BALANCING);
+		}
 
 		err = request_irq(qv->irq, mvpp2_isr, 0, port->dev->name, qv);
 		if (err)
 			goto err;
 
 		if (qv->type == MVPP2_QUEUE_VECTOR_PRIVATE) {
-			unsigned long mask = 0;
 			unsigned int cpu;
 
 			for_each_present_cpu(cpu) {
 				if (mvpp2_cpu_to_thread(port->priv, cpu) ==
 				    qv->sw_thread_id)
-					mask |= BIT(cpu);
+					cpumask_set_cpu(cpu, qv->mask);
 			}
 
-			irq_set_affinity_hint(qv->irq, to_cpumask(&mask));
+			irq_set_affinity_hint(qv->irq, qv->mask);
 		}
 	}
 
@@ -3325,6 +3331,8 @@ static int mvpp2_irqs_init(struct mvpp2_port *port)
 		struct mvpp2_queue_vector *qv = port->qvecs + i;
 
 		irq_set_affinity_hint(qv->irq, NULL);
+		kfree(qv->mask);
+		qv->mask = NULL;
 		free_irq(qv->irq, qv);
 	}
 
@@ -3339,6 +3347,8 @@ static void mvpp2_irqs_deinit(struct mvpp2_port *port)
 		struct mvpp2_queue_vector *qv = port->qvecs + i;
 
 		irq_set_affinity_hint(qv->irq, NULL);
+		kfree(qv->mask);
+		qv->mask = NULL;
 		irq_clear_status_flags(qv->irq, IRQ_NO_BALANCING);
 		free_irq(qv->irq, qv);
 	}
-- 
GitLab


From 698b53b3119c45a59eef10b516d780b3e9a5402d Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 30 Oct 2018 11:29:42 +0000
Subject: [PATCH 0335/1890] mtip32xx: clean an indentation issue, remove
 extraneous tabs

Trivial fix to clean up an indentation issue, remove tabs

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/mtip32xx/mtip32xx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index dfc8de6ce5254..a7daa8acbab3a 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -1942,8 +1942,8 @@ static int exec_drive_taskfile(struct driver_data *dd,
 				dev_warn(&dd->pdev->dev,
 					"data movement but "
 					"sect_count is 0\n");
-					err = -EINVAL;
-					goto abort;
+				err = -EINVAL;
+				goto abort;
 			}
 		}
 	}
-- 
GitLab


From 0cafc82fae41531b0162150f9a97f2c74f97118f Mon Sep 17 00:00:00 2001
From: "Guttula, Suresh" <Suresh.Guttula@amd.com>
Date: Mon, 29 Oct 2018 05:23:25 +0000
Subject: [PATCH 0336/1890] drm/amd/display: set backlight level limit to 1

This patch will work as workaround for silicon limitation
related to PWM dutycycle when the backlight level goes to 0.

Actually PWM value is 16 bit value and valid range from 1-65535.
when ever user requested to set this PWM value to 0 which is not
fall in the range, in VBIOS taken care this by limiting to 1.
This patch here will do the same. Either driver or VBIOS can not
pass 0 value as it is not a valid range for PWM and it will
give a high PWM pulse which is not the intended behaviour as
per HW constraints.

Signed-off-by: suresh guttula <suresh.guttula@amd.com>
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index e224f23e22155..b0df6dc9a775f 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -1524,6 +1524,13 @@ static int amdgpu_dm_backlight_update_status(struct backlight_device *bd)
 {
 	struct amdgpu_display_manager *dm = bl_get_data(bd);
 
+	/*
+	 * PWM interperts 0 as 100% rather than 0% because of HW
+	 * limitation for level 0.So limiting minimum brightness level
+	 * to 1.
+	 */
+	if (bd->props.brightness < 1)
+		return 1;
 	if (dc_link_set_backlight_level(dm->backlight_link,
 			bd->props.brightness, 0, 0))
 		return 0;
-- 
GitLab


From 1ecd0da5888960ecff6409393c11b35fad644104 Mon Sep 17 00:00:00 2001
From: Evan Quan <evan.quan@amd.com>
Date: Tue, 30 Oct 2018 09:12:22 +0800
Subject: [PATCH 0337/1890] drm/amd/powerplay: revise Vega20 pptable version
 check

Tell the version numbers when the pptable versions do not match.

Signed-off-by: Evan Quan <evan.quan@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../drm/amd/powerplay/hwmgr/vega20_processpptables.c   | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_processpptables.c
index f7e8bbdc20b0b..97f8a1a970c37 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_processpptables.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_processpptables.c
@@ -642,8 +642,14 @@ static int check_powerplay_tables(
 		"Unsupported PPTable format!", return -1);
 	PP_ASSERT_WITH_CODE(powerplay_table->sHeader.structuresize > 0,
 		"Invalid PowerPlay Table!", return -1);
-	PP_ASSERT_WITH_CODE(powerplay_table->smcPPTable.Version == PPTABLE_V20_SMU_VERSION,
-		"Unmatch PPTable version, vbios update may be needed!", return -1);
+
+	if (powerplay_table->smcPPTable.Version != PPTABLE_V20_SMU_VERSION) {
+		pr_info("Unmatch PPTable version: "
+			"pptable from VBIOS is V%d while driver supported is V%d!",
+			powerplay_table->smcPPTable.Version,
+			PPTABLE_V20_SMU_VERSION);
+		return -EINVAL;
+	}
 
 	//dump_pptable(&powerplay_table->smcPPTable);
 
-- 
GitLab


From 204c881e96e435606451e8a167cdb5a12fafd32a Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Mon, 29 Oct 2018 15:13:05 +0530
Subject: [PATCH 0338/1890] dt-bindings: arm: Explain capacities-dmips-mhz
 calculations in example

The example contains two values for the capacity currently, 446 in text
and 578 in code. The numbers are all correct but can confuse some of the
readers. This patch tries to explain how the numbers are calculated to
avoid same confusion going forward.

Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/arm/cpu-capacity.txt | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/Documentation/devicetree/bindings/arm/cpu-capacity.txt b/Documentation/devicetree/bindings/arm/cpu-capacity.txt
index 9b5685a1d15d9..84262cdb8d29a 100644
--- a/Documentation/devicetree/bindings/arm/cpu-capacity.txt
+++ b/Documentation/devicetree/bindings/arm/cpu-capacity.txt
@@ -59,9 +59,11 @@ mhz values (normalized w.r.t. the highest value found while parsing the DT).
 ===========================================
 
 Example 1 (ARM 64-bit, 6-cpu system, two clusters):
-capacities-dmips-mhz are scaled w.r.t. 1024 (cpu@0 and cpu@1)
-supposing cluster0@max-freq=1100 and custer1@max-freq=850,
-final capacities are 1024 for cluster0 and 446 for cluster1
+The capacities-dmips-mhz or DMIPS/MHz values (scaled to 1024)
+are 1024 and 578 for cluster0 and cluster1. Further normalization
+is done by the operating system based on cluster0@max-freq=1100 and
+custer1@max-freq=850, final capacities are 1024 for cluster0 and
+446 for cluster1 (576*850/1100).
 
 cpus {
 	#address-cells = <2>;
-- 
GitLab


From b31d30d9be32d41bef3e6076a965565d3a3d8005 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Mon, 29 Oct 2018 14:56:48 -0700
Subject: [PATCH 0339/1890] tools/bpf: add unlimited rlimit for
 flow_dissector_load

On our test machine, bpf selftest test_flow_dissector.sh failed
with the following error:
  # ./test_flow_dissector.sh
  bpffs not mounted. Mounting...
  libbpf: failed to create map (name: 'jmp_table'): Operation not permitted
  libbpf: failed to load object 'bpf_flow.o'
  ./flow_dissector_load: bpf_prog_load bpf_flow.o
  selftests: test_flow_dissector [FAILED]

Let us increase the rlimit to remove the above map
creation failure.

Signed-off-by: Yonghong Song <yhs@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/flow_dissector_load.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/testing/selftests/bpf/flow_dissector_load.c b/tools/testing/selftests/bpf/flow_dissector_load.c
index d3273b5b3173e..ae8180b11d5fe 100644
--- a/tools/testing/selftests/bpf/flow_dissector_load.c
+++ b/tools/testing/selftests/bpf/flow_dissector_load.c
@@ -11,6 +11,8 @@
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
 
+#include "bpf_rlimit.h"
+
 const char *cfg_pin_path = "/sys/fs/bpf/flow_dissector";
 const char *cfg_map_name = "jmp_table";
 bool cfg_attach = true;
-- 
GitLab


From 27b31e68bc9fc25c519c7772fa23913687218d5f Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Mon, 29 Oct 2018 12:31:28 -0700
Subject: [PATCH 0340/1890] bpf: tcp_bpf_recvmsg should return EAGAIN when
 nonblocking and no data

We return 0 in the case of a nonblocking socket that has no data
available. However, this is incorrect and may confuse applications.
After this patch we do the correct thing and return the error
EAGAIN.

Quoting return codes from recvmsg manpage,

EAGAIN or EWOULDBLOCK
 The socket is marked nonblocking and the receive operation would
 block, or a receive timeout had been set and the timeout expired
 before data was received.

Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface")
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 net/ipv4/tcp_bpf.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index b7918d4caa300..3b45fe530f91e 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -145,6 +145,7 @@ int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 			ret = err;
 			goto out;
 		}
+		copied = -EAGAIN;
 	}
 	ret = copied;
 out:
-- 
GitLab


From a0aebae07f211c05d64be886e56d352eea86350b Mon Sep 17 00:00:00 2001
From: Joel Stanley <joel@jms.id.au>
Date: Mon, 22 Oct 2018 22:39:26 +1030
Subject: [PATCH 0341/1890] selftests: powerpc: Fix warning for security subdir

typing 'make' inside tools/testing/selftests/powerpc gave a build
warning:

BUILD_TARGET=tools/testing/selftests/powerpc/security; mkdir -p $BUILD_TARGET; make OUTPUT=$BUILD_TARGET -k -C security all
make[1]: Entering directory 'tools/testing/selftests/powerpc/security'
../../lib.mk:20: ../../../../scripts/subarch.include: No such file or directory
make[1]: *** No rule to make target '../../../../scripts/subarch.include'.
make[1]: Failed to remake makefile '../../../../scripts/subarch.include'.

The build is one level deeper than lib.mk thinks it is. Set top_srcdir
to set things straight.

Note that the test program is still built.

Signed-off-by: Joel Stanley <joel@jms.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 tools/testing/selftests/powerpc/security/Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/powerpc/security/Makefile b/tools/testing/selftests/powerpc/security/Makefile
index 44690f1bb26ae..85861c46b4457 100644
--- a/tools/testing/selftests/powerpc/security/Makefile
+++ b/tools/testing/selftests/powerpc/security/Makefile
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0+
 
 TEST_GEN_PROGS := rfi_flush
+top_srcdir = ../../../../..
 
 CFLAGS += -I../../../../../usr/include
 
-- 
GitLab


From bc0686ff5fad7a842cc88377439e78be87fedc80 Mon Sep 17 00:00:00 2001
From: Hang Yuan <hang.yuan@linux.intel.com>
Date: Wed, 19 Sep 2018 14:42:10 +0800
Subject: [PATCH 0342/1890] drm/i915/gvt: support inconsecutive partial gtt
 entry write

Previously we assumed two 4-byte writes to the same PTE coming in sequence.
But recently we observed inconsecutive partial write happening as well. So
this patch enhances the previous solution. It now uses a list to save more
partial writes. If one partial write can be combined with another one in
the list to construct a full PTE, update its shadow entry. Otherwise, save
the partial write in the list.

v2: invalidate old entry and flush ggtt (Zhenyu)
v3: split old ggtt page unmap to another patch (Zhenyu)
v4: refine codes (Zhenyu)

Signed-off-by: Hang Yuan <hang.yuan@linux.intel.com>
Cc: Yan Zhao <yan.y.zhao@intel.com>
Cc: Xiaolin Zhang <xiaolin.zhang@intel.com>
Cc: Zhenyu Wang <zhenyu.z.wang@intel.com>
Reviewed-by: Xiaolin Zhang <xiaolin.zhang@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/gtt.c | 107 ++++++++++++++++-----------------
 drivers/gpu/drm/i915/gvt/gtt.h |   9 ++-
 2 files changed, 60 insertions(+), 56 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
index c11e353ca9043..919de5a1bafb7 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -1901,7 +1901,6 @@ static struct intel_vgpu_mm *intel_vgpu_create_ggtt_mm(struct intel_vgpu *vgpu)
 		vgpu_free_mm(mm);
 		return ERR_PTR(-ENOMEM);
 	}
-	mm->ggtt_mm.last_partial_off = -1UL;
 
 	return mm;
 }
@@ -1926,7 +1925,6 @@ void _intel_vgpu_mm_release(struct kref *mm_ref)
 		invalidate_ppgtt_mm(mm);
 	} else {
 		vfree(mm->ggtt_mm.virtual_ggtt);
-		mm->ggtt_mm.last_partial_off = -1UL;
 	}
 
 	vgpu_free_mm(mm);
@@ -2164,6 +2162,8 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
 	struct intel_gvt_gtt_entry e, m;
 	dma_addr_t dma_addr;
 	int ret;
+	struct intel_gvt_partial_pte *partial_pte, *pos, *n;
+	bool partial_update = false;
 
 	if (bytes != 4 && bytes != 8)
 		return -EINVAL;
@@ -2174,68 +2174,57 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
 	if (!vgpu_gmadr_is_valid(vgpu, gma))
 		return 0;
 
-	ggtt_get_guest_entry(ggtt_mm, &e, g_gtt_index);
-
+	e.type = GTT_TYPE_GGTT_PTE;
 	memcpy((void *)&e.val64 + (off & (info->gtt_entry_size - 1)), p_data,
 			bytes);
 
 	/* If ggtt entry size is 8 bytes, and it's split into two 4 bytes
-	 * write, we assume the two 4 bytes writes are consecutive.
-	 * Otherwise, we abort and report error
+	 * write, save the first 4 bytes in a list and update virtual
+	 * PTE. Only update shadow PTE when the second 4 bytes comes.
 	 */
 	if (bytes < info->gtt_entry_size) {
-		if (ggtt_mm->ggtt_mm.last_partial_off == -1UL) {
-			/* the first partial part*/
-			ggtt_mm->ggtt_mm.last_partial_off = off;
-			ggtt_mm->ggtt_mm.last_partial_data = e.val64;
-			return 0;
-		} else if ((g_gtt_index ==
-				(ggtt_mm->ggtt_mm.last_partial_off >>
-				info->gtt_entry_size_shift)) &&
-			(off !=	ggtt_mm->ggtt_mm.last_partial_off)) {
-			/* the second partial part */
-
-			int last_off = ggtt_mm->ggtt_mm.last_partial_off &
-				(info->gtt_entry_size - 1);
-
-			memcpy((void *)&e.val64 + last_off,
-				(void *)&ggtt_mm->ggtt_mm.last_partial_data +
-				last_off, bytes);
-
-			ggtt_mm->ggtt_mm.last_partial_off = -1UL;
-		} else {
-			int last_offset;
-
-			gvt_vgpu_err("failed to populate guest ggtt entry: abnormal ggtt entry write sequence, last_partial_off=%lx, offset=%x, bytes=%d, ggtt entry size=%d\n",
-					ggtt_mm->ggtt_mm.last_partial_off, off,
-					bytes, info->gtt_entry_size);
-
-			/* set host ggtt entry to scratch page and clear
-			 * virtual ggtt entry as not present for last
-			 * partially write offset
-			 */
-			last_offset = ggtt_mm->ggtt_mm.last_partial_off &
-					(~(info->gtt_entry_size - 1));
-
-			ggtt_get_host_entry(ggtt_mm, &m, last_offset);
-			ggtt_invalidate_pte(vgpu, &m);
-			ops->set_pfn(&m, gvt->gtt.scratch_mfn);
-			ops->clear_present(&m);
-			ggtt_set_host_entry(ggtt_mm, &m, last_offset);
-			ggtt_invalidate(gvt->dev_priv);
-
-			ggtt_get_guest_entry(ggtt_mm, &e, last_offset);
-			ops->clear_present(&e);
-			ggtt_set_guest_entry(ggtt_mm, &e, last_offset);
-
-			ggtt_mm->ggtt_mm.last_partial_off = off;
-			ggtt_mm->ggtt_mm.last_partial_data = e.val64;
+		bool found = false;
+
+		list_for_each_entry_safe(pos, n,
+				&ggtt_mm->ggtt_mm.partial_pte_list, list) {
+			if (g_gtt_index == pos->offset >>
+					info->gtt_entry_size_shift) {
+				if (off != pos->offset) {
+					/* the second partial part*/
+					int last_off = pos->offset &
+						(info->gtt_entry_size - 1);
+
+					memcpy((void *)&e.val64 + last_off,
+						(void *)&pos->data + last_off,
+						bytes);
+
+					list_del(&pos->list);
+					kfree(pos);
+					found = true;
+					break;
+				}
+
+				/* update of the first partial part */
+				pos->data = e.val64;
+				ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
+				return 0;
+			}
+		}
 
-			return 0;
+		if (!found) {
+			/* the first partial part */
+			partial_pte = kzalloc(sizeof(*partial_pte), GFP_KERNEL);
+			if (!partial_pte)
+				return -ENOMEM;
+			partial_pte->offset = off;
+			partial_pte->data = e.val64;
+			list_add_tail(&partial_pte->list,
+				&ggtt_mm->ggtt_mm.partial_pte_list);
+			partial_update = true;
 		}
 	}
 
-	if (ops->test_present(&e)) {
+	if (!partial_update && (ops->test_present(&e))) {
 		gfn = ops->get_pfn(&e);
 		m = e;
 
@@ -2428,6 +2417,8 @@ int intel_vgpu_init_gtt(struct intel_vgpu *vgpu)
 
 	intel_vgpu_reset_ggtt(vgpu, false);
 
+	INIT_LIST_HEAD(&gtt->ggtt_mm->ggtt_mm.partial_pte_list);
+
 	return create_scratch_page_tree(vgpu);
 }
 
@@ -2452,6 +2443,14 @@ static void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu)
 
 static void intel_vgpu_destroy_ggtt_mm(struct intel_vgpu *vgpu)
 {
+	struct intel_gvt_partial_pte *pos;
+
+	list_for_each_entry(pos,
+			&vgpu->gtt.ggtt_mm->ggtt_mm.partial_pte_list, list) {
+		gvt_dbg_mm("partial PTE update on hold 0x%lx : 0x%llx\n",
+			pos->offset, pos->data);
+		kfree(pos);
+	}
 	intel_vgpu_destroy_mm(vgpu->gtt.ggtt_mm);
 	vgpu->gtt.ggtt_mm = NULL;
 }
diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h
index 7a9b36176efb7..a11bfee1e0c85 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.h
+++ b/drivers/gpu/drm/i915/gvt/gtt.h
@@ -133,6 +133,12 @@ enum intel_gvt_mm_type {
 
 #define GVT_RING_CTX_NR_PDPS	GEN8_3LVL_PDPES
 
+struct intel_gvt_partial_pte {
+	unsigned long offset;
+	u64 data;
+	struct list_head list;
+};
+
 struct intel_vgpu_mm {
 	enum intel_gvt_mm_type type;
 	struct intel_vgpu *vgpu;
@@ -157,8 +163,7 @@ struct intel_vgpu_mm {
 		} ppgtt_mm;
 		struct {
 			void *virtual_ggtt;
-			unsigned long last_partial_off;
-			u64 last_partial_data;
+			struct list_head partial_pte_list;
 		} ggtt_mm;
 	};
 };
-- 
GitLab


From 606a745944bc0ebd14f77dfc61ac7d6cb685cefe Mon Sep 17 00:00:00 2001
From: Xinyun Liu <xinyun.liu@intel.com>
Date: Wed, 19 Sep 2018 15:28:53 +0800
Subject: [PATCH 0343/1890] drm/i915/gvt: correct mask setting for
 CSFE_CHICKEN1

CSFE_CHICKEN1(0x20d4) needs access with mask. This is caught in AcrnGT
conformance check test:

[drm:intel_gvt_vgpu_conformance_check]
	*ERROR* gvt: vgpu1 unconformance mmio 0x20d4:0x40004,0x4

Signed-off-by: Xinyun Liu <xinyun.liu@intel.com>
Signed-off-by: Zhi Wang <zhi.a.wang@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/mmio_context.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
index e872f4847fbe0..088a62ab2bc89 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.c
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
@@ -144,7 +144,7 @@ static struct engine_mmio gen9_engine_mmio_list[] __cacheline_aligned = {
 	{RCS, GAMT_CHKN_BIT_REG, 0x0, false}, /* 0x4ab8 */
 
 	{RCS, GEN9_GAMT_ECO_REG_RW_IA, 0x0, false}, /* 0x4ab0 */
-	{RCS, GEN9_CSFE_CHICKEN1_RCS, 0x0, false}, /* 0x20d4 */
+	{RCS, GEN9_CSFE_CHICKEN1_RCS, 0xffff, false}, /* 0x20d4 */
 
 	{RCS, GEN8_GARBCNTL, 0x0, false}, /* 0xb004 */
 	{RCS, GEN7_FF_THREAD_MODE, 0x0, false}, /* 0x20a0 */
-- 
GitLab


From 5e7154ff5e8e21dc9acac4f8dba7533552365374 Mon Sep 17 00:00:00 2001
From: Longhe Zheng <longhe.zheng@intel.com>
Date: Tue, 30 Oct 2018 16:12:10 +0800
Subject: [PATCH 0344/1890] drm/i915/gvt: Handle values of EDP_PSR_IMR and
 EDP_PSR_IIR

GVT-g only simulates DP port for guest and leaves EDP_PSR_IMR
and EDP_PSR_IIR registers as default MMIO read/write.
So guest won't get expected initial values of these registers when
initializing the gpu driver, which results in following warning and logs.

--------
Interrupt register 0x64838 is not zero: 0xffffffff
WARNING: CPU: 1 PID: 157 at drivers/gpu/drm/i915/i915_irq.c:177
gen3_assert_iir_is_zero+0x38/0xa0

Call Trace:
gen8_de_irq_postinstall+0xa7/0x400
gen8_irq_postinstall+0x27/0x80
drm_irq_install+0xbc/0x140
i915_driver_load+0xa9d/0xd50
--------
Because GVT-g does not handle EDP(embedded DP) simulation for guests,
always set EDP_PSR_IMR and EDP_PSR_IIR to value 0.

Signed-off-by: Longhe Zheng <longhe.zheng@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/handlers.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 94c1089ecf59e..f9002cb1f2a3b 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -1608,7 +1608,7 @@ static int bxt_gt_disp_pwron_write(struct intel_vgpu *vgpu,
 	return 0;
 }
 
-static int bxt_edp_psr_imr_iir_write(struct intel_vgpu *vgpu,
+static int edp_psr_imr_iir_write(struct intel_vgpu *vgpu,
 		unsigned int offset, void *p_data, unsigned int bytes)
 {
 	vgpu_vreg(vgpu, offset) = 0;
@@ -2613,6 +2613,9 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
 	MMIO_DFH(_MMIO(0x1a178), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
 	MMIO_DFH(_MMIO(0x1a17c), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
 	MMIO_DFH(_MMIO(0x2217c), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+
+	MMIO_DH(EDP_PSR_IMR, D_BDW_PLUS, NULL, edp_psr_imr_iir_write);
+	MMIO_DH(EDP_PSR_IIR, D_BDW_PLUS, NULL, edp_psr_imr_iir_write);
 	return 0;
 }
 
@@ -3216,9 +3219,6 @@ static int init_bxt_mmio_info(struct intel_gvt *gvt)
 	MMIO_D(HSW_TVIDEO_DIP_GCP(TRANSCODER_B), D_BXT);
 	MMIO_D(HSW_TVIDEO_DIP_GCP(TRANSCODER_C), D_BXT);
 
-	MMIO_DH(EDP_PSR_IMR, D_BXT, NULL, bxt_edp_psr_imr_iir_write);
-	MMIO_DH(EDP_PSR_IIR, D_BXT, NULL, bxt_edp_psr_imr_iir_write);
-
 	MMIO_D(RC6_CTX_BASE, D_BXT);
 
 	MMIO_D(GEN8_PUSHBUS_CONTROL, D_BXT);
-- 
GitLab


From 9c87156cce5a63735d1218f0096a65c50a7a32aa Mon Sep 17 00:00:00 2001
From: Joel Stanley <joel@jms.id.au>
Date: Wed, 31 Oct 2018 11:39:34 +1030
Subject: [PATCH 0345/1890] powerpc/xmon: Relax frame size for clang

When building with clang (8 trunk, 7.0 release) the frame size limit is
hit:

 arch/powerpc/xmon/xmon.c:452:12: warning: stack frame size of 2576
 bytes in function 'xmon_core' [-Wframe-larger-than=]

Some investigation by Naveen indicates this is due to clang saving the
addresses to printf format strings on the stack.

While this issue is investigated, bump up the frame size limit for xmon
when building with clang.

Link: https://github.com/ClangBuiltLinux/linux/issues/252
Signed-off-by: Joel Stanley <joel@jms.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/xmon/Makefile | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile
index 69e7fb47bcaa3..878f9c1d36150 100644
--- a/arch/powerpc/xmon/Makefile
+++ b/arch/powerpc/xmon/Makefile
@@ -11,6 +11,12 @@ UBSAN_SANITIZE := n
 ORIG_CFLAGS := $(KBUILD_CFLAGS)
 KBUILD_CFLAGS = $(subst $(CC_FLAGS_FTRACE),,$(ORIG_CFLAGS))
 
+ifdef CONFIG_CC_IS_CLANG
+# clang stores addresses on the stack causing the frame size to blow
+# out. See https://github.com/ClangBuiltLinux/linux/issues/252
+KBUILD_CFLAGS += -Wframe-larger-than=4096
+endif
+
 ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
 
 obj-y			+= xmon.o nonstdio.o spr_access.o
-- 
GitLab


From a4d8f64f7267a88d4688f5c216926f5f6cafbae6 Mon Sep 17 00:00:00 2001
From: Leilk Liu <leilk.liu@mediatek.com>
Date: Wed, 31 Oct 2018 16:49:16 +0800
Subject: [PATCH 0346/1890] spi: mediatek: use correct mata->xfer_len when in
 fifo transfer

when xfer_len is greater than 64 bytes and use fifo mode
to transfer, the actual length from the third time is mata->xfer_len
but not len in mtk_spi_interrupt().

Signed-off-by: Leilk Liu <leilk.liu@mediatek.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-mt65xx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c
index 3dc31627c6558..0c2867deb36fc 100644
--- a/drivers/spi/spi-mt65xx.c
+++ b/drivers/spi/spi-mt65xx.c
@@ -522,11 +522,11 @@ static irqreturn_t mtk_spi_interrupt(int irq, void *dev_id)
 		mdata->xfer_len = min(MTK_SPI_MAX_FIFO_SIZE, len);
 		mtk_spi_setup_packet(master);
 
-		cnt = len / 4;
+		cnt = mdata->xfer_len / 4;
 		iowrite32_rep(mdata->base + SPI_TX_DATA_REG,
 				trans->tx_buf + mdata->num_xfered, cnt);
 
-		remainder = len % 4;
+		remainder = mdata->xfer_len % 4;
 		if (remainder > 0) {
 			reg_val = 0;
 			memcpy(&reg_val,
-- 
GitLab


From 3ee9a76a8c5a10e1bfb04b81db767c6d562ddaf3 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Wed, 31 Oct 2018 00:48:12 +0000
Subject: [PATCH 0347/1890] ASoC: rsnd: fixup clock start checker

commit 4d230d12710646 ("ASoC: rsnd: fixup not to call clk_get/set under
non-atomic") fixuped clock start timing. But it exchanged clock start
checker from ssi->usrcnt to ssi->rate.

Current rsnd_ssi_master_clk_start() is called from .prepare,
but some player (for example GStreamer) might calls it many times.
In such case, the checker might returns error even though it was not
error. It should check ssi->usrcnt instead of ssi->rate.
This patch fixup it. Without this patch, GStreamer can't switch
48kHz / 44.1kHz.

Reported-by: Yusuke Goda <yusuke.goda.sx@renesas.com>
Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Tested-by: Yusuke Goda <yusuke.goda.sx@renesas.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/sh/rcar/ssi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/soc/sh/rcar/ssi.c b/sound/soc/sh/rcar/ssi.c
index fcb4df23248c1..6ec78f3096dd3 100644
--- a/sound/soc/sh/rcar/ssi.c
+++ b/sound/soc/sh/rcar/ssi.c
@@ -306,7 +306,7 @@ static int rsnd_ssi_master_clk_start(struct rsnd_mod *mod,
 	if (rsnd_ssi_is_multi_slave(mod, io))
 		return 0;
 
-	if (ssi->rate) {
+	if (ssi->usrcnt > 1) {
 		if (ssi->rate != rate) {
 			dev_err(dev, "SSI parent/child should use same rate\n");
 			return -EINVAL;
-- 
GitLab


From 5e1275808630ea3b2c97c776f40e475017535f72 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Wed, 31 Oct 2018 12:15:23 +0100
Subject: [PATCH 0348/1890] ovl: check whiteout in ovl_create_over_whiteout()

Kaixuxia repors that it's possible to crash overlayfs by removing the
whiteout on the upper layer before creating a directory over it.  This is a
reproducer:

 mkdir lower upper work merge
 touch lower/file
 mount -t overlay overlay -olowerdir=lower,upperdir=upper,workdir=work merge
 rm merge/file
 ls -al merge/file
 rm upper/file
 ls -al merge/
 mkdir merge/file

Before commencing with a vfs_rename(..., RENAME_EXCHANGE) verify that the
lookup of "upper" is positive and is a whiteout, and return ESTALE
otherwise.

Reported by: kaixuxia <xiakaixu1987@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Fixes: e9be9d5e76e3 ("overlay filesystem")
Cc: <stable@vger.kernel.org> # v3.18
---
 fs/overlayfs/dir.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index ce1857fb54348..c6289147c7871 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -462,6 +462,10 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
 	if (IS_ERR(upper))
 		goto out_unlock;
 
+	err = -ESTALE;
+	if (d_is_negative(upper) || !IS_WHITEOUT(d_inode(upper)))
+		goto out_dput;
+
 	newdentry = ovl_create_temp(workdir, cattr);
 	err = PTR_ERR(newdentry);
 	if (IS_ERR(newdentry))
-- 
GitLab


From c39b79082a38a4f8c801790edecbbb4d62ed2992 Mon Sep 17 00:00:00 2001
From: Joel Stanley <joel@jms.id.au>
Date: Mon, 29 Oct 2018 22:23:49 +1100
Subject: [PATCH 0349/1890] selftests/powerpc/ptrace: Fix out-of-tree build

We should use TEST_GEN_PROGS, not TEST_PROGS. That tells the selftests
makefile (lib.mk) that those tests are generated (built), and so it
adds the $(OUTPUT) prefix for us, making the out-of-tree build work
correctly.

It also means we don't need our own clean rule, lib.mk does it.

We also have to update the ptrace-pkey and core-pkey rules to use
$(OUTPUT).

Signed-off-by: Joel Stanley <joel@jms.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 tools/testing/selftests/powerpc/ptrace/Makefile | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile
index 9b35ca8e8f13d..8d3f006c98cc3 100644
--- a/tools/testing/selftests/powerpc/ptrace/Makefile
+++ b/tools/testing/selftests/powerpc/ptrace/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-TEST_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \
+TEST_GEN_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \
               ptrace-tar ptrace-tm-tar ptrace-tm-spd-tar ptrace-vsx ptrace-tm-vsx \
               ptrace-tm-spd-vsx ptrace-tm-spr ptrace-hwbreak ptrace-pkey core-pkey \
               perf-hwbreak ptrace-syscall
@@ -7,14 +7,9 @@ TEST_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \
 top_srcdir = ../../../../..
 include ../../lib.mk
 
-all: $(TEST_PROGS)
-
 CFLAGS += -m64 -I../../../../../usr/include -I../tm -mhtm -fno-pie
 
-ptrace-pkey core-pkey: child.h
-ptrace-pkey core-pkey: LDLIBS += -pthread
-
-$(TEST_PROGS): ../harness.c ../utils.c ../lib/reg.S ptrace.h
+$(OUTPUT)/ptrace-pkey $(OUTPUT)/core-pkey: child.h
+$(OUTPUT)/ptrace-pkey $(OUTPUT)/core-pkey: LDLIBS += -pthread
 
-clean:
-	rm -f $(TEST_PROGS) *.o
+$(TEST_GEN_PROGS): ../harness.c ../utils.c ../lib/reg.S ptrace.h
-- 
GitLab


From 27825349d7b238533a47e3d98b8bb0efd886b752 Mon Sep 17 00:00:00 2001
From: Joel Stanley <joel@jms.id.au>
Date: Mon, 29 Oct 2018 22:23:50 +1100
Subject: [PATCH 0350/1890] selftests/powerpc/signal: Fix out-of-tree build

We should use TEST_GEN_PROGS, not TEST_PROGS. That tells the selftests
makefile (lib.mk) that those tests are generated (built), and so it
adds the $(OUTPUT) prefix for us, making the out-of-tree build work
correctly.

It also means we don't need our own clean rule, lib.mk does it.

We also have to update the signal_tm rule to use $(OUTPUT).

Signed-off-by: Joel Stanley <joel@jms.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 tools/testing/selftests/powerpc/signal/Makefile | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/tools/testing/selftests/powerpc/signal/Makefile b/tools/testing/selftests/powerpc/signal/Makefile
index 1fca25c6ace06..209a958dca127 100644
--- a/tools/testing/selftests/powerpc/signal/Makefile
+++ b/tools/testing/selftests/powerpc/signal/Makefile
@@ -1,15 +1,10 @@
 # SPDX-License-Identifier: GPL-2.0
-TEST_PROGS := signal signal_tm
-
-all: $(TEST_PROGS)
-
-$(TEST_PROGS): ../harness.c ../utils.c signal.S
+TEST_GEN_PROGS := signal signal_tm
 
 CFLAGS += -maltivec
-signal_tm: CFLAGS += -mhtm
+$(OUTPUT)/signal_tm: CFLAGS += -mhtm
 
 top_srcdir = ../../../../..
 include ../../lib.mk
 
-clean:
-	rm -f $(TEST_PROGS) *.o
+$(TEST_GEN_PROGS): ../harness.c ../utils.c signal.S
-- 
GitLab


From 98415da03ae6559dc62899fd31e55e194779c45b Mon Sep 17 00:00:00 2001
From: Joel Stanley <joel@jms.id.au>
Date: Mon, 29 Oct 2018 22:23:51 +1100
Subject: [PATCH 0351/1890] selftests/powerpc/pmu: Link ebb tests with -no-pie

When running the ebb tests after building on a ppc64le Ubuntu machine:

 $ pmu/ebb/reg_access_test: error while loading shared libraries:
 R_PPC64_ADDR16_HI reloc at 0x000000013a965130 for symbol `' out of
 range

This is because the Ubuntu toolchain builds has PIE enabled by default.
Change it to be always off instead.

Signed-off-by: Joel Stanley <joel@jms.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 tools/testing/selftests/powerpc/pmu/ebb/Makefile | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
index bd5dfa509272a..23f4caf48ffc6 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
@@ -5,6 +5,9 @@ noarg:
 # The EBB handler is 64-bit code and everything links against it
 CFLAGS += -m64
 
+# Toolchains may build PIE by default which breaks the assembly
+LDFLAGS += -no-pie
+
 TEST_GEN_PROGS := reg_access_test event_attributes_test cycles_test	\
 	 cycles_with_freeze_test pmc56_overflow_test		\
 	 ebb_vs_cpu_event_test cpu_event_vs_ebb_test		\
-- 
GitLab


From 266bac361d5677e61a6815bd29abeb3bdced2b07 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Mon, 29 Oct 2018 22:23:52 +1100
Subject: [PATCH 0352/1890] selftests/powerpc/switch_endian: Fix out-of-tree
 build

For the out-of-tree build to work we need to tell switch_endian_test
to look for check-reversed.S in $(OUTPUT).

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 tools/testing/selftests/powerpc/switch_endian/Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/powerpc/switch_endian/Makefile b/tools/testing/selftests/powerpc/switch_endian/Makefile
index fcd2dcb8972ba..bdc081afedb0f 100644
--- a/tools/testing/selftests/powerpc/switch_endian/Makefile
+++ b/tools/testing/selftests/powerpc/switch_endian/Makefile
@@ -8,6 +8,7 @@ EXTRA_CLEAN = $(OUTPUT)/*.o $(OUTPUT)/check-reversed.S
 top_srcdir = ../../../../..
 include ../../lib.mk
 
+$(OUTPUT)/switch_endian_test: ASFLAGS += -I $(OUTPUT)
 $(OUTPUT)/switch_endian_test: $(OUTPUT)/check-reversed.S
 
 $(OUTPUT)/check-reversed.o: $(OUTPUT)/check.o
-- 
GitLab


From 69f8117f17b332a68cd8f4bf8c2d0d3d5b84efc5 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Mon, 29 Oct 2018 22:23:53 +1100
Subject: [PATCH 0353/1890] selftests/powerpc/cache_shape: Fix out-of-tree
 build

Use TEST_GEN_PROGS and don't redefine all, this makes the out-of-tree
build work. We need to move the extra dependencies below the include
of lib.mk, because it adds the $(OUTPUT) prefix if it's defined.

We can also drop the clean rule, lib.mk does it for us.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 tools/testing/selftests/powerpc/cache_shape/Makefile | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/tools/testing/selftests/powerpc/cache_shape/Makefile b/tools/testing/selftests/powerpc/cache_shape/Makefile
index ede4d3dae7505..689f6c8ebcd8d 100644
--- a/tools/testing/selftests/powerpc/cache_shape/Makefile
+++ b/tools/testing/selftests/powerpc/cache_shape/Makefile
@@ -1,12 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
-TEST_PROGS := cache_shape
-
-all: $(TEST_PROGS)
-
-$(TEST_PROGS): ../harness.c ../utils.c
+TEST_GEN_PROGS := cache_shape
 
 top_srcdir = ../../../../..
 include ../../lib.mk
 
-clean:
-	rm -f $(TEST_PROGS) *.o
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
-- 
GitLab


From 218d61110f69632974034b6e27686ce482a1c455 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 29 Oct 2018 09:47:00 -0300
Subject: [PATCH 0354/1890] perf top: Do not use overwrite mode by default

Enabling --overwrite mode allows us to to use just the most recent
records, which helps in high core count machines such as Knights
Landing/Mill, but right now is being disabled by default as the pausing
used in this technique is leading to loss of metadata events such as
PERF_RECORD_MMAP which makes 'perf top' unable to resolve samples,
leading to lots of unknown samples appearing on the UI.

Enabling this may be useful if you are in such machines and profiling a
workload that doesn't creates short lived threads and/or doesn't uses
many executable mmap operations.

Work is being planed to solve this situation, till then, this will
remain disabled by default.

Reported-by: David Miller <davem@davemloft.net>
Acked-by: Kan Liang <kan.liang@intel.com>
Link: https://lkml.kernel.org/r/4f84468f-37d9-cf1b-12c1-514ef74b6a48@linux.intel.com
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Fixes: ebebbf082357 ("perf top: Switch default mode to overwrite mode")
Link: https://lkml.kernel.org/n/tip-ehvf77vi1si9409r7p4wx788@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-top.txt | 11 ++++++++---
 tools/perf/builtin-top.c              | 11 +++++++++--
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index d4be6061fe1c1..808b664343c9f 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -243,9 +243,14 @@ Default is to monitor all CPUS.
 	Enable hierarchy output.
 
 --overwrite::
-	This is the default, but for investigating problems with it or any other strange
-	behaviour like lots of unknown samples, we may want to disable this mode by using
-	--no-overwrite.
+	Enable this to use just the most recent records, which helps in high core count
+	machines such as Knights Landing/Mill, but right now is disabled by default as
+	the pausing used in this technique is leading to loss of metadata events such
+	as PERF_RECORD_MMAP which makes 'perf top' unable to resolve samples, leading
+	to lots of unknown samples appearing on the UI. Enable this if you are in such
+	machines and profiling a workload that doesn't creates short lived threads and/or
+	doesn't uses many executable mmap operations. Work is being planed to solve
+	this situation, till then, this will remain disabled by default.
 
 --force::
 	Don't do ownership validation.
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 214fad747b041..8e29e0cc86263 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1257,7 +1257,14 @@ int cmd_top(int argc, const char **argv)
 				.uses_mmap   = true,
 			},
 			.proc_map_timeout    = 500,
-			.overwrite	= 1,
+			/*
+			 * FIXME: This will lose PERF_RECORD_MMAP and other metadata
+			 * when we pause, fix that and reenable. Probably using a
+			 * separate evlist with a dummy event, i.e. a non-overwrite
+			 * ring buffer just for metadata events, while PERF_RECORD_SAMPLE
+			 * stays in overwrite mode. -acme
+			 * */
+			.overwrite	= 0,
 		},
 		.max_stack	     = sysctl__max_stack(),
 		.annotation_opts     = annotation__default_options,
@@ -1373,7 +1380,7 @@ int cmd_top(int argc, const char **argv)
 	OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy,
 		    "Show entries in a hierarchy"),
 	OPT_BOOLEAN(0, "overwrite", &top.record_opts.overwrite,
-		    "Use a backward ring buffer, default: yes"),
+		    "Use a backward ring buffer, default: no"),
 	OPT_BOOLEAN(0, "force", &symbol_conf.force, "don't complain, do it"),
 	OPT_UINTEGER(0, "num-thread-synthesize", &top.nr_threads_synthesize,
 			"number of thread to run event synthesize"),
-- 
GitLab


From 1fe627da30331024f453faef04d500079b901107 Mon Sep 17 00:00:00 2001
From: Milian Wolff <milian.wolff@kdab.com>
Date: Mon, 29 Oct 2018 15:16:44 +0100
Subject: [PATCH 0355/1890] perf unwind: Take pgoff into account when reporting
 elf to libdwfl

libdwfl parses an ELF file itself and creates mappings for the
individual sections. perf on the other hand sees raw mmap events which
represent individual sections. When we encounter an address pointing
into a mapping with pgoff != 0, we must take that into account and
report the file at the non-offset base address.

This fixes unwinding with libdwfl in some cases. E.g. for a file like:

```

using namespace std;

mutex g_mutex;

double worker()
{
    lock_guard<mutex> guard(g_mutex);
    uniform_real_distribution<double> uniform(-1E5, 1E5);
    default_random_engine engine;
    double s = 0;
    for (int i = 0; i < 1000; ++i) {
        s += norm(complex<double>(uniform(engine), uniform(engine)));
    }
    cout << s << endl;
    return s;
}

int main()
{
    vector<std::future<double>> results;
    for (int i = 0; i < 10000; ++i) {
        results.push_back(async(launch::async, worker));
    }
    return 0;
}
```

Compile it with `g++ -g -O2 -lpthread cpp-locking.cpp  -o cpp-locking`,
then record it with `perf record --call-graph dwarf -e
sched:sched_switch`.

When you analyze it with `perf script` and libunwind, you should see:

```
cpp-locking 20038 [005] 54830.236589: sched:sched_switch: prev_comm=cpp-locking prev_pid=20038 prev_prio=120 prev_state=T ==> next_comm=swapper/5 next_pid=0 next_prio=120
        ffffffffb166fec5 __sched_text_start+0x545 (/lib/modules/4.14.78-1-lts/build/vmlinux)
        ffffffffb166fec5 __sched_text_start+0x545 (/lib/modules/4.14.78-1-lts/build/vmlinux)
        ffffffffb1670208 schedule+0x28 (/lib/modules/4.14.78-1-lts/build/vmlinux)
        ffffffffb16737cc rwsem_down_read_failed+0xec (/lib/modules/4.14.78-1-lts/build/vmlinux)
        ffffffffb1665e04 call_rwsem_down_read_failed+0x14 (/lib/modules/4.14.78-1-lts/build/vmlinux)
        ffffffffb1672a03 down_read+0x13 (/lib/modules/4.14.78-1-lts/build/vmlinux)
        ffffffffb106bd85 __do_page_fault+0x445 (/lib/modules/4.14.78-1-lts/build/vmlinux)
        ffffffffb18015f5 page_fault+0x45 (/lib/modules/4.14.78-1-lts/build/vmlinux)
            7f38e4252591 new_heap+0x101 (/usr/lib/libc-2.28.so)
            7f38e4252d0b arena_get2.part.4+0x2fb (/usr/lib/libc-2.28.so)
            7f38e4255b1c tcache_init.part.6+0xec (/usr/lib/libc-2.28.so)
            7f38e42569e5 __GI___libc_malloc+0x115 (inlined)
            7f38e4241790 __GI__IO_file_doallocate+0x90 (inlined)
            7f38e424fbbf __GI__IO_doallocbuf+0x4f (inlined)
            7f38e424ee47 __GI__IO_file_overflow+0x197 (inlined)
            7f38e424df36 _IO_new_file_xsputn+0x116 (inlined)
            7f38e4242bfb __GI__IO_fwrite+0xdb (inlined)
            7f38e463fa6d std::basic_streambuf<char, std::char_traits<char> >::sputn(char const*, long)+0x1cd (inlined)
            7f38e463fa6d std::ostreambuf_iterator<char, std::char_traits<char> >::_M_put(char const*, long)+0x1cd (inlined)
            7f38e463fa6d std::ostreambuf_iterator<char, std::char_traits<char> > std::__write<char>(std::ostreambuf_iterator<char, std::char_traits<char> >, char const*, int)+0x1cd (inlined)
            7f38e463fa6d std::ostreambuf_iterator<char, std::char_traits<char> > std::num_put<char, std::ostreambuf_iterator<char, std::char_traits<char> > >::_M_insert_float<double>(std::ostreambuf_iterator<c>
            7f38e464bd70 std::num_put<char, std::ostreambuf_iterator<char, std::char_traits<char> > >::put(std::ostreambuf_iterator<char, std::char_traits<char> >, std::ios_base&, char, double) const+0x90 (inl>
            7f38e464bd70 std::ostream& std::ostream::_M_insert<double>(double)+0x90 (/usr/lib/libstdc++.so.6.0.25)
            563b9cb502f7 std::ostream::operator<<(double)+0xb7 (inlined)
            563b9cb502f7 worker()+0xb7 (/ssd/milian/projects/kdab/rnd/hotspot/build/tests/test-clients/cpp-locking/cpp-locking)
            563b9cb506fb double std::__invoke_impl<double, double (*)()>(std::__invoke_other, double (*&&)())+0x2b (inlined)
            563b9cb506fb std::__invoke_result<double (*)()>::type std::__invoke<double (*)()>(double (*&&)())+0x2b (inlined)
            563b9cb506fb decltype (__invoke((_S_declval<0ul>)())) std::thread::_Invoker<std::tuple<double (*)()> >::_M_invoke<0ul>(std::_Index_tuple<0ul>)+0x2b (inlined)
            563b9cb506fb std::thread::_Invoker<std::tuple<double (*)()> >::operator()()+0x2b (inlined)
            563b9cb506fb std::__future_base::_Task_setter<std::unique_ptr<std::__future_base::_Result<double>, std::__future_base::_Result_base::_Deleter>, std::thread::_Invoker<std::tuple<double (*)()> >, dou>
            563b9cb506fb std::_Function_handler<std::unique_ptr<std::__future_base::_Result_base, std::__future_base::_Result_base::_Deleter> (), std::__future_base::_Task_setter<std::unique_ptr<std::__future_>
            563b9cb507e8 std::function<std::unique_ptr<std::__future_base::_Result_base, std::__future_base::_Result_base::_Deleter> ()>::operator()() const+0x28 (inlined)
            563b9cb507e8 std::__future_base::_State_baseV2::_M_do_set(std::function<std::unique_ptr<std::__future_base::_Result_base, std::__future_base::_Result_base::_Deleter> ()>*, bool*)+0x28 (/ssd/milian/>
            7f38e46d24fe __pthread_once_slow+0xbe (/usr/lib/libpthread-2.28.so)
            563b9cb51149 __gthread_once+0xe9 (inlined)
            563b9cb51149 void std::call_once<void (std::__future_base::_State_baseV2::*)(std::function<std::unique_ptr<std::__future_base::_Result_base, std::__future_base::_Result_base::_Deleter> ()>*, bool*)>
            563b9cb51149 std::__future_base::_State_baseV2::_M_set_result(std::function<std::unique_ptr<std::__future_base::_Result_base, std::__future_base::_Result_base::_Deleter> ()>, bool)+0xe9 (inlined)
            563b9cb51149 std::__future_base::_Async_state_impl<std::thread::_Invoker<std::tuple<double (*)()> >, double>::_Async_state_impl(std::thread::_Invoker<std::tuple<double (*)()> >&&)::{lambda()#1}::op>
            563b9cb51149 void std::__invoke_impl<void, std::__future_base::_Async_state_impl<std::thread::_Invoker<std::tuple<double (*)()> >, double>::_Async_state_impl(std::thread::_Invoker<std::tuple<double>
            563b9cb51149 std::__invoke_result<std::__future_base::_Async_state_impl<std::thread::_Invoker<std::tuple<double (*)()> >, double>::_Async_state_impl(std::thread::_Invoker<std::tuple<double (*)()> >>
            563b9cb51149 decltype (__invoke((_S_declval<0ul>)())) std::thread::_Invoker<std::tuple<std::__future_base::_Async_state_impl<std::thread::_Invoker<std::tuple<double (*)()> >, double>::_Async_state_>
            563b9cb51149 std::thread::_Invoker<std::tuple<std::__future_base::_Async_state_impl<std::thread::_Invoker<std::tuple<double (*)()> >, double>::_Async_state_impl(std::thread::_Invoker<std::tuple<dou>
            563b9cb51149 std::thread::_State_impl<std::thread::_Invoker<std::tuple<std::__future_base::_Async_state_impl<std::thread::_Invoker<std::tuple<double (*)()> >, double>::_Async_state_impl(std::thread>
            7f38e45f0062 execute_native_thread_routine+0x12 (/usr/lib/libstdc++.so.6.0.25)
            7f38e46caa9c start_thread+0xfc (/usr/lib/libpthread-2.28.so)
            7f38e42ccb22 __GI___clone+0x42 (inlined)
```

Before this patch, using libdwfl, you would see:

```
cpp-locking 20038 [005] 54830.236589: sched:sched_switch: prev_comm=cpp-locking prev_pid=20038 prev_prio=120 prev_state=T ==> next_comm=swapper/5 next_pid=0 next_prio=120
        ffffffffb166fec5 __sched_text_start+0x545 (/lib/modules/4.14.78-1-lts/build/vmlinux)
        ffffffffb166fec5 __sched_text_start+0x545 (/lib/modules/4.14.78-1-lts/build/vmlinux)
        ffffffffb1670208 schedule+0x28 (/lib/modules/4.14.78-1-lts/build/vmlinux)
        ffffffffb16737cc rwsem_down_read_failed+0xec (/lib/modules/4.14.78-1-lts/build/vmlinux)
        ffffffffb1665e04 call_rwsem_down_read_failed+0x14 (/lib/modules/4.14.78-1-lts/build/vmlinux)
        ffffffffb1672a03 down_read+0x13 (/lib/modules/4.14.78-1-lts/build/vmlinux)
        ffffffffb106bd85 __do_page_fault+0x445 (/lib/modules/4.14.78-1-lts/build/vmlinux)
        ffffffffb18015f5 page_fault+0x45 (/lib/modules/4.14.78-1-lts/build/vmlinux)
            7f38e4252591 new_heap+0x101 (/usr/lib/libc-2.28.so)
        a041161e77950c5c [unknown] ([unknown])
```

With this patch applied, we get a bit further in unwinding:

```
cpp-locking 20038 [005] 54830.236589: sched:sched_switch: prev_comm=cpp-locking prev_pid=20038 prev_prio=120 prev_state=T ==> next_comm=swapper/5 next_pid=0 next_prio=120
        ffffffffb166fec5 __sched_text_start+0x545 (/lib/modules/4.14.78-1-lts/build/vmlinux)
        ffffffffb166fec5 __sched_text_start+0x545 (/lib/modules/4.14.78-1-lts/build/vmlinux)
        ffffffffb1670208 schedule+0x28 (/lib/modules/4.14.78-1-lts/build/vmlinux)
        ffffffffb16737cc rwsem_down_read_failed+0xec (/lib/modules/4.14.78-1-lts/build/vmlinux)
        ffffffffb1665e04 call_rwsem_down_read_failed+0x14 (/lib/modules/4.14.78-1-lts/build/vmlinux)
        ffffffffb1672a03 down_read+0x13 (/lib/modules/4.14.78-1-lts/build/vmlinux)
        ffffffffb106bd85 __do_page_fault+0x445 (/lib/modules/4.14.78-1-lts/build/vmlinux)
        ffffffffb18015f5 page_fault+0x45 (/lib/modules/4.14.78-1-lts/build/vmlinux)
            7f38e4252591 new_heap+0x101 (/usr/lib/libc-2.28.so)
            7f38e4252d0b arena_get2.part.4+0x2fb (/usr/lib/libc-2.28.so)
            7f38e4255b1c tcache_init.part.6+0xec (/usr/lib/libc-2.28.so)
            7f38e42569e5 __GI___libc_malloc+0x115 (inlined)
            7f38e4241790 __GI__IO_file_doallocate+0x90 (inlined)
            7f38e424fbbf __GI__IO_doallocbuf+0x4f (inlined)
            7f38e424ee47 __GI__IO_file_overflow+0x197 (inlined)
            7f38e424df36 _IO_new_file_xsputn+0x116 (inlined)
            7f38e4242bfb __GI__IO_fwrite+0xdb (inlined)
            7f38e463fa6d std::basic_streambuf<char, std::char_traits<char> >::sputn(char const*, long)+0x1cd (inlined)
            7f38e463fa6d std::ostreambuf_iterator<char, std::char_traits<char> >::_M_put(char const*, long)+0x1cd (inlined)
            7f38e463fa6d std::ostreambuf_iterator<char, std::char_traits<char> > std::__write<char>(std::ostreambuf_iterator<char, std::char_traits<char> >, char const*, int)+0x1cd (inlined)
            7f38e463fa6d std::ostreambuf_iterator<char, std::char_traits<char> > std::num_put<char, std::ostreambuf_iterator<char, std::char_traits<char> > >::_M_insert_float<double>(std::ostreambuf_iterator<c>
            7f38e464bd70 std::num_put<char, std::ostreambuf_iterator<char, std::char_traits<char> > >::put(std::ostreambuf_iterator<char, std::char_traits<char> >, std::ios_base&, char, double) const+0x90 (inl>
            7f38e464bd70 std::ostream& std::ostream::_M_insert<double>(double)+0x90 (/usr/lib/libstdc++.so.6.0.25)
            563b9cb502f7 std::ostream::operator<<(double)+0xb7 (inlined)
            563b9cb502f7 worker()+0xb7 (/ssd/milian/projects/kdab/rnd/hotspot/build/tests/test-clients/cpp-locking/cpp-locking)
        6eab825c1ee3e4ff [unknown] ([unknown])
```

Note that the backtrace is still stopping too early, when compared to
the nice results obtained via libunwind. It's unclear so far what the
reason for that is.

Committer note:

Further comment by Milian on the thread started on the Link: tag below:

 ---
The remaining issue is due to a bug in elfutils:

https://sourceware.org/ml/elfutils-devel/2018-q4/msg00089.html

With both patches applied, libunwind and elfutils produce the same output for
the above scenario.
 ---

Signed-off-by: Milian Wolff <milian.wolff@kdab.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: http://lkml.kernel.org/r/20181029141644.3907-1-milian.wolff@kdab.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/unwind-libdw.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c
index 6f318b15950e8..5eff9bfc57583 100644
--- a/tools/perf/util/unwind-libdw.c
+++ b/tools/perf/util/unwind-libdw.c
@@ -45,13 +45,13 @@ static int __report_module(struct addr_location *al, u64 ip,
 		Dwarf_Addr s;
 
 		dwfl_module_info(mod, NULL, &s, NULL, NULL, NULL, NULL, NULL);
-		if (s != al->map->start)
+		if (s != al->map->start - al->map->pgoff)
 			mod = 0;
 	}
 
 	if (!mod)
 		mod = dwfl_report_elf(ui->dwfl, dso->short_name,
-				      (dso->symsrc_filename ? dso->symsrc_filename : dso->long_name), -1, al->map->start,
+				      (dso->symsrc_filename ? dso->symsrc_filename : dso->long_name), -1, al->map->start - al->map->pgoff,
 				      false);
 
 	return mod && dwfl_addrmodule(ui->dwfl, ip) == mod ? 0 : -1;
-- 
GitLab


From d6c9c05fe1eb4b213b183d8a1e79416256dc833a Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Tue, 30 Oct 2018 15:18:28 +0800
Subject: [PATCH 0356/1890] perf cs-etm: Correct CPU mode for samples

Since commit edeb0c90df35 ("perf tools: Stop fallbacking to kallsyms for
vdso symbols lookup"), the kernel address cannot be properly parsed to
kernel symbol with command 'perf script -k vmlinux'.  The reason is
CoreSight samples is always to set CPU mode as PERF_RECORD_MISC_USER,
thus it fails to find corresponding map/dso in below flows:

  process_sample_event()
    `-> machine__resolve()
	  `-> thread__find_map(thread, sample->cpumode, sample->ip, al);

In this flow it needs to pass argument 'sample->cpumode' to tell what's
the CPU mode, before it always passed PERF_RECORD_MISC_USER but without
any failure until the commit edeb0c90df35 ("perf tools: Stop fallbacking
to kallsyms for vdso symbols lookup") has been merged.  The reason is
even with the wrong CPU mode the function thread__find_map() firstly
fails to find map but it will rollback to find kernel map for vdso
symbols lookup.  In the latest code it has removed the fallback code,
thus if CPU mode is PERF_RECORD_MISC_USER then it cannot find map
anymore with kernel address.

This patch is to correct samples CPU mode setting, it creates a new
helper function cs_etm__cpu_mode() to tell what's the CPU mode based on
the address with the info from machine structure; this patch has a bit
extension to check not only kernel and user mode, but also check for
host/guest and hypervisor mode.  Finally this patch uses the function in
instruction and branch samples and also apply in cs_etm__mem_access()
for a minor polishing.

Signed-off-by: Leo Yan <leo.yan@linaro.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: David Miller <davem@davemloft.net>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: coresight@lists.linaro.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: stable@kernel.org # v4.19
Link: http://lkml.kernel.org/r/1540883908-17018-1-git-send-email-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/cs-etm.c | 39 ++++++++++++++++++++++++++++++---------
 1 file changed, 30 insertions(+), 9 deletions(-)

diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 3b37d66dc5337..73430b73570d5 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -244,6 +244,27 @@ static void cs_etm__free(struct perf_session *session)
 	zfree(&aux);
 }
 
+static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address)
+{
+	struct machine *machine;
+
+	machine = etmq->etm->machine;
+
+	if (address >= etmq->etm->kernel_start) {
+		if (machine__is_host(machine))
+			return PERF_RECORD_MISC_KERNEL;
+		else
+			return PERF_RECORD_MISC_GUEST_KERNEL;
+	} else {
+		if (machine__is_host(machine))
+			return PERF_RECORD_MISC_USER;
+		else if (perf_guest)
+			return PERF_RECORD_MISC_GUEST_USER;
+		else
+			return PERF_RECORD_MISC_HYPERVISOR;
+	}
+}
+
 static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address,
 			      size_t size, u8 *buffer)
 {
@@ -258,10 +279,7 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address,
 		return -1;
 
 	machine = etmq->etm->machine;
-	if (address >= etmq->etm->kernel_start)
-		cpumode = PERF_RECORD_MISC_KERNEL;
-	else
-		cpumode = PERF_RECORD_MISC_USER;
+	cpumode = cs_etm__cpu_mode(etmq, address);
 
 	thread = etmq->thread;
 	if (!thread) {
@@ -653,7 +671,7 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
 	struct perf_sample sample = {.ip = 0,};
 
 	event->sample.header.type = PERF_RECORD_SAMPLE;
-	event->sample.header.misc = PERF_RECORD_MISC_USER;
+	event->sample.header.misc = cs_etm__cpu_mode(etmq, addr);
 	event->sample.header.size = sizeof(struct perf_event_header);
 
 	sample.ip = addr;
@@ -665,7 +683,7 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
 	sample.cpu = etmq->packet->cpu;
 	sample.flags = 0;
 	sample.insn_len = 1;
-	sample.cpumode = event->header.misc;
+	sample.cpumode = event->sample.header.misc;
 
 	if (etm->synth_opts.last_branch) {
 		cs_etm__copy_last_branch_rb(etmq);
@@ -706,12 +724,15 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq)
 		u64			nr;
 		struct branch_entry	entries;
 	} dummy_bs;
+	u64 ip;
+
+	ip = cs_etm__last_executed_instr(etmq->prev_packet);
 
 	event->sample.header.type = PERF_RECORD_SAMPLE;
-	event->sample.header.misc = PERF_RECORD_MISC_USER;
+	event->sample.header.misc = cs_etm__cpu_mode(etmq, ip);
 	event->sample.header.size = sizeof(struct perf_event_header);
 
-	sample.ip = cs_etm__last_executed_instr(etmq->prev_packet);
+	sample.ip = ip;
 	sample.pid = etmq->pid;
 	sample.tid = etmq->tid;
 	sample.addr = cs_etm__first_executed_instr(etmq->packet);
@@ -720,7 +741,7 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq)
 	sample.period = 1;
 	sample.cpu = etmq->packet->cpu;
 	sample.flags = 0;
-	sample.cpumode = PERF_RECORD_MISC_USER;
+	sample.cpumode = event->sample.header.misc;
 
 	/*
 	 * perf report cannot handle events without a branch stack
-- 
GitLab


From e9024d519d892b38176cafd46f68a7cdddd77412 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 30 Oct 2018 12:12:26 -0300
Subject: [PATCH 0357/1890] perf callchain: Honour the ordering of
 PERF_CONTEXT_{USER,KERNEL,etc}

When processing using 'perf report -g caller', which is the default, we
ended up reverting the callchain entries received from the kernel, but
simply reverting throws away the information that tells that from a
point onwards the addresses are for userspace, kernel, guest kernel,
guest user, hypervisor.

The idea is that if we are walking backwards, for each cluster of
non-cpumode entries we have to first scan backwards for the next one and
use that for the cluster.

This seems silly and more expensive than it needs to be but it is enough
for a initial fix.

The code here is really complicated because it is intimately intertwined
with the lbr and branch handling, as well as this callchain order,
further fixes will be needed to properly take into account the cpumode
in those cases.

Another problem with ORDER_CALLER is that the NULL "0" IP that is at the
end of most callchains shows up at the top of the histogram because
every callchain contains it and with ORDER_CALLER it is the first entry.

Signed-off-by: David S. Miller <davem@davemloft.net>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Souvik Banerjee <souvik1997@gmail.com>
Cc: Wang Nan <wangnan0@huawei.com>
Cc: stable@vger.kernel.org # 4.19
Link: https://lkml.kernel.org/n/tip-2wt3ayp6j2y2f2xowixa8y6y@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/machine.c | 35 ++++++++++++++++++++++++++++++++++-
 1 file changed, 34 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 111ae858cbcbd..8ee8ab39d8acf 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -2140,6 +2140,27 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
 	return 0;
 }
 
+static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread,
+			     struct callchain_cursor *cursor,
+			     struct symbol **parent,
+			     struct addr_location *root_al,
+			     u8 *cpumode, int ent)
+{
+	int err = 0;
+
+	while (--ent >= 0) {
+		u64 ip = chain->ips[ent];
+
+		if (ip >= PERF_CONTEXT_MAX) {
+			err = add_callchain_ip(thread, cursor, parent,
+					       root_al, cpumode, ip,
+					       false, NULL, NULL, 0);
+			break;
+		}
+	}
+	return err;
+}
+
 static int thread__resolve_callchain_sample(struct thread *thread,
 					    struct callchain_cursor *cursor,
 					    struct perf_evsel *evsel,
@@ -2246,6 +2267,12 @@ static int thread__resolve_callchain_sample(struct thread *thread,
 	}
 
 check_calls:
+	if (callchain_param.order != ORDER_CALLEE) {
+		err = find_prev_cpumode(chain, thread, cursor, parent, root_al,
+					&cpumode, chain->nr - first_call);
+		if (err)
+			return (err < 0) ? err : 0;
+	}
 	for (i = first_call, nr_entries = 0;
 	     i < chain_nr && nr_entries < max_stack; i++) {
 		u64 ip;
@@ -2260,9 +2287,15 @@ static int thread__resolve_callchain_sample(struct thread *thread,
 			continue;
 #endif
 		ip = chain->ips[j];
-
 		if (ip < PERF_CONTEXT_MAX)
                        ++nr_entries;
+		else if (callchain_param.order != ORDER_CALLEE) {
+			err = find_prev_cpumode(chain, thread, cursor, parent,
+						root_al, &cpumode, j);
+			if (err)
+				return (err < 0) ? err : 0;
+			continue;
+		}
 
 		err = add_callchain_ip(thread, cursor, parent,
 				       root_al, &cpumode, ip,
-- 
GitLab


From 733ac4f9935c4b257aff2ec476e6c20665daf6c2 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 30 Oct 2018 12:27:37 -0300
Subject: [PATCH 0358/1890] tools include uapi: Update linux/fs.h copy

To silence this perf build warning:

  Warning: Kernel ABI header at 'tools/include/uapi/linux/fs.h' differs from latest version at 'include/uapi/linux/fs.h'
  diff -u tools/include/uapi/linux/fs.h include/uapi/linux/fs.h

Due to just two comments added by:

  Fixes: 578bdaabd015 ("crypto: speck - remove Speck")

So nothing that entails changes in tools/, that so far uses fs.h to
generate the mount and umount syscalls 'flags' argument integer->string
tables with:

  $ tools/perf/trace/beauty/mount_flags.sh
  static const char *mount_flags[] = {
	[4096 ? (ilog2(4096) + 1) : 0] = "BIND",
<SNIP>
	[30 + 1] = "ACTIVE",
	[31 + 1] = "NOUSER",
  };
  $
  # trace -e mount,umount mount --bind /proc /mnt
     1.228 ( 2.581 ms): mount/1068 mount(dev_name: /mnt, dir_name: 0x55f011c354a0, type: 0x55f011c38170, flags: BIND) = 0
  # trace -e mount,umount umount /proc /mnt
  umount: /proc: target is busy.
     1.587 ( 0.010 ms): umount/1070 umount2(name: /proc) = -1 EBUSY Device or resource busy
     1.799 (12.660 ms): umount/1070 umount2(name: /mnt) = 0
  #

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Cc: Jason A. Donenfeld <Jason@zx2c4.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Link: https://lkml.kernel.org/n/tip-c00bqzclscgah26z2g5zxm73@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/uapi/linux/fs.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h
index 73e01918f9963..a441ea1bfe6d9 100644
--- a/tools/include/uapi/linux/fs.h
+++ b/tools/include/uapi/linux/fs.h
@@ -279,8 +279,8 @@ struct fsxattr {
 #define FS_ENCRYPTION_MODE_AES_256_CTS		4
 #define FS_ENCRYPTION_MODE_AES_128_CBC		5
 #define FS_ENCRYPTION_MODE_AES_128_CTS		6
-#define FS_ENCRYPTION_MODE_SPECK128_256_XTS	7
-#define FS_ENCRYPTION_MODE_SPECK128_256_CTS	8
+#define FS_ENCRYPTION_MODE_SPECK128_256_XTS	7 /* Removed, do not use. */
+#define FS_ENCRYPTION_MODE_SPECK128_256_CTS	8 /* Removed, do not use. */
 
 struct fscrypt_policy {
 	__u8 version;
-- 
GitLab


From 8dd4c0f68c0db4c0f01af60a99a7ed34fd3dee2b Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 30 Oct 2018 13:10:50 -0300
Subject: [PATCH 0359/1890] tools arch uapi: Update asm-generic/unistd.h and
 arm64 unistd.h copies

To get the changes in:

  82b355d161c9 ("y2038: Remove newstat family from default syscall set")

Which will make the syscall table used by 'perf trace' for arm64 to be
updated from the changes in that patch.

This silences these perf build warnings:

  Warning: Kernel ABI header at 'tools/arch/arm64/include/uapi/asm/unistd.h' differs from latest version at 'arch/arm64/include/uapi/asm/unistd.h'
  diff -u tools/arch/arm64/include/uapi/asm/unistd.h arch/arm64/include/uapi/asm/unistd.h
  Warning: Kernel ABI header at 'tools/include/uapi/asm-generic/unistd.h' differs from latest version at 'include/uapi/asm-generic/unistd.h'
  diff -u tools/include/uapi/asm-generic/unistd.h include/uapi/asm-generic/unistd.h

Cc: Kim Phillips <kim.phillips@arm.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: https://lkml.kernel.org/n/tip-3euy7c4yy5mvnp5bm16t9vqg@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/arm64/include/uapi/asm/unistd.h | 1 +
 tools/include/uapi/asm-generic/unistd.h    | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/tools/arch/arm64/include/uapi/asm/unistd.h b/tools/arch/arm64/include/uapi/asm/unistd.h
index 5072cbd15c829..dae1584cf017f 100644
--- a/tools/arch/arm64/include/uapi/asm/unistd.h
+++ b/tools/arch/arm64/include/uapi/asm/unistd.h
@@ -16,5 +16,6 @@
  */
 
 #define __ARCH_WANT_RENAMEAT
+#define __ARCH_WANT_NEW_STAT
 
 #include <asm-generic/unistd.h>
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index df4bedb9b01c2..538546edbfbd2 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -242,10 +242,12 @@ __SYSCALL(__NR_tee, sys_tee)
 /* fs/stat.c */
 #define __NR_readlinkat 78
 __SYSCALL(__NR_readlinkat, sys_readlinkat)
+#if defined(__ARCH_WANT_NEW_STAT) || defined(__ARCH_WANT_STAT64)
 #define __NR3264_fstatat 79
 __SC_3264(__NR3264_fstatat, sys_fstatat64, sys_newfstatat)
 #define __NR3264_fstat 80
 __SC_3264(__NR3264_fstat, sys_fstat64, sys_newfstat)
+#endif
 
 /* fs/sync.c */
 #define __NR_sync 81
-- 
GitLab


From 89eb1f3b7f2a9156ce6f78713d7924c1bb2fab9f Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 30 Oct 2018 14:20:07 -0300
Subject: [PATCH 0360/1890] tools include uapi: Update asound.h copy

To silence this perf build warning:

  Warning: Kernel ABI header at 'tools/include/uapi/sound/asound.h' differs from latest version at 'include/uapi/sound/asound.h'
  diff -u tools/include/uapi/sound/asound.h include/uapi/sound/asound.h

Due to this cset:

  a98401518def ("ALSA: timer: fix wrong comment to refer to 'SNDRV_TIMER_PSFLG_*'")

Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Cc: Takashi Iwai <tiwai@suse.de>
Link: https://lkml.kernel.org/n/tip-76gsvs0w2g0x723ivqa2xua3@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/uapi/sound/asound.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/include/uapi/sound/asound.h b/tools/include/uapi/sound/asound.h
index ed0a120d4f084..404d4b9ffe764 100644
--- a/tools/include/uapi/sound/asound.h
+++ b/tools/include/uapi/sound/asound.h
@@ -752,7 +752,7 @@ struct snd_timer_info {
 #define SNDRV_TIMER_PSFLG_EARLY_EVENT	(1<<2)	/* write early event to the poll queue */
 
 struct snd_timer_params {
-	unsigned int flags;		/* flags - SNDRV_MIXER_PSFLG_* */
+	unsigned int flags;		/* flags - SNDRV_TIMER_PSFLG_* */
 	unsigned int ticks;		/* requested resolution in ticks */
 	unsigned int queue_size;	/* total size of queue (32-1024) */
 	unsigned int reserved0;		/* reserved, was: failure locations */
-- 
GitLab


From 80ee5668b8a755039ca6180c6272a6e8efee5aad Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 30 Oct 2018 15:26:47 -0300
Subject: [PATCH 0361/1890] perf beauty: Add a generator for MAP_ mmap's flag
 constants

It'll use tools/{arch}/*,include copies of mman.h to generate a table to
be used by tools, initially by the 'mmap' beautifiers in 'perf trace',
but that could also be used to translate from a string constant to the
integer value to be used in a eBPF or tracefs tracepoint filter.

Tested for all archs using:

$ for arch in `ls tools/arch/` ; \
	do echo $arch ; tools/perf/trace/beauty/mmap_flags.sh $arch ; \
   done | less

Example for alpha, an oddball, doesn't include any header, defines all
its stuff:

  $ tools/perf/trace/beauty/mmap_flags.sh alpha
  static const char *mmap_flags[] = {
	[ilog2(0x10) + 1] = "ANONYMOUS",
	[ilog2(0x02000) + 1] = "DENYWRITE",
	[ilog2(0x04000) + 1] = "EXECUTABLE",
	[ilog2(0x100) + 1] = "FIXED",
	[ilog2(0x01000) + 1] = "GROWSDOWN",
	[ilog2(0x100000) + 1] = "HUGETLB",
	[ilog2(0x08000) + 1] = "LOCKED",
	[ilog2(0x40000) + 1] = "NONBLOCK",
	[ilog2(0x10000) + 1] = "NORESERVE",
	[ilog2(0x20000) + 1] = "POPULATE",
	[ilog2(0x02) + 1] = "PRIVATE",
	[ilog2(0x01) + 1] = "SHARED",
	[ilog2(0x80000) + 1] = "STACK",
  };
  $

Common case, my workstation, defines one entry (MAP_32BIT), then
includes mman.h, which gets it to include mman-common.h too:

  $ tools/perf/trace/beauty/mmap_flags.sh
  static const char *mmap_flags[] = {
	[ilog2(0x40) + 1] = "32BIT",
	[ilog2(0x01) + 1] = "SHARED",
	[ilog2(0x02) + 1] = "PRIVATE",
	[ilog2(0x10) + 1] = "FIXED",
	[ilog2(0x20) + 1] = "ANONYMOUS",
	[ilog2(0x100000) + 1] = "FIXED_NOREPLACE",
	[ilog2(0x0100) + 1] = "GROWSDOWN",
	[ilog2(0x0800) + 1] = "DENYWRITE",
	[ilog2(0x1000) + 1] = "EXECUTABLE",
	[ilog2(0x2000) + 1] = "LOCKED",
	[ilog2(0x4000) + 1] = "NORESERVE",
	[ilog2(0x8000) + 1] = "POPULATE",
	[ilog2(0x10000) + 1] = "NONBLOCK",
	[ilog2(0x20000) + 1] = "STACK",
	[ilog2(0x40000) + 1] = "HUGETLB",
	[ilog2(0x80000) + 1] = "SYNC",
  };
  $ uname -m
  x86_64
  $

Sparc, that defines a bunch then includes just mman-common.h:

  $ tools/perf/trace/beauty/mmap_flags.sh sparc
  static const char *mmap_flags[] = {
	[ilog2(0x0800) + 1] = "DENYWRITE",
	[ilog2(0x1000) + 1] = "EXECUTABLE",
	[ilog2(0x0200) + 1] = "GROWSDOWN",
	[ilog2(0x40000) + 1] = "HUGETLB",
	[ilog2(0x100) + 1] = "LOCKED",
	[ilog2(0x10000) + 1] = "NONBLOCK",
	[ilog2(0x40) + 1] = "NORESERVE",
	[ilog2(0x8000) + 1] = "POPULATE",
	[ilog2(0x20000) + 1] = "STACK",
	[ilog2(0x01) + 1] = "SHARED",
	[ilog2(0x02) + 1] = "PRIVATE",
	[ilog2(0x10) + 1] = "FIXED",
	[ilog2(0x20) + 1] = "ANONYMOUS",
	[ilog2(0x100000) + 1] = "FIXED_NOREPLACE",
  };
  [acme@jouet perf]$

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-xydeh491z8fkgglcmqnl5thj@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/trace/beauty/mmap_flags.sh | 32 +++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)
 create mode 100755 tools/perf/trace/beauty/mmap_flags.sh

diff --git a/tools/perf/trace/beauty/mmap_flags.sh b/tools/perf/trace/beauty/mmap_flags.sh
new file mode 100755
index 0000000000000..22c3fdca89751
--- /dev/null
+++ b/tools/perf/trace/beauty/mmap_flags.sh
@@ -0,0 +1,32 @@
+#!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
+
+if [ $# -ne 2 ] ; then
+	[ $# -eq 1 ] && hostarch=$1 || hostarch=`uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/`
+	header_dir=tools/include/uapi/asm-generic
+	arch_header_dir=tools/arch/${hostarch}/include/uapi/asm
+else
+	header_dir=$1
+	arch_header_dir=$2
+fi
+
+arch_mman=${arch_header_dir}/mman.h
+
+# those in egrep -vw are flags, we want just the bits
+
+printf "static const char *mmap_flags[] = {\n"
+regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MAP_([[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*'
+egrep -q $regex ${arch_mman} && \
+(egrep $regex ${arch_mman} | \
+	sed -r "s/$regex/\2 \1/g"	| \
+	xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n")
+egrep -q '#[[:space:]]*include[[:space:]]+<uapi/asm-generic/mman.*' ${arch_mman} &&
+(egrep $regex ${header_dir}/mman-common.h | \
+	egrep -vw 'MAP_(UNINITIALIZED|TYPE|SHARED_VALIDATE)' | \
+	sed -r "s/$regex/\2 \1/g"	| \
+	xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n")
+egrep -q '#[[:space:]]*include[[:space:]]+<uapi/asm-generic/mman.h>.*' ${arch_mman} &&
+(egrep $regex ${header_dir}/mman.h | \
+	sed -r "s/$regex/\2 \1/g"	| \
+	xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n")
+printf "};\n"
-- 
GitLab


From fbd7458db75790e37903c75be68513a4c087737c Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 30 Oct 2018 16:11:59 -0300
Subject: [PATCH 0362/1890] perf beauty: Wire up the mmap flags table generator
 to the Makefile

Now when we run 'make -C tools/perf O=/tmp/build/perf' we end up with:

  $ cat /tmp/build/perf/trace/beauty/generated/mmap_flags_array.c
  static const char *mmap_flags[] = {
	[ilog2(0x40) + 1] = "32BIT",
	[ilog2(0x01) + 1] = "SHARED",
	[ilog2(0x02) + 1] = "PRIVATE",
	[ilog2(0x10) + 1] = "FIXED",
	[ilog2(0x20) + 1] = "ANONYMOUS",
	[ilog2(0x100000) + 1] = "FIXED_NOREPLACE",
	[ilog2(0x0100) + 1] = "GROWSDOWN",
	[ilog2(0x0800) + 1] = "DENYWRITE",
	[ilog2(0x1000) + 1] = "EXECUTABLE",
	[ilog2(0x2000) + 1] = "LOCKED",
	[ilog2(0x4000) + 1] = "NORESERVE",
	[ilog2(0x8000) + 1] = "POPULATE",
	[ilog2(0x10000) + 1] = "NONBLOCK",
	[ilog2(0x20000) + 1] = "STACK",
	[ilog2(0x40000) + 1] = "HUGETLB",
	[ilog2(0x80000) + 1] = "SYNC",
  };
  $

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-t3fn7u3tjsupio6e6vkufx9m@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Makefile.perf | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index a31c5c29d53ab..3ccb4f0bf0883 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -1,4 +1,5 @@
 include ../scripts/Makefile.include
+include ../scripts/Makefile.arch
 
 # The default target of this Makefile is...
 all:
@@ -385,6 +386,8 @@ export INSTALL SHELL_PATH
 SHELL = $(SHELL_PATH)
 
 linux_uapi_dir := $(srctree)/tools/include/uapi/linux
+asm_generic_uapi_dir := $(srctree)/tools/include/uapi/asm-generic
+arch_asm_uapi_dir := $(srctree)/tools/arch/$(ARCH)/include/uapi/asm/
 
 beauty_outdir := $(OUTPUT)trace/beauty/generated
 beauty_ioctl_outdir := $(beauty_outdir)/ioctl
@@ -460,6 +463,12 @@ madvise_behavior_tbl := $(srctree)/tools/perf/trace/beauty/madvise_behavior.sh
 $(madvise_behavior_array): $(madvise_hdr_dir)/mman-common.h $(madvise_behavior_tbl)
 	$(Q)$(SHELL) '$(madvise_behavior_tbl)' $(madvise_hdr_dir) > $@
 
+mmap_flags_array := $(beauty_outdir)/mmap_flags_array.c
+mmap_flags_tbl := $(srctree)/tools/perf/trace/beauty/mmap_flags.sh
+
+$(mmap_flags_array): $(asm_generic_uapi_dir)/mman.h $(asm_generic_uapi_dir)/mman-common.h $(arch_asm_uapi_dir)/mman.h $(mmap_flags_tbl)
+	$(Q)$(SHELL) '$(mmap_flags_tbl)' $(asm_generic_uapi_dir) $(arch_asm_uapi_dir) > $@
+
 mount_flags_array := $(beauty_outdir)/mount_flags_array.c
 mount_flags_tbl := $(srctree)/tools/perf/trace/beauty/mount_flags.sh
 
@@ -583,6 +592,7 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc
 	$(socket_ipproto_array) \
 	$(vhost_virtio_ioctl_array) \
 	$(madvise_behavior_array) \
+	$(mmap_flags_array) \
 	$(mount_flags_array) \
 	$(perf_ioctl_array) \
 	$(prctl_option_array) \
@@ -870,6 +880,7 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea
 		$(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \
 		$(OUTPUT)pmu-events/pmu-events.c \
 		$(OUTPUT)$(madvise_behavior_array) \
+		$(OUTPUT)$(mmap_flags_array) \
 		$(OUTPUT)$(mount_flags_array) \
 		$(OUTPUT)$(drm_ioctl_array) \
 		$(OUTPUT)$(pkey_alloc_access_rights_array) \
-- 
GitLab


From 2f967f1dbbc1f746ad0deda8352bcf563d59e1c7 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 30 Oct 2018 16:30:38 -0300
Subject: [PATCH 0363/1890] perf trace beauty: Use the mmap flags table
 generated from headers

Instead of requiring us to go on and edit sources to add new flag.

  # perf trace -e *mmap sleep 0.1
     0.025 ( 0.005 ms): sleep/29876 mmap(len: 163746, prot: READ, flags: PRIVATE, fd: 3) = 0x7faa68ad1000
     0.059 ( 0.004 ms): sleep/29876 mmap(len: 8192, prot: READ|WRITE, flags: PRIVATE|ANONYMOUS) = 0x7faa68acf000
     0.069 ( 0.006 ms): sleep/29876 mmap(len: 3889792, prot: EXEC|READ, flags: PRIVATE|DENYWRITE, fd: 3) = 0x7faa6851f000
     0.086 ( 0.009 ms): sleep/29876 mmap(addr: 0x7faa688cb000, len: 24576, prot: READ|WRITE, flags: PRIVATE|FIXED|DENYWRITE, fd: 3, off: 1753088) = 0x7faa688cb000
     0.101 ( 0.005 ms): sleep/29876 mmap(addr: 0x7faa688d1000, len: 14976, prot: READ|WRITE, flags: PRIVATE|FIXED|ANONYMOUS) = 0x7faa688d1000
     0.348 ( 0.005 ms): sleep/29876 mmap(len: 111950656, prot: READ, flags: PRIVATE, fd: 3) = 0x7faa61a5b000
  #

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-ggmoy6vxoygh5yim890ht0kf@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/trace/beauty/mmap.c | 48 ++++++++--------------------------
 1 file changed, 11 insertions(+), 37 deletions(-)

diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c
index 0605593552c6d..c534bd96ef5c9 100644
--- a/tools/perf/trace/beauty/mmap.c
+++ b/tools/perf/trace/beauty/mmap.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: LGPL-2.1
 #include <uapi/linux/mman.h>
+#include <linux/log2.h>
 
 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
 					       struct syscall_arg *arg)
@@ -30,50 +31,23 @@ static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
 
 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
 
+static size_t mmap__scnprintf_flags(unsigned long flags, char *bf, size_t size)
+{
+#include "trace/beauty/generated/mmap_flags_array.c"
+       static DEFINE_STRARRAY(mmap_flags);
+
+       return strarray__scnprintf_flags(&strarray__mmap_flags, bf, size, flags);
+}
+
 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
 						struct syscall_arg *arg)
 {
-	int printed = 0, flags = arg->val;
+	unsigned long flags = arg->val;
 
 	if (flags & MAP_ANONYMOUS)
 		arg->mask |= (1 << 4) | (1 << 5); /* Mask 4th ('fd') and 5th ('offset') args, ignored */
 
-#define	P_MMAP_FLAG(n) \
-	if (flags & MAP_##n) { \
-		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
-		flags &= ~MAP_##n; \
-	}
-
-	P_MMAP_FLAG(SHARED);
-	P_MMAP_FLAG(PRIVATE);
-#ifdef MAP_32BIT
-	P_MMAP_FLAG(32BIT);
-#endif
-	P_MMAP_FLAG(ANONYMOUS);
-	P_MMAP_FLAG(DENYWRITE);
-	P_MMAP_FLAG(EXECUTABLE);
-	P_MMAP_FLAG(FILE);
-	P_MMAP_FLAG(FIXED);
-#ifdef MAP_FIXED_NOREPLACE
-	P_MMAP_FLAG(FIXED_NOREPLACE);
-#endif
-	P_MMAP_FLAG(GROWSDOWN);
-	P_MMAP_FLAG(HUGETLB);
-	P_MMAP_FLAG(LOCKED);
-	P_MMAP_FLAG(NONBLOCK);
-	P_MMAP_FLAG(NORESERVE);
-	P_MMAP_FLAG(POPULATE);
-	P_MMAP_FLAG(STACK);
-	P_MMAP_FLAG(UNINITIALIZED);
-#ifdef MAP_SYNC
-	P_MMAP_FLAG(SYNC);
-#endif
-#undef P_MMAP_FLAG
-
-	if (flags)
-		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
-
-	return printed;
+	return mmap__scnprintf_flags(flags, bf, size);
 }
 
 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
-- 
GitLab


From 685626dc26bd9cead850d06520708acbd16bcfda Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 30 Oct 2018 16:50:08 -0300
Subject: [PATCH 0364/1890] tools include uapi: Update linux/mmap.h copy

To pick up the changes from:

  20916d4636a9 ("mm/hugetlb: add mmap() encodings for 32MB and 512MB page sizes")

That do not entail changes in in tools, this just shows that we have to
consider bits [26:31] of flags to beautify that in tools like 'perf
trace'

This silences this perf build warning:

  Warning: Kernel ABI header at 'tools/include/uapi/linux/mman.h' differs from latest version at 'include/uapi/linux/mman.h'
  diff -u tools/include/uapi/linux/mman.h include/uapi/linux/mman.h

Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: https://lkml.kernel.org/n/tip-3rvc39lon93kgt5pl31d8g4x@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/uapi/linux/mman.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/include/uapi/linux/mman.h b/tools/include/uapi/linux/mman.h
index bfd5938fede6c..d0f515d53299e 100644
--- a/tools/include/uapi/linux/mman.h
+++ b/tools/include/uapi/linux/mman.h
@@ -28,7 +28,9 @@
 #define MAP_HUGE_2MB	HUGETLB_FLAG_ENCODE_2MB
 #define MAP_HUGE_8MB	HUGETLB_FLAG_ENCODE_8MB
 #define MAP_HUGE_16MB	HUGETLB_FLAG_ENCODE_16MB
+#define MAP_HUGE_32MB	HUGETLB_FLAG_ENCODE_32MB
 #define MAP_HUGE_256MB	HUGETLB_FLAG_ENCODE_256MB
+#define MAP_HUGE_512MB	HUGETLB_FLAG_ENCODE_512MB
 #define MAP_HUGE_1GB	HUGETLB_FLAG_ENCODE_1GB
 #define MAP_HUGE_2GB	HUGETLB_FLAG_ENCODE_2GB
 #define MAP_HUGE_16GB	HUGETLB_FLAG_ENCODE_16GB
-- 
GitLab


From 827758129a0f84fbd0b2dda15e14a77a7604803d Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 30 Oct 2018 17:01:46 -0300
Subject: [PATCH 0365/1890] tools headers: Sync the various kvm.h header copies

For powerpc, s390, x86 and the main uapi linux/kvm.h header, none of
them entail changes in tooling.

Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: https://lkml.kernel.org/n/tip-avn7iy8f4tcm2y40sbsdk31m@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/powerpc/include/uapi/asm/kvm.h |  1 +
 tools/arch/s390/include/uapi/asm/kvm.h    |  2 ++
 tools/arch/x86/include/uapi/asm/kvm.h     |  6 ++----
 tools/include/uapi/linux/kvm.h            | 21 +++++++++++++++++++--
 4 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h
index 1b32b56a03d34..8c876c166ef27 100644
--- a/tools/arch/powerpc/include/uapi/asm/kvm.h
+++ b/tools/arch/powerpc/include/uapi/asm/kvm.h
@@ -634,6 +634,7 @@ struct kvm_ppc_cpu_char {
 
 #define KVM_REG_PPC_DEC_EXPIRY	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe)
 #define KVM_REG_PPC_ONLINE	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbf)
+#define KVM_REG_PPC_PTCR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc0)
 
 /* Transactional Memory checkpointed state:
  * This is all GPRs, all VSX regs and a subset of SPRs
diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h
index 9a50f02b98946..16511d97e8dc0 100644
--- a/tools/arch/s390/include/uapi/asm/kvm.h
+++ b/tools/arch/s390/include/uapi/asm/kvm.h
@@ -160,6 +160,8 @@ struct kvm_s390_vm_cpu_subfunc {
 #define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW	1
 #define KVM_S390_VM_CRYPTO_DISABLE_AES_KW	2
 #define KVM_S390_VM_CRYPTO_DISABLE_DEA_KW	3
+#define KVM_S390_VM_CRYPTO_ENABLE_APIE		4
+#define KVM_S390_VM_CRYPTO_DISABLE_APIE		5
 
 /* kvm attributes for migration mode */
 #define KVM_S390_VM_MIGRATION_STOP	0
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 8a6eff9c27f3f..dabfcf7c3941a 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -300,10 +300,7 @@ struct kvm_vcpu_events {
 		__u8 injected;
 		__u8 nr;
 		__u8 has_error_code;
-		union {
-			__u8 pad;
-			__u8 pending;
-		};
+		__u8 pending;
 		__u32 error_code;
 	} exception;
 	struct {
@@ -387,6 +384,7 @@ struct kvm_sync_regs {
 
 #define KVM_STATE_NESTED_GUEST_MODE	0x00000001
 #define KVM_STATE_NESTED_RUN_PENDING	0x00000002
+#define KVM_STATE_NESTED_EVMCS		0x00000004
 
 #define KVM_STATE_NESTED_SMM_GUEST_MODE	0x00000001
 #define KVM_STATE_NESTED_SMM_VMXON	0x00000002
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 2875ce85b3226..2b7a652c9fa46 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -420,13 +420,19 @@ struct kvm_run {
 struct kvm_coalesced_mmio_zone {
 	__u64 addr;
 	__u32 size;
-	__u32 pad;
+	union {
+		__u32 pad;
+		__u32 pio;
+	};
 };
 
 struct kvm_coalesced_mmio {
 	__u64 phys_addr;
 	__u32 len;
-	__u32 pad;
+	union {
+		__u32 pad;
+		__u32 pio;
+	};
 	__u8  data[8];
 };
 
@@ -751,6 +757,15 @@ struct kvm_ppc_resize_hpt {
 
 #define KVM_S390_SIE_PAGE_OFFSET 1
 
+/*
+ * On arm64, machine type can be used to request the physical
+ * address size for the VM. Bits[7-0] are reserved for the guest
+ * PA size shift (i.e, log2(PA_Size)). For backward compatibility,
+ * value 0 implies the default IPA size, 40bits.
+ */
+#define KVM_VM_TYPE_ARM_IPA_SIZE_MASK	0xffULL
+#define KVM_VM_TYPE_ARM_IPA_SIZE(x)		\
+	((x) & KVM_VM_TYPE_ARM_IPA_SIZE_MASK)
 /*
  * ioctls for /dev/kvm fds:
  */
@@ -958,6 +973,8 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_HYPERV_SEND_IPI 161
 #define KVM_CAP_COALESCED_PIO 162
 #define KVM_CAP_HYPERV_ENLIGHTENED_VMCS 163
+#define KVM_CAP_EXCEPTION_PAYLOAD 164
+#define KVM_CAP_ARM_VM_IPA_SIZE 165
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
GitLab


From d45a57fff0a657045a77b395ae713ffae0cb4e46 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 30 Oct 2018 17:04:47 -0300
Subject: [PATCH 0366/1890] tools headers uapi: Update linux/netlink.h header
 copy

Picking the changes from:

  89d35528d17d ("netlink: Add new socket option to enable strict checking on dumps")

To silence this build warning:

  Warning: Kernel ABI header at 'tools/include/uapi/linux/netlink.h' differs from latest version at 'include/uapi/linux/netlink.h'

Cc: Alexei Starovoitov <ast@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Eric Leblond <eric@regit.org>
Link: https://lkml.kernel.org/n/tip-1xymkfjpmhxfzrs46t8z8mjw@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/uapi/linux/netlink.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/include/uapi/linux/netlink.h b/tools/include/uapi/linux/netlink.h
index 776bc92e91180..486ed1f0c0bc1 100644
--- a/tools/include/uapi/linux/netlink.h
+++ b/tools/include/uapi/linux/netlink.h
@@ -155,6 +155,7 @@ enum nlmsgerr_attrs {
 #define NETLINK_LIST_MEMBERSHIPS	9
 #define NETLINK_CAP_ACK			10
 #define NETLINK_EXT_ACK			11
+#define NETLINK_DUMP_STRICT_CHK		12
 
 struct nl_pktinfo {
 	__u32	group;
-- 
GitLab


From 76b0b801782b34b3028dcef3de36cb634e3908a8 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 30 Oct 2018 17:06:57 -0300
Subject: [PATCH 0367/1890] tools headers uapi: Update linux/if_link.h header
 copy

To pick the changes from:

  9163a0fc1f0c ("net: bridge: add support for per-port vlan stats")

And silence this build warning:

  Warning: Kernel ABI header at 'tools/include/uapi/linux/if_link.h' differs from latest version at 'include/uapi/linux/if_link.h'

Cc: Alexei Starovoitov <ast@kernel.org>
Cc: David S. Miller <davem@davemloft.net>
Cc: Eric Leblond <eric@regit.org>
Cc: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Link: https://lkml.kernel.org/n/tip-7p53ghippywz7fqkwo3nkzet@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/uapi/linux/if_link.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index 58faab897201f..1debfa42cba1a 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -287,6 +287,7 @@ enum {
 	IFLA_BR_MCAST_STATS_ENABLED,
 	IFLA_BR_MCAST_IGMP_VERSION,
 	IFLA_BR_MCAST_MLD_VERSION,
+	IFLA_BR_VLAN_STATS_PER_PORT,
 	__IFLA_BR_MAX,
 };
 
-- 
GitLab


From ff27a06af6ffd3f49b9e193eb68f487ad76651e1 Mon Sep 17 00:00:00 2001
From: David Miller <davem@davemloft.net>
Date: Tue, 30 Oct 2018 22:30:03 -0700
Subject: [PATCH 0368/1890] perf top: Start display thread earlier

If events are coming in at a rate such that the event processing thread
can barely keep up, our initial run of the event ring will almost never
terminate and this delays the starting of the display thread.

The screen basically stays black until the event thread can get out of
it's endless loop.

Therefore, start the display thread before we start processing the ring
buffer.

This also make sure that we always have the user requested real time
setting engaged when processing the ring.

Signed-off-by: David S. Miller <davem@davemloft.net>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: http://lkml.kernel.org/r/20181030.223003.2242527041807905962.davem@davemloft.net
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-top.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 8e29e0cc86263..b2838de13de02 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1134,11 +1134,6 @@ static int __cmd_top(struct perf_top *top)
         if (!target__none(&opts->target))
                 perf_evlist__enable(top->evlist);
 
-	/* Wait for a minimal set of events before starting the snapshot */
-	perf_evlist__poll(top->evlist, 100);
-
-	perf_top__mmap_read(top);
-
 	ret = -1;
 	if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui :
 							    display_thread), top)) {
@@ -1156,6 +1151,11 @@ static int __cmd_top(struct perf_top *top)
 		}
 	}
 
+	/* Wait for a minimal set of events before starting the snapshot */
+	perf_evlist__poll(top->evlist, 100);
+
+	perf_top__mmap_read(top);
+
 	while (!done) {
 		u64 hits = top->samples;
 
-- 
GitLab


From 4f8f382e635707ddaddf8269a116e4f8cc8835c0 Mon Sep 17 00:00:00 2001
From: David Miller <davem@davemloft.net>
Date: Tue, 30 Oct 2018 22:24:04 -0700
Subject: [PATCH 0369/1890] perf tools: Don't clone maps from parent when
 synthesizing forks

When synthesizing FORK events, we are trying to create thread objects
for the already running tasks on the machine.

Normally, for a kernel FORK event, we want to clone the parent's maps
because that is what the kernel just did.

But when synthesizing, this should not be done.  If we do, we end up
with overlapping maps as we process the sythesized MMAP2 events that
get delivered shortly thereafter.

Use the FORK event misc flags in an internal way to signal this
situation, so we can elide the map clone when appropriate.

Signed-off-by: David S. Miller <davem@davemloft.net>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Joe Mario <jmario@redhat.com>
Link: http://lkml.kernel.org/r/20181030.222404.2085088822877051075.davem@davemloft.net
[ Added comment about flag use in machine__process_fork_event(),
  use ternary op in thread__clone_map_groups() as suggested by Jiri ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 include/uapi/linux/perf_event.h       |  2 ++
 tools/include/uapi/linux/perf_event.h |  2 ++
 tools/perf/util/event.c               |  1 +
 tools/perf/util/machine.c             | 19 ++++++++++++++++++-
 tools/perf/util/thread.c              | 13 +++++--------
 tools/perf/util/thread.h              |  2 +-
 6 files changed, 29 insertions(+), 10 deletions(-)

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index f35eb72739c09..9de8780ac8d97 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -646,10 +646,12 @@ struct perf_event_mmap_page {
  *
  *   PERF_RECORD_MISC_MMAP_DATA  - PERF_RECORD_MMAP* events
  *   PERF_RECORD_MISC_COMM_EXEC  - PERF_RECORD_COMM event
+ *   PERF_RECORD_MISC_FORK_EXEC  - PERF_RECORD_FORK event (perf internal)
  *   PERF_RECORD_MISC_SWITCH_OUT - PERF_RECORD_SWITCH* events
  */
 #define PERF_RECORD_MISC_MMAP_DATA		(1 << 13)
 #define PERF_RECORD_MISC_COMM_EXEC		(1 << 13)
+#define PERF_RECORD_MISC_FORK_EXEC		(1 << 13)
 #define PERF_RECORD_MISC_SWITCH_OUT		(1 << 13)
 /*
  * These PERF_RECORD_MISC_* flags below are safely reused
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index f35eb72739c09..9de8780ac8d97 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -646,10 +646,12 @@ struct perf_event_mmap_page {
  *
  *   PERF_RECORD_MISC_MMAP_DATA  - PERF_RECORD_MMAP* events
  *   PERF_RECORD_MISC_COMM_EXEC  - PERF_RECORD_COMM event
+ *   PERF_RECORD_MISC_FORK_EXEC  - PERF_RECORD_FORK event (perf internal)
  *   PERF_RECORD_MISC_SWITCH_OUT - PERF_RECORD_SWITCH* events
  */
 #define PERF_RECORD_MISC_MMAP_DATA		(1 << 13)
 #define PERF_RECORD_MISC_COMM_EXEC		(1 << 13)
+#define PERF_RECORD_MISC_FORK_EXEC		(1 << 13)
 #define PERF_RECORD_MISC_SWITCH_OUT		(1 << 13)
 /*
  * These PERF_RECORD_MISC_* flags below are safely reused
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index bc646185f8d91..e9c108a6b1c34 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -308,6 +308,7 @@ static int perf_event__synthesize_fork(struct perf_tool *tool,
 	event->fork.pid  = tgid;
 	event->fork.tid  = pid;
 	event->fork.header.type = PERF_RECORD_FORK;
+	event->fork.header.misc = PERF_RECORD_MISC_FORK_EXEC;
 
 	event->fork.header.size = (sizeof(event->fork) + machine->id_hdr_size);
 
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 8ee8ab39d8acf..8f36ce813bc5b 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1708,6 +1708,7 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event
 	struct thread *parent = machine__findnew_thread(machine,
 							event->fork.ppid,
 							event->fork.ptid);
+	bool do_maps_clone = true;
 	int err = 0;
 
 	if (dump_trace)
@@ -1736,9 +1737,25 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event
 
 	thread = machine__findnew_thread(machine, event->fork.pid,
 					 event->fork.tid);
+	/*
+	 * When synthesizing FORK events, we are trying to create thread
+	 * objects for the already running tasks on the machine.
+	 *
+	 * Normally, for a kernel FORK event, we want to clone the parent's
+	 * maps because that is what the kernel just did.
+	 *
+	 * But when synthesizing, this should not be done.  If we do, we end up
+	 * with overlapping maps as we process the sythesized MMAP2 events that
+	 * get delivered shortly thereafter.
+	 *
+	 * Use the FORK event misc flags in an internal way to signal this
+	 * situation, so we can elide the map clone when appropriate.
+	 */
+	if (event->fork.header.misc & PERF_RECORD_MISC_FORK_EXEC)
+		do_maps_clone = false;
 
 	if (thread == NULL || parent == NULL ||
-	    thread__fork(thread, parent, sample->time) < 0) {
+	    thread__fork(thread, parent, sample->time, do_maps_clone) < 0) {
 		dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n");
 		err = -1;
 	}
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 2048d393ece6f..3d9ed7d0e2818 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -330,7 +330,8 @@ static int thread__prepare_access(struct thread *thread)
 }
 
 static int thread__clone_map_groups(struct thread *thread,
-				    struct thread *parent)
+				    struct thread *parent,
+				    bool do_maps_clone)
 {
 	/* This is new thread, we share map groups for process. */
 	if (thread->pid_ == parent->pid_)
@@ -341,15 +342,11 @@ static int thread__clone_map_groups(struct thread *thread,
 			 thread->pid_, thread->tid, parent->pid_, parent->tid);
 		return 0;
 	}
-
 	/* But this one is new process, copy maps. */
-	if (map_groups__clone(thread, parent->mg) < 0)
-		return -ENOMEM;
-
-	return 0;
+	return do_maps_clone ? map_groups__clone(thread, parent->mg) : 0;
 }
 
-int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp)
+int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone)
 {
 	if (parent->comm_set) {
 		const char *comm = thread__comm_str(parent);
@@ -362,7 +359,7 @@ int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp)
 	}
 
 	thread->ppid = parent->tid;
-	return thread__clone_map_groups(thread, parent);
+	return thread__clone_map_groups(thread, parent, do_maps_clone);
 }
 
 void thread__find_cpumode_addr_location(struct thread *thread, u64 addr,
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 36c09a9904e66..30e2b4c165fe7 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -89,7 +89,7 @@ struct comm *thread__comm(const struct thread *thread);
 struct comm *thread__exec_comm(const struct thread *thread);
 const char *thread__comm_str(const struct thread *thread);
 int thread__insert_map(struct thread *thread, struct map *map);
-int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp);
+int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone);
 size_t thread__fprintf(struct thread *thread, FILE *fp);
 
 struct thread *thread__main_thread(struct machine *machine, struct thread *thread);
-- 
GitLab


From c57cdf7a9e51d97a43e29b8f4a04157875104000 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Wed, 24 Oct 2018 21:18:09 +0800
Subject: [PATCH 0370/1890] block: call rq_qos_exit() after queue is frozen

rq_qos_exit() removes the current q->rq_qos, this action has to be
done after queue is frozen, otherwise the IO queue path may never
be waken up, then IO hang is caused.

So fixes this issue by moving rq_qos_exit() after queue is frozen.

Cc: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c  | 3 +++
 block/blk-sysfs.c | 2 --
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index bc6ea87d10e02..26a5dac80ed98 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -785,6 +785,9 @@ void blk_cleanup_queue(struct request_queue *q)
 	 * prevent that q->request_fn() gets invoked after draining finished.
 	 */
 	blk_freeze_queue(q);
+
+	rq_qos_exit(q);
+
 	spin_lock_irq(lock);
 	queue_flag_set(QUEUE_FLAG_DEAD, q);
 	spin_unlock_irq(lock);
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 0641533597f1b..844a454a7b3a6 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -1007,8 +1007,6 @@ void blk_unregister_queue(struct gendisk *disk)
 	kobject_del(&q->kobj);
 	blk_trace_remove_sysfs(disk_to_dev(disk));
 
-	rq_qos_exit(q);
-
 	mutex_lock(&q->sysfs_lock);
 	if (q->request_fn || (q->mq_ops && q->elevator))
 		elv_unregister_queue(q);
-- 
GitLab


From 242483068b4b9ad02f1653819b6e683577681e0e Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Wed, 31 Oct 2018 11:10:42 +0200
Subject: [PATCH 0371/1890] perf intel-pt: Insert callchain context into
 synthesized callchains

In the absence of a fallback, callchains must encode also the callchain
context. Do that now there is no fallback.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Reviewed-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: stable@vger.kernel.org # 4.19
Link: http://lkml.kernel.org/r/100ea2ec-ed14-b56d-d810-e0a6d2f4b069@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/intel-pt.c     |  6 +++--
 tools/perf/util/thread-stack.c | 44 +++++++++++++++++++++++++++-------
 tools/perf/util/thread-stack.h |  2 +-
 3 files changed, 40 insertions(+), 12 deletions(-)

diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index ffa385a029b3f..60732213d16a1 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -759,7 +759,8 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
 	if (pt->synth_opts.callchain) {
 		size_t sz = sizeof(struct ip_callchain);
 
-		sz += pt->synth_opts.callchain_sz * sizeof(u64);
+		/* Add 1 to callchain_sz for callchain context */
+		sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64);
 		ptq->chain = zalloc(sz);
 		if (!ptq->chain)
 			goto out_free;
@@ -1160,7 +1161,8 @@ static void intel_pt_prep_sample(struct intel_pt *pt,
 
 	if (pt->synth_opts.callchain) {
 		thread_stack__sample(ptq->thread, ptq->chain,
-				     pt->synth_opts.callchain_sz, sample->ip);
+				     pt->synth_opts.callchain_sz + 1,
+				     sample->ip, pt->kernel_start);
 		sample->callchain = ptq->chain;
 	}
 
diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c
index c091635bf7dcb..61a4286a74dc9 100644
--- a/tools/perf/util/thread-stack.c
+++ b/tools/perf/util/thread-stack.c
@@ -310,20 +310,46 @@ void thread_stack__free(struct thread *thread)
 	}
 }
 
+static inline u64 callchain_context(u64 ip, u64 kernel_start)
+{
+	return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL;
+}
+
 void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
-			  size_t sz, u64 ip)
+			  size_t sz, u64 ip, u64 kernel_start)
 {
-	size_t i;
+	u64 context = callchain_context(ip, kernel_start);
+	u64 last_context;
+	size_t i, j;
 
-	if (!thread || !thread->ts)
-		chain->nr = 1;
-	else
-		chain->nr = min(sz, thread->ts->cnt + 1);
+	if (sz < 2) {
+		chain->nr = 0;
+		return;
+	}
 
-	chain->ips[0] = ip;
+	chain->ips[0] = context;
+	chain->ips[1] = ip;
+
+	if (!thread || !thread->ts) {
+		chain->nr = 2;
+		return;
+	}
+
+	last_context = context;
+
+	for (i = 2, j = 1; i < sz && j <= thread->ts->cnt; i++, j++) {
+		ip = thread->ts->stack[thread->ts->cnt - j].ret_addr;
+		context = callchain_context(ip, kernel_start);
+		if (context != last_context) {
+			if (i >= sz - 1)
+				break;
+			chain->ips[i++] = context;
+			last_context = context;
+		}
+		chain->ips[i] = ip;
+	}
 
-	for (i = 1; i < chain->nr; i++)
-		chain->ips[i] = thread->ts->stack[thread->ts->cnt - i].ret_addr;
+	chain->nr = i;
 }
 
 struct call_return_processor *
diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h
index b7e41c4ebfdd9..f97c00a8c2514 100644
--- a/tools/perf/util/thread-stack.h
+++ b/tools/perf/util/thread-stack.h
@@ -84,7 +84,7 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
 			u64 to_ip, u16 insn_len, u64 trace_nr);
 void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr);
 void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
-			  size_t sz, u64 ip);
+			  size_t sz, u64 ip, u64 kernel_start);
 int thread_stack__flush(struct thread *thread);
 void thread_stack__free(struct thread *thread);
 size_t thread_stack__depth(struct thread *thread);
-- 
GitLab


From 5d4f0edaa3ac4f1844ed7c64cd2bae6f1912bac5 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Wed, 31 Oct 2018 11:10:43 +0200
Subject: [PATCH 0372/1890] perf intel-pt/bts: Calculate cpumode for
 synthesized samples

In the absence of a fallback, samples must provide a correct cpumode for
the 'ip'. Do that now there is no fallback.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Reviewed-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: stable@vger.kernel.org # 4.19
Link: http://lkml.kernel.org/r/20181031091043.23465-6-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/intel-bts.c | 17 ++++++++++++-----
 tools/perf/util/intel-pt.c  | 22 +++++++++++++---------
 2 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
index 3b3a3d55dca18..7b27d77306c22 100644
--- a/tools/perf/util/intel-bts.c
+++ b/tools/perf/util/intel-bts.c
@@ -269,6 +269,13 @@ static int intel_bts_do_fix_overlap(struct auxtrace_queue *queue,
 	return 0;
 }
 
+static inline u8 intel_bts_cpumode(struct intel_bts *bts, uint64_t ip)
+{
+	return machine__kernel_ip(bts->machine, ip) ?
+	       PERF_RECORD_MISC_KERNEL :
+	       PERF_RECORD_MISC_USER;
+}
+
 static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
 					 struct branch *branch)
 {
@@ -281,12 +288,8 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
 	    bts->num_events++ <= bts->synth_opts.initial_skip)
 		return 0;
 
-	event.sample.header.type = PERF_RECORD_SAMPLE;
-	event.sample.header.misc = PERF_RECORD_MISC_USER;
-	event.sample.header.size = sizeof(struct perf_event_header);
-
-	sample.cpumode = PERF_RECORD_MISC_USER;
 	sample.ip = le64_to_cpu(branch->from);
+	sample.cpumode = intel_bts_cpumode(bts, sample.ip);
 	sample.pid = btsq->pid;
 	sample.tid = btsq->tid;
 	sample.addr = le64_to_cpu(branch->to);
@@ -298,6 +301,10 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
 	sample.insn_len = btsq->intel_pt_insn.length;
 	memcpy(sample.insn, btsq->intel_pt_insn.buf, INTEL_PT_INSN_BUF_SZ);
 
+	event.sample.header.type = PERF_RECORD_SAMPLE;
+	event.sample.header.misc = sample.cpumode;
+	event.sample.header.size = sizeof(struct perf_event_header);
+
 	if (bts->synth_opts.inject) {
 		event.sample.header.size = bts->branches_event_size;
 		ret = perf_event__synthesize_sample(&event,
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 60732213d16a1..86cc9a64e9827 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -407,6 +407,13 @@ intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset)
 	return auxtrace_cache__lookup(dso->auxtrace_cache, offset);
 }
 
+static inline u8 intel_pt_cpumode(struct intel_pt *pt, uint64_t ip)
+{
+	return ip >= pt->kernel_start ?
+	       PERF_RECORD_MISC_KERNEL :
+	       PERF_RECORD_MISC_USER;
+}
+
 static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
 				   uint64_t *insn_cnt_ptr, uint64_t *ip,
 				   uint64_t to_ip, uint64_t max_insn_cnt,
@@ -429,10 +436,7 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
 	if (to_ip && *ip == to_ip)
 		goto out_no_cache;
 
-	if (*ip >= ptq->pt->kernel_start)
-		cpumode = PERF_RECORD_MISC_KERNEL;
-	else
-		cpumode = PERF_RECORD_MISC_USER;
+	cpumode = intel_pt_cpumode(ptq->pt, *ip);
 
 	thread = ptq->thread;
 	if (!thread) {
@@ -1059,15 +1063,11 @@ static void intel_pt_prep_b_sample(struct intel_pt *pt,
 				   union perf_event *event,
 				   struct perf_sample *sample)
 {
-	event->sample.header.type = PERF_RECORD_SAMPLE;
-	event->sample.header.misc = PERF_RECORD_MISC_USER;
-	event->sample.header.size = sizeof(struct perf_event_header);
-
 	if (!pt->timeless_decoding)
 		sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
 
-	sample->cpumode = PERF_RECORD_MISC_USER;
 	sample->ip = ptq->state->from_ip;
+	sample->cpumode = intel_pt_cpumode(pt, sample->ip);
 	sample->pid = ptq->pid;
 	sample->tid = ptq->tid;
 	sample->addr = ptq->state->to_ip;
@@ -1076,6 +1076,10 @@ static void intel_pt_prep_b_sample(struct intel_pt *pt,
 	sample->flags = ptq->flags;
 	sample->insn_len = ptq->insn_len;
 	memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
+
+	event->sample.header.type = PERF_RECORD_SAMPLE;
+	event->sample.header.misc = sample->cpumode;
+	event->sample.header.size = sizeof(struct perf_event_header);
 }
 
 static int intel_pt_inject_event(union perf_event *event,
-- 
GitLab


From 094bf4d0e9657f6ea1ee3d7e07ce3970796949ce Mon Sep 17 00:00:00 2001
From: Miroslav Lichvar <mlichvar@redhat.com>
Date: Fri, 12 Oct 2018 13:13:39 +0200
Subject: [PATCH 0373/1890] igb: shorten maximum PHC timecounter update
 interval

The timecounter needs to be updated at least once per ~550 seconds in
order to avoid a 40-bit SYSTIM timestamp to be misinterpreted as an old
timestamp.

Since commit 500462a9d ("timers: Switch to a non-cascading wheel"),
scheduling of delayed work seems to be less accurate and a requested
delay of 540 seconds may actually be longer than 550 seconds. Shorten
the delay to 480 seconds to be sure the timecounter is updated in time.

This fixes an issue with HW timestamps on 82580/I350/I354 being off by
~1100 seconds for few seconds every ~9 minutes.

Cc: Jacob Keller <jacob.e.keller@intel.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Miroslav Lichvar <mlichvar@redhat.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igb/igb_ptp.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
index 9f4d700e09df3..29ced6b74d364 100644
--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
@@ -51,9 +51,15 @@
  *
  * The 40 bit 82580 SYSTIM overflows every
  *   2^40 * 10^-9 /  60  = 18.3 minutes.
+ *
+ * SYSTIM is converted to real time using a timecounter. As
+ * timecounter_cyc2time() allows old timestamps, the timecounter
+ * needs to be updated at least once per half of the SYSTIM interval.
+ * Scheduling of delayed work is not very accurate, so we aim for 8
+ * minutes to be sure the actual interval is shorter than 9.16 minutes.
  */
 
-#define IGB_SYSTIM_OVERFLOW_PERIOD	(HZ * 60 * 9)
+#define IGB_SYSTIM_OVERFLOW_PERIOD	(HZ * 60 * 8)
 #define IGB_PTP_TX_TIMEOUT		(HZ * 15)
 #define INCPERIOD_82576			BIT(E1000_TIMINCA_16NS_SHIFT)
 #define INCVALUE_82576_MASK		GENMASK(E1000_TIMINCA_16NS_SHIFT - 1, 0)
-- 
GitLab


From e69e40c8066c0e2fafd63a5919cb9b2fb9f81f2e Mon Sep 17 00:00:00 2001
From: Ngai-Mint Kwan <ngai-mint.kwan@intel.com>
Date: Mon, 15 Oct 2018 12:18:27 -0700
Subject: [PATCH 0374/1890] fm10k: fix SM mailbox full condition

Current condition will always incorrectly report a full SM mailbox if an
IES API application is not running. Due to this, the
"fm10k_service_task" will be infinitely queued into the driver's
workqueue. This, in turn, will cause a "kworker" thread to report 100%
CPU utilization and might cause "soft lockup" events or system crashes.

To fix this issue, a new condition is added to determine if the SM
mailbox is in the correct state of FM10K_STATE_OPEN before proceeding.
In other words, an instance of the IES API must be running. If there is,
the remainder of the flow stays the same which is to determine if the SM
mailbox capacity has been exceeded or not and take appropriate action.

Signed-off-by: Ngai-Mint Kwan <ngai-mint.kwan@intel.com>
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/fm10k/fm10k_iov.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
index e707d717012fa..74160c2095ee4 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
@@ -244,7 +244,8 @@ s32 fm10k_iov_mbx(struct fm10k_intfc *interface)
 		}
 
 		/* guarantee we have free space in the SM mailbox */
-		if (!hw->mbx.ops.tx_ready(&hw->mbx, FM10K_VFMBX_MSG_MTU)) {
+		if (hw->mbx.state == FM10K_STATE_OPEN &&
+		    !hw->mbx.ops.tx_ready(&hw->mbx, FM10K_VFMBX_MSG_MTU)) {
 			/* keep track of how many times this occurs */
 			interface->hw_sm_mbx_full++;
 
-- 
GitLab


From e330af788998b0de4da4f5bd7ddd087507999800 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Mon, 15 Oct 2018 12:18:28 -0700
Subject: [PATCH 0375/1890] fm10k: ensure completer aborts are marked as
 non-fatal after a resume

VF drivers can trigger PCIe completer aborts any time they read a queue
that they don't own. Even in nominal circumstances, it is not possible
to prevent the VF driver from reading queues it doesn't own. VF drivers
may attempt to read queues it previously owned, but which it no longer
does due to a PF reset.

Normally these completer aborts aren't an issue. However, on some
platforms these trigger machine check errors. This is true even if we
lower their severity from fatal to non-fatal. Indeed, we already have
code for lowering the severity.

We could attempt to mask these errors conditionally around resets, which
is the most common time they would occur. However this would essentially
be a race between the PF and VF drivers, and we may still occasionally
see machine check exceptions on these strictly configured platforms.

Instead, mask the errors entirely any time we resume VFs. By doing so,
we prevent the completer aborts from being sent to the parent PCIe
device, and thus these strict platforms will not upgrade them into
machine check errors.

Additionally, we don't lose any information by masking these errors,
because we'll still report VFs which attempt to access queues via the
FUM_BAD_VF_QACCESS errors.

Without this change, on platforms where completer aborts cause machine
check exceptions, the VF reading queues it doesn't own could crash the
host system. Masking the completer abort prevents this, so we should
mask it for good, and not just around a PCIe reset. Otherwise malicious
or misconfigured VFs could cause the host system to crash.

Because we are masking the error entirely, there is little reason to
also keep setting the severity bit, so that code is also removed.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/fm10k/fm10k_iov.c | 48 ++++++++++++--------
 1 file changed, 28 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
index 74160c2095ee4..5d4f1761dc0c2 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
@@ -303,6 +303,28 @@ void fm10k_iov_suspend(struct pci_dev *pdev)
 	}
 }
 
+static void fm10k_mask_aer_comp_abort(struct pci_dev *pdev)
+{
+	u32 err_mask;
+	int pos;
+
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR);
+	if (!pos)
+		return;
+
+	/* Mask the completion abort bit in the ERR_UNCOR_MASK register,
+	 * preventing the device from reporting these errors to the upstream
+	 * PCIe root device. This avoids bringing down platforms which upgrade
+	 * non-fatal completer aborts into machine check exceptions. Completer
+	 * aborts can occur whenever a VF reads a queue it doesn't own.
+	 */
+	pci_read_config_dword(pdev, pos + PCI_ERR_UNCOR_MASK, &err_mask);
+	err_mask |= PCI_ERR_UNC_COMP_ABORT;
+	pci_write_config_dword(pdev, pos + PCI_ERR_UNCOR_MASK, err_mask);
+
+	mmiowb();
+}
+
 int fm10k_iov_resume(struct pci_dev *pdev)
 {
 	struct fm10k_intfc *interface = pci_get_drvdata(pdev);
@@ -318,6 +340,12 @@ int fm10k_iov_resume(struct pci_dev *pdev)
 	if (!iov_data)
 		return -ENOMEM;
 
+	/* Lower severity of completer abort error reporting as
+	 * the VFs can trigger this any time they read a queue
+	 * that they don't own.
+	 */
+	fm10k_mask_aer_comp_abort(pdev);
+
 	/* allocate hardware resources for the VFs */
 	hw->iov.ops.assign_resources(hw, num_vfs, num_vfs);
 
@@ -461,20 +489,6 @@ void fm10k_iov_disable(struct pci_dev *pdev)
 	fm10k_iov_free_data(pdev);
 }
 
-static void fm10k_disable_aer_comp_abort(struct pci_dev *pdev)
-{
-	u32 err_sev;
-	int pos;
-
-	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR);
-	if (!pos)
-		return;
-
-	pci_read_config_dword(pdev, pos + PCI_ERR_UNCOR_SEVER, &err_sev);
-	err_sev &= ~PCI_ERR_UNC_COMP_ABORT;
-	pci_write_config_dword(pdev, pos + PCI_ERR_UNCOR_SEVER, err_sev);
-}
-
 int fm10k_iov_configure(struct pci_dev *pdev, int num_vfs)
 {
 	int current_vfs = pci_num_vf(pdev);
@@ -496,12 +510,6 @@ int fm10k_iov_configure(struct pci_dev *pdev, int num_vfs)
 
 	/* allocate VFs if not already allocated */
 	if (num_vfs && num_vfs != current_vfs) {
-		/* Disable completer abort error reporting as
-		 * the VFs can trigger this any time they read a queue
-		 * that they don't own.
-		 */
-		fm10k_disable_aer_comp_abort(pdev);
-
 		err = pci_enable_sriov(pdev, num_vfs);
 		if (err) {
 			dev_err(&pdev->dev,
-- 
GitLab


From 9a1fe1e2bbc4194103bd6f5f8d78383d3bef41ae Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Mon, 15 Oct 2018 12:18:29 -0700
Subject: [PATCH 0376/1890] fm10k: add missing device IDs to the upstream
 driver

The device IDs for the Ethernet SDI Adapter devices were never added to
the upstream driver. The IDs are already in the pci.ids database, and
are supported by the out-of-tree driver.

Add the device IDs now, so that the upstream driver can recognize and
load these devices.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/fm10k/fm10k_pci.c  | 2 ++
 drivers/net/ethernet/intel/fm10k/fm10k_type.h | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
index 02345d3813036..e49fb51d36133 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
@@ -23,6 +23,8 @@ static const struct fm10k_info *fm10k_info_tbl[] = {
  */
 static const struct pci_device_id fm10k_pci_tbl[] = {
 	{ PCI_VDEVICE(INTEL, FM10K_DEV_ID_PF), fm10k_device_pf },
+	{ PCI_VDEVICE(INTEL, FM10K_DEV_ID_SDI_FM10420_QDA2), fm10k_device_pf },
+	{ PCI_VDEVICE(INTEL, FM10K_DEV_ID_SDI_FM10420_DA2), fm10k_device_pf },
 	{ PCI_VDEVICE(INTEL, FM10K_DEV_ID_VF), fm10k_device_vf },
 	/* required last entry */
 	{ 0, }
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_type.h b/drivers/net/ethernet/intel/fm10k/fm10k_type.h
index 3e608e493f9df..9fb9fca375e3f 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_type.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_type.h
@@ -15,6 +15,8 @@ struct fm10k_hw;
 
 #define FM10K_DEV_ID_PF			0x15A4
 #define FM10K_DEV_ID_VF			0x15A5
+#define FM10K_DEV_ID_SDI_FM10420_QDA2	0x15D0
+#define FM10K_DEV_ID_SDI_FM10420_DA2	0x15D5
 
 #define FM10K_MAX_QUEUES		256
 #define FM10K_MAX_QUEUES_PF		128
-- 
GitLab


From 35ae5414e7085dfabe3d1737569eff549b04942e Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Mon, 15 Oct 2018 12:21:28 -0700
Subject: [PATCH 0377/1890] fm10k: bump driver version to match out-of-tree
 release

The upstream and out-of-tree drivers are once again at comparable
functionality. It's been a while since we updated the upstream driver
version, so bump it now.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/fm10k/fm10k_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
index 503bbc0177922..5b2a50e5798f7 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
@@ -11,7 +11,7 @@
 
 #include "fm10k.h"
 
-#define DRV_VERSION	"0.23.4-k"
+#define DRV_VERSION	"0.26.1-k"
 #define DRV_SUMMARY	"Intel(R) Ethernet Switch Host Interface Driver"
 const char fm10k_driver_version[] = DRV_VERSION;
 char fm10k_driver_name[] = "fm10k";
-- 
GitLab


From 48e01e001da31d5a40e31ed5f8cea83a18823cc1 Mon Sep 17 00:00:00 2001
From: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Date: Thu, 18 Oct 2018 15:39:43 -0700
Subject: [PATCH 0378/1890] ixgbe/ixgbevf: fix XFRM_ALGO dependency

Based on the original work from Arnd Bergmann.

When XFRM_ALGO is not enabled, the new ixgbe IPsec code produces a
link error:

drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.o: In function `ixgbe_ipsec_vf_add_sa':
ixgbe_ipsec.c:(.text+0x1266): undefined reference to `xfrm_aead_get_byname'

Simply selecting XFRM_ALGO from here causes circular dependencies, so
to fix it, we probably want this slightly more complex solution that is
similar to what other drivers with XFRM offload do:

A separate Kconfig symbol now controls whether we include the IPsec
offload code. To keep the old behavior, this is left as 'default y'. The
dependency in XFRM_OFFLOAD still causes a circular dependency but is
not actually needed because this symbol is not user visible, so removing
that dependency on top makes it all work.

CC: Arnd Bergmann <arnd@arndb.de>
CC: Shannon Nelson <shannon.nelson@oracle.com>
Fixes: eda0333ac293 ("ixgbe: add VF IPsec management")
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
---
 drivers/net/ethernet/intel/Kconfig             | 18 ++++++++++++++++++
 drivers/net/ethernet/intel/ixgbe/Makefile      |  2 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe.h       |  8 ++++----
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c  |  6 +++---
 drivers/net/ethernet/intel/ixgbevf/Makefile    |  2 +-
 drivers/net/ethernet/intel/ixgbevf/ixgbevf.h   |  4 ++--
 .../net/ethernet/intel/ixgbevf/ixgbevf_main.c  |  2 +-
 net/xfrm/Kconfig                               |  1 -
 8 files changed, 30 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig
index fd3373d82a9e9..59e1bc0f609ee 100644
--- a/drivers/net/ethernet/intel/Kconfig
+++ b/drivers/net/ethernet/intel/Kconfig
@@ -200,6 +200,15 @@ config IXGBE_DCB
 
 	  If unsure, say N.
 
+config IXGBE_IPSEC
+	bool "IPSec XFRM cryptography-offload acceleration"
+	depends on IXGBE
+	depends on XFRM_OFFLOAD
+	default y
+	select XFRM_ALGO
+	---help---
+	  Enable support for IPSec offload in ixgbe.ko
+
 config IXGBEVF
 	tristate "Intel(R) 10GbE PCI Express Virtual Function Ethernet support"
 	depends on PCI_MSI
@@ -217,6 +226,15 @@ config IXGBEVF
 	  will be called ixgbevf.  MSI-X interrupt support is required
 	  for this driver to work correctly.
 
+config IXGBEVF_IPSEC
+	bool "IPSec XFRM cryptography-offload acceleration"
+	depends on IXGBEVF
+	depends on XFRM_OFFLOAD
+	default y
+	select XFRM_ALGO
+	---help---
+	  Enable support for IPSec offload in ixgbevf.ko
+
 config I40E
 	tristate "Intel(R) Ethernet Controller XL710 Family support"
 	imply PTP_1588_CLOCK
diff --git a/drivers/net/ethernet/intel/ixgbe/Makefile b/drivers/net/ethernet/intel/ixgbe/Makefile
index ca6b0c458e4a5..4fb0d9e3f2da2 100644
--- a/drivers/net/ethernet/intel/ixgbe/Makefile
+++ b/drivers/net/ethernet/intel/ixgbe/Makefile
@@ -17,4 +17,4 @@ ixgbe-$(CONFIG_IXGBE_DCB) +=  ixgbe_dcb.o ixgbe_dcb_82598.o \
 ixgbe-$(CONFIG_IXGBE_HWMON) += ixgbe_sysfs.o
 ixgbe-$(CONFIG_DEBUG_FS) += ixgbe_debugfs.o
 ixgbe-$(CONFIG_FCOE:m=y) += ixgbe_fcoe.o
-ixgbe-$(CONFIG_XFRM_OFFLOAD) += ixgbe_ipsec.o
+ixgbe-$(CONFIG_IXGBE_IPSEC) += ixgbe_ipsec.o
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index ec1b87cc44100..143bdd5ee2a08 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -769,9 +769,9 @@ struct ixgbe_adapter {
 #define IXGBE_RSS_KEY_SIZE     40  /* size of RSS Hash Key in bytes */
 	u32 *rss_key;
 
-#ifdef CONFIG_XFRM_OFFLOAD
+#ifdef CONFIG_IXGBE_IPSEC
 	struct ixgbe_ipsec *ipsec;
-#endif /* CONFIG_XFRM_OFFLOAD */
+#endif /* CONFIG_IXGBE_IPSEC */
 
 	/* AF_XDP zero-copy */
 	struct xdp_umem **xsk_umems;
@@ -1008,7 +1008,7 @@ void ixgbe_store_key(struct ixgbe_adapter *adapter);
 void ixgbe_store_reta(struct ixgbe_adapter *adapter);
 s32 ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg,
 		       u32 adv_sym, u32 adv_asm, u32 lp_sym, u32 lp_asm);
-#ifdef CONFIG_XFRM_OFFLOAD
+#ifdef CONFIG_IXGBE_IPSEC
 void ixgbe_init_ipsec_offload(struct ixgbe_adapter *adapter);
 void ixgbe_stop_ipsec_offload(struct ixgbe_adapter *adapter);
 void ixgbe_ipsec_restore(struct ixgbe_adapter *adapter);
@@ -1036,5 +1036,5 @@ static inline int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter,
 					u32 *mbuf, u32 vf) { return -EACCES; }
 static inline int ixgbe_ipsec_vf_del_sa(struct ixgbe_adapter *adapter,
 					u32 *mbuf, u32 vf) { return -EACCES; }
-#endif /* CONFIG_XFRM_OFFLOAD */
+#endif /* CONFIG_IXGBE_IPSEC */
 #endif /* _IXGBE_H_ */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 0049a2becd7e7..113b38e0defbf 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -8694,7 +8694,7 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb,
 
 #endif /* IXGBE_FCOE */
 
-#ifdef CONFIG_XFRM_OFFLOAD
+#ifdef CONFIG_IXGBE_IPSEC
 	if (skb->sp && !ixgbe_ipsec_tx(tx_ring, first, &ipsec_tx))
 		goto out_drop;
 #endif
@@ -10190,7 +10190,7 @@ ixgbe_features_check(struct sk_buff *skb, struct net_device *dev,
 	 * the TSO, so it's the exception.
 	 */
 	if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID)) {
-#ifdef CONFIG_XFRM_OFFLOAD
+#ifdef CONFIG_IXGBE_IPSEC
 		if (!skb->sp)
 #endif
 			features &= ~NETIF_F_TSO;
@@ -10883,7 +10883,7 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (hw->mac.type >= ixgbe_mac_82599EB)
 		netdev->features |= NETIF_F_SCTP_CRC;
 
-#ifdef CONFIG_XFRM_OFFLOAD
+#ifdef CONFIG_IXGBE_IPSEC
 #define IXGBE_ESP_FEATURES	(NETIF_F_HW_ESP | \
 				 NETIF_F_HW_ESP_TX_CSUM | \
 				 NETIF_F_GSO_ESP)
diff --git a/drivers/net/ethernet/intel/ixgbevf/Makefile b/drivers/net/ethernet/intel/ixgbevf/Makefile
index 297d0f0858b59..186a4bb24fdea 100644
--- a/drivers/net/ethernet/intel/ixgbevf/Makefile
+++ b/drivers/net/ethernet/intel/ixgbevf/Makefile
@@ -10,5 +10,5 @@ ixgbevf-objs := vf.o \
                 mbx.o \
                 ethtool.o \
                 ixgbevf_main.o
-ixgbevf-$(CONFIG_XFRM_OFFLOAD) += ipsec.o
+ixgbevf-$(CONFIG_IXGBEVF_IPSEC) += ipsec.o
 
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
index e399e1c0c54ab..ecab686574b65 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
@@ -459,7 +459,7 @@ int ethtool_ioctl(struct ifreq *ifr);
 
 extern void ixgbevf_write_eitr(struct ixgbevf_q_vector *q_vector);
 
-#ifdef CONFIG_XFRM_OFFLOAD
+#ifdef CONFIG_IXGBEVF_IPSEC
 void ixgbevf_init_ipsec_offload(struct ixgbevf_adapter *adapter);
 void ixgbevf_stop_ipsec_offload(struct ixgbevf_adapter *adapter);
 void ixgbevf_ipsec_restore(struct ixgbevf_adapter *adapter);
@@ -482,7 +482,7 @@ static inline int ixgbevf_ipsec_tx(struct ixgbevf_ring *tx_ring,
 				   struct ixgbevf_tx_buffer *first,
 				   struct ixgbevf_ipsec_tx_data *itd)
 { return 0; }
-#endif /* CONFIG_XFRM_OFFLOAD */
+#endif /* CONFIG_IXGBEVF_IPSEC */
 
 void ixgbe_napi_add_all(struct ixgbevf_adapter *adapter);
 void ixgbe_napi_del_all(struct ixgbevf_adapter *adapter);
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 98707ee11d726..5e47ede7e8320 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -4150,7 +4150,7 @@ static int ixgbevf_xmit_frame_ring(struct sk_buff *skb,
 	first->tx_flags = tx_flags;
 	first->protocol = vlan_get_protocol(skb);
 
-#ifdef CONFIG_XFRM_OFFLOAD
+#ifdef CONFIG_IXGBEVF_IPSEC
 	if (skb->sp && !ixgbevf_ipsec_tx(tx_ring, first, &ipsec_tx))
 		goto out_drop;
 #endif
diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig
index 4a9ee2d83158b..140270a13d54f 100644
--- a/net/xfrm/Kconfig
+++ b/net/xfrm/Kconfig
@@ -8,7 +8,6 @@ config XFRM
 
 config XFRM_OFFLOAD
        bool
-       depends on XFRM
 
 config XFRM_ALGO
 	tristate
-- 
GitLab


From bb58fd7eeffc9bd5d6e2a16cbf0e9e259f8d09f2 Mon Sep 17 00:00:00 2001
From: Mitch Williams <mitch.a.williams@intel.com>
Date: Fri, 19 Oct 2018 14:11:03 -0700
Subject: [PATCH 0379/1890] i40e: Update status codes

Add a few new status code which will be used by the ice driver, and
rename a few to make them more consistent. Error code are mapped to
similar values as in i40e_status.h, so as to be compatible with older
VF drivers not using this status enum.

Signed-off-by: Mitch Williams <mitch.a.williams@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c |  2 +-
 include/linux/avf/virtchnl.h                       | 12 +++++++++---
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 81b0e1f8d14b6..ac5698ed0b111 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -3674,7 +3674,7 @@ int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode,
 		dev_err(&pf->pdev->dev, "Invalid message from VF %d, opcode %d, len %d\n",
 			local_vf_id, v_opcode, msglen);
 		switch (ret) {
-		case VIRTCHNL_ERR_PARAM:
+		case VIRTCHNL_STATUS_ERR_PARAM:
 			return -EPERM;
 		default:
 			return -EINVAL;
diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index 2c9756bd9c4cd..b2488055fd1d1 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -62,13 +62,19 @@
 /* Error Codes */
 enum virtchnl_status_code {
 	VIRTCHNL_STATUS_SUCCESS				= 0,
-	VIRTCHNL_ERR_PARAM				= -5,
+	VIRTCHNL_STATUS_ERR_PARAM			= -5,
+	VIRTCHNL_STATUS_ERR_NO_MEMORY			= -18,
 	VIRTCHNL_STATUS_ERR_OPCODE_MISMATCH		= -38,
 	VIRTCHNL_STATUS_ERR_CQP_COMPL_ERROR		= -39,
 	VIRTCHNL_STATUS_ERR_INVALID_VF_ID		= -40,
-	VIRTCHNL_STATUS_NOT_SUPPORTED			= -64,
+	VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR		= -53,
+	VIRTCHNL_STATUS_ERR_NOT_SUPPORTED		= -64,
 };
 
+/* Backward compatibility */
+#define VIRTCHNL_ERR_PARAM VIRTCHNL_STATUS_ERR_PARAM
+#define VIRTCHNL_STATUS_NOT_SUPPORTED VIRTCHNL_STATUS_ERR_NOT_SUPPORTED
+
 #define VIRTCHNL_LINK_SPEED_100MB_SHIFT		0x1
 #define VIRTCHNL_LINK_SPEED_1000MB_SHIFT	0x2
 #define VIRTCHNL_LINK_SPEED_10GB_SHIFT		0x3
@@ -831,7 +837,7 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode,
 	case VIRTCHNL_OP_EVENT:
 	case VIRTCHNL_OP_UNKNOWN:
 	default:
-		return VIRTCHNL_ERR_PARAM;
+		return VIRTCHNL_STATUS_ERR_PARAM;
 	}
 	/* few more checks */
 	if (err_msg_format || valid_len != msglen)
-- 
GitLab


From 6702185c1ffec3421181b5e24491e3fac920cb61 Mon Sep 17 00:00:00 2001
From: Radoslaw Tyl <radoslawx.tyl@intel.com>
Date: Mon, 22 Oct 2018 08:44:31 +0200
Subject: [PATCH 0380/1890] ixgbe: fix MAC anti-spoofing filter after VFLR

This change resolves a driver bug where the driver is logging a
message that says "Spoofed packets detected". This can occur on the PF
(host) when a VF has VLAN+MACVLAN enabled and is re-started with a
different MAC address.

MAC and VLAN anti-spoofing filters are to be enabled together.

Signed-off-by: Radoslaw Tyl <radoslawx.tyl@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Acked-by: Piotr Skajewski <piotrx.skajewski@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
index af25a8fffeb8b..5dacfc8702598 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
@@ -722,8 +722,10 @@ static inline void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf)
 			ixgbe_set_vmvir(adapter, vfinfo->pf_vlan,
 					adapter->default_up, vf);
 
-		if (vfinfo->spoofchk_enabled)
+		if (vfinfo->spoofchk_enabled) {
 			hw->mac.ops.set_vlan_anti_spoofing(hw, true, vf);
+			hw->mac.ops.set_mac_anti_spoofing(hw, true, vf);
+		}
 	}
 
 	/* reset multicast table array for vf */
-- 
GitLab


From a324e9396ca3d00e1101476ba067b412e0aba232 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Wed, 31 Oct 2018 19:20:20 +0100
Subject: [PATCH 0381/1890] EDAC, skx: Fix randconfig builds

The driver depends on the ADXL component glue and selects it. However,
ADXL itself implicitly depends on ACPI and in nonsensical randconfig
builds like this:

  # CONFIG_ACPI is not set
  CONFIG_ACPI_ADXL=y

where ACPI is not enabled, the build fails with:

  drivers/edac/skx_edac.o: In function `skx_mce_check_error':
  skx_edac.c:(.text+0xab): undefined reference to `adxl_decode'
  drivers/edac/skx_edac.o: In function `skx_init':
  skx_edac.c:(.init.text+0x8bf): undefined reference to `adxl_get_component_names'
  make: *** [vmlinux] Error 1

Add stubs for that case so that the build succeeds. CONFIG_ACPI=n
doesn't make any sense for real configurations but this fix will at
least silence randconfig builds.

Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Tony Luck <tony.luck@intel.com>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
---
 drivers/edac/Kconfig | 2 +-
 include/linux/adxl.h | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index ffd349c124794..68e479b4d9c97 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -234,7 +234,7 @@ config EDAC_SKX
 	depends on PCI && X86_64 && X86_MCE_INTEL && PCI_MMCONFIG
 	depends on ACPI_NFIT || !ACPI_NFIT # if ACPI_NFIT=m, EDAC_SKX can't be y
 	select DMI
-	select ACPI_ADXL
+	select ACPI_ADXL if ACPI
 	help
 	  Support for error detection and correction the Intel
 	  Skylake server Integrated Memory Controllers. If your
diff --git a/include/linux/adxl.h b/include/linux/adxl.h
index 2a629acb4c3f4..2d29f55923e3a 100644
--- a/include/linux/adxl.h
+++ b/include/linux/adxl.h
@@ -7,7 +7,12 @@
 #ifndef _LINUX_ADXL_H
 #define _LINUX_ADXL_H
 
+#ifdef CONFIG_ACPI_ADXL
 const char * const *adxl_get_component_names(void);
 int adxl_decode(u64 addr, u64 component_values[]);
+#else
+static inline const char * const *adxl_get_component_names(void)  { return NULL; }
+static inline int adxl_decode(u64 addr, u64 component_values[])   { return  -EOPNOTSUPP; }
+#endif
 
 #endif /* _LINUX_ADXL_H */
-- 
GitLab


From e7611088f0357339d8c30540222debfa24095d4b Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Mon, 29 Oct 2018 22:46:11 +0000
Subject: [PATCH 0382/1890] net: hns3: fix spelling mistake "intrerrupt" ->
 "interrupt"

Trivial fix to spelling mistake in dev_err message

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
index dca6f2326c267..123c37e653f3e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
@@ -751,7 +751,7 @@ static void hclge_process_ncsi_error(struct hclge_dev *hdev,
 	ret = hclge_cmd_clear_error(hdev, &desc_wr, &desc_rd,
 				    HCLGE_NCSI_INT_CLR, 0);
 	if (ret)
-		dev_err(dev, "failed(=%d) to clear NCSI intrerrupt status\n",
+		dev_err(dev, "failed(=%d) to clear NCSI interrupt status\n",
 			ret);
 }
 
-- 
GitLab


From c4147beabec19fc7b37eb79251114bad3e9915dd Mon Sep 17 00:00:00 2001
From: Bo YU <tsu.yubo@gmail.com>
Date: Mon, 29 Oct 2018 23:42:09 -0400
Subject: [PATCH 0383/1890] net: add an identifier name for 'struct sock *'

Fix a warning from checkpatch:
function definition argument 'struct sock *' should also have an
identifier name in include/net/af_unix.h.

Signed-off-by: Bo YU <tsu.yubo@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/af_unix.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index e2695c4bf3580..d53aea859a762 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -13,7 +13,7 @@ void unix_notinflight(struct user_struct *user, struct file *fp);
 void unix_gc(void);
 void wait_for_unix_gc(void);
 struct sock *unix_get_socket(struct file *filp);
-struct sock *unix_peer_get(struct sock *);
+struct sock *unix_peer_get(struct sock *sk);
 
 #define UNIX_HASH_SIZE	256
 #define UNIX_HASH_BITS	8
-- 
GitLab


From b1c234441e07da748ccded3aaa37177622d469d3 Mon Sep 17 00:00:00 2001
From: Bo YU <tsu.yubo@gmail.com>
Date: Mon, 29 Oct 2018 23:42:10 -0400
Subject: [PATCH 0384/1890] net: drop a space before tabs

Fix a warning from checkpatch.pl:'please no space before tabs'
in include/net/af_unix.h

Signed-off-by: Bo YU <tsu.yubo@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/af_unix.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index d53aea859a762..ddbba838d048d 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -40,7 +40,7 @@ struct unix_skb_parms {
 	u32			consumed;
 } __randomize_layout;
 
-#define UNIXCB(skb) 	(*(struct unix_skb_parms *)&((skb)->cb))
+#define UNIXCB(skb)	(*(struct unix_skb_parms *)&((skb)->cb))
 
 #define unix_state_lock(s)	spin_lock(&unix_sk(s)->lock)
 #define unix_state_unlock(s)	spin_unlock(&unix_sk(s)->lock)
-- 
GitLab


From ff002269a4ee9c769dbf9365acef633ebcbd6cbe Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Tue, 30 Oct 2018 14:10:49 +0800
Subject: [PATCH 0385/1890] vhost: Fix Spectre V1 vulnerability

The idx in vhost_vring_ioctl() was controlled by userspace, hence a
potential exploitation of the Spectre variant 1 vulnerability.

Fixing this by sanitizing idx before using it to index d->vqs.

Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/vhost/vhost.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index f52008bb8df76..3a5f81a66d34f 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -30,6 +30,7 @@
 #include <linux/sched/mm.h>
 #include <linux/sched/signal.h>
 #include <linux/interval_tree_generic.h>
+#include <linux/nospec.h>
 
 #include "vhost.h"
 
@@ -1387,6 +1388,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
 	if (idx >= d->nvqs)
 		return -ENOBUFS;
 
+	idx = array_index_nospec(idx, d->nvqs);
 	vq = d->vqs[idx];
 
 	mutex_lock(&vq->mutex);
-- 
GitLab


From d48051c5b8376038c2b287c3b1bd55b8d391d567 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 30 Oct 2018 00:57:25 -0700
Subject: [PATCH 0386/1890] net/mlx5e: fix csum adjustments caused by RXFCS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As shown by Dmitris, we need to use csum_block_add() instead of csum_add()
when adding the FCS contribution to skb csum.

Before 4.18 (more exactly commit 88078d98d1bb "net: pskb_trim_rcsum()
and CHECKSUM_COMPLETE are friends"), the whole skb csum was thrown away,
so RXFCS changes were ignored.

Then before commit d55bef5059dd ("net: fix pskb_trim_rcsum_slow() with
odd trim offset") both mlx5 and pskb_trim_rcsum_slow() bugs were canceling
each other.

Now we fixed pskb_trim_rcsum_slow() we need to fix mlx5.

Note that this patch also rewrites mlx5e_get_fcs() to :

- Use skb_header_pointer() instead of reinventing it.
- Use __get_unaligned_cpu32() to avoid possible non aligned accesses
  as Dmitris pointed out.

Fixes: 902a545904c7 ("net/mlx5e: When RXFCS is set, add FCS data into checksum calculation")
Reported-by: PaweÅ‚ Staszewski <pstaszewski@itcare.pl>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Eran Ben Elisha <eranbe@mellanox.com>
Cc: Saeed Mahameed <saeedm@mellanox.com>
Cc: Dimitris Michailidis <dmichail@google.com>
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Cc: PaweÅ‚ Staszewski <pstaszewski@itcare.pl>
Reviewed-by: Eran Ben Elisha <eranbe@mellanox.com>
Tested-By: Maria Pasechnik <mariap@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlx5/core/en_rx.c   | 45 ++++---------------
 1 file changed, 9 insertions(+), 36 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 94224c22ecc31..79638dcbae783 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -713,43 +713,15 @@ static inline void mlx5e_enable_ecn(struct mlx5e_rq *rq, struct sk_buff *skb)
 	rq->stats->ecn_mark += !!rc;
 }
 
-static __be32 mlx5e_get_fcs(struct sk_buff *skb)
+static u32 mlx5e_get_fcs(const struct sk_buff *skb)
 {
-	int last_frag_sz, bytes_in_prev, nr_frags;
-	u8 *fcs_p1, *fcs_p2;
-	skb_frag_t *last_frag;
-	__be32 fcs_bytes;
+	const void *fcs_bytes;
+	u32 _fcs_bytes;
 
-	if (!skb_is_nonlinear(skb))
-		return *(__be32 *)(skb->data + skb->len - ETH_FCS_LEN);
+	fcs_bytes = skb_header_pointer(skb, skb->len - ETH_FCS_LEN,
+				       ETH_FCS_LEN, &_fcs_bytes);
 
-	nr_frags = skb_shinfo(skb)->nr_frags;
-	last_frag = &skb_shinfo(skb)->frags[nr_frags - 1];
-	last_frag_sz = skb_frag_size(last_frag);
-
-	/* If all FCS data is in last frag */
-	if (last_frag_sz >= ETH_FCS_LEN)
-		return *(__be32 *)(skb_frag_address(last_frag) +
-				   last_frag_sz - ETH_FCS_LEN);
-
-	fcs_p2 = (u8 *)skb_frag_address(last_frag);
-	bytes_in_prev = ETH_FCS_LEN - last_frag_sz;
-
-	/* Find where the other part of the FCS is - Linear or another frag */
-	if (nr_frags == 1) {
-		fcs_p1 = skb_tail_pointer(skb);
-	} else {
-		skb_frag_t *prev_frag = &skb_shinfo(skb)->frags[nr_frags - 2];
-
-		fcs_p1 = skb_frag_address(prev_frag) +
-			    skb_frag_size(prev_frag);
-	}
-	fcs_p1 -= bytes_in_prev;
-
-	memcpy(&fcs_bytes, fcs_p1, bytes_in_prev);
-	memcpy(((u8 *)&fcs_bytes) + bytes_in_prev, fcs_p2, last_frag_sz);
-
-	return fcs_bytes;
+	return __get_unaligned_cpu32(fcs_bytes);
 }
 
 static u8 get_ip_proto(struct sk_buff *skb, __be16 proto)
@@ -797,8 +769,9 @@ static inline void mlx5e_handle_csum(struct net_device *netdev,
 						 network_depth - ETH_HLEN,
 						 skb->csum);
 		if (unlikely(netdev->features & NETIF_F_RXFCS))
-			skb->csum = csum_add(skb->csum,
-					     (__force __wsum)mlx5e_get_fcs(skb));
+			skb->csum = csum_block_add(skb->csum,
+						   (__force __wsum)mlx5e_get_fcs(skb),
+						   skb->len - ETH_FCS_LEN);
 		stats->csum_complete++;
 		return;
 	}
-- 
GitLab


From ece4bf46e98c9f3775a488f3932a531508d3b1a2 Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Tue, 30 Oct 2018 21:50:43 +0800
Subject: [PATCH 0387/1890] net: hns3: add error handler for
 hns3_nic_init_vector_data()

When hns3_nic_init_vector_data() fails to map ring to vector,
it should cancel the netif_napi_add() that has been successfully
done and then exits.

Fixes: 76ad4f0ee747 ("net: hns3: Add support of HNS3 Ethernet Driver for hip08 SoC")
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 32f3aca814e78..0b4323b1f9303 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -2821,7 +2821,7 @@ static int hns3_nic_init_vector_data(struct hns3_nic_priv *priv)
 	struct hnae3_handle *h = priv->ae_handle;
 	struct hns3_enet_tqp_vector *tqp_vector;
 	int ret = 0;
-	u16 i;
+	int i;
 
 	hns3_nic_set_cpumask(priv);
 
@@ -2868,13 +2868,19 @@ static int hns3_nic_init_vector_data(struct hns3_nic_priv *priv)
 		hns3_free_vector_ring_chain(tqp_vector, &vector_ring_chain);
 
 		if (ret)
-			return ret;
+			goto map_ring_fail;
 
 		netif_napi_add(priv->netdev, &tqp_vector->napi,
 			       hns3_nic_common_poll, NAPI_POLL_WEIGHT);
 	}
 
 	return 0;
+
+map_ring_fail:
+	while (i--)
+		netif_napi_del(&priv->tqp_vector[i].napi);
+
+	return ret;
 }
 
 static int hns3_nic_alloc_vector_data(struct hns3_nic_priv *priv)
-- 
GitLab


From 73b907a083b8a8c1c62cb494bc9fbe6ae086c460 Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Tue, 30 Oct 2018 21:50:44 +0800
Subject: [PATCH 0388/1890] net: hns3: bugfix for buffer not free problem
 during resetting

When hns3_get_ring_config()/hns3_queue_to_ring()/
hns3_get_vector_ring_chain() failed during resetting, the allocated
memory has not been freed before these three functions return. So
this patch adds error handler in these functions to fix it.

Fixes: 76ad4f0ee747 ("net: hns3: Add support of HNS3 Ethernet Driver for hip08 SoC")
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/hisilicon/hns3/hns3_enet.c   | 24 ++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 0b4323b1f9303..b767ff96b5370 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -2727,7 +2727,7 @@ static int hns3_get_vector_ring_chain(struct hns3_enet_tqp_vector *tqp_vector,
 			chain = devm_kzalloc(&pdev->dev, sizeof(*chain),
 					     GFP_KERNEL);
 			if (!chain)
-				return -ENOMEM;
+				goto err_free_chain;
 
 			cur_chain->next = chain;
 			chain->tqp_index = tx_ring->tqp->tqp_index;
@@ -2757,7 +2757,7 @@ static int hns3_get_vector_ring_chain(struct hns3_enet_tqp_vector *tqp_vector,
 	while (rx_ring) {
 		chain = devm_kzalloc(&pdev->dev, sizeof(*chain), GFP_KERNEL);
 		if (!chain)
-			return -ENOMEM;
+			goto err_free_chain;
 
 		cur_chain->next = chain;
 		chain->tqp_index = rx_ring->tqp->tqp_index;
@@ -2772,6 +2772,16 @@ static int hns3_get_vector_ring_chain(struct hns3_enet_tqp_vector *tqp_vector,
 	}
 
 	return 0;
+
+err_free_chain:
+	cur_chain = head->next;
+	while (cur_chain) {
+		chain = cur_chain->next;
+		devm_kfree(&pdev->dev, chain);
+		cur_chain = chain;
+	}
+
+	return -ENOMEM;
 }
 
 static void hns3_free_vector_ring_chain(struct hns3_enet_tqp_vector *tqp_vector,
@@ -3037,8 +3047,10 @@ static int hns3_queue_to_ring(struct hnae3_queue *tqp,
 		return ret;
 
 	ret = hns3_ring_get_cfg(tqp, priv, HNAE3_RING_TYPE_RX);
-	if (ret)
+	if (ret) {
+		devm_kfree(priv->dev, priv->ring_data[tqp->tqp_index].ring);
 		return ret;
+	}
 
 	return 0;
 }
@@ -3065,6 +3077,12 @@ static int hns3_get_ring_config(struct hns3_nic_priv *priv)
 
 	return 0;
 err:
+	while (i--) {
+		devm_kfree(priv->dev, priv->ring_data[i].ring);
+		devm_kfree(priv->dev,
+			   priv->ring_data[i + h->kinfo.num_tqps].ring);
+	}
+
 	devm_kfree(&pdev->dev, priv->ring_data);
 	return ret;
 }
-- 
GitLab


From 0d4411408a7fb9aad0645f23911d9bfdd2ce3177 Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Tue, 30 Oct 2018 21:50:45 +0800
Subject: [PATCH 0389/1890] net: hns3: bugfix for reporting unknown vector0
 interrupt repeatly problem

The current driver supports handling two vector0 interrupts, reset and
mailbox. When the hardware reports an interrupt of another type of
interrupt source, if the driver does not process the interrupt, but
enables the interrupt, the hardware will repeatedly report the unknown
interrupt.

Therefore, the driver enables the vector0 interrupt after clearing the
known type of interrupt source. Other conditions are not enabled.

Fixes: cd8c5c269b1d ("net: hns3: Fix for hclge_reset running repeatly problem")
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 5234b5373ed3b..2a6314784c8a0 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -2236,7 +2236,7 @@ static irqreturn_t hclge_misc_irq_handle(int irq, void *data)
 	}
 
 	/* clear the source of interrupt if it is not cause by reset */
-	if (event_cause != HCLGE_VECTOR0_EVENT_RST) {
+	if (event_cause == HCLGE_VECTOR0_EVENT_MBX) {
 		hclge_clear_event_cause(hdev, event_cause, clearval);
 		hclge_enable_vector(&hdev->misc_vector, true);
 	}
-- 
GitLab


From b2f74dbaf12bf59ff35d451005b3cdee78232ff0 Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Tue, 30 Oct 2018 21:50:46 +0800
Subject: [PATCH 0390/1890] net: hns3: bugfix for the initialization of command
 queue's spin lock

The spin lock of the command queue only need to be initialized once
when the driver initializes the command queue. It is not necessary to
initialize the spin lock when resetting. At the same time, the
modification of the queue member should be performed after acquiring
the lock.

Fixes: 3efb960f056d ("net: hns3: Refactor the initialization of command queue")
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
index ac13cb2b168e5..68026a5ad7e77 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
@@ -304,6 +304,10 @@ int hclge_cmd_queue_init(struct hclge_dev *hdev)
 {
 	int ret;
 
+	/* Setup the lock for command queue */
+	spin_lock_init(&hdev->hw.cmq.csq.lock);
+	spin_lock_init(&hdev->hw.cmq.crq.lock);
+
 	/* Setup the queue entries for use cmd queue */
 	hdev->hw.cmq.csq.desc_num = HCLGE_NIC_CMQ_DESC_NUM;
 	hdev->hw.cmq.crq.desc_num = HCLGE_NIC_CMQ_DESC_NUM;
@@ -337,18 +341,20 @@ int hclge_cmd_init(struct hclge_dev *hdev)
 	u32 version;
 	int ret;
 
+	spin_lock_bh(&hdev->hw.cmq.csq.lock);
+	spin_lock_bh(&hdev->hw.cmq.crq.lock);
+
 	hdev->hw.cmq.csq.next_to_clean = 0;
 	hdev->hw.cmq.csq.next_to_use = 0;
 	hdev->hw.cmq.crq.next_to_clean = 0;
 	hdev->hw.cmq.crq.next_to_use = 0;
 
-	/* Setup the lock for command queue */
-	spin_lock_init(&hdev->hw.cmq.csq.lock);
-	spin_lock_init(&hdev->hw.cmq.crq.lock);
-
 	hclge_cmd_init_regs(&hdev->hw);
 	clear_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state);
 
+	spin_unlock_bh(&hdev->hw.cmq.crq.lock);
+	spin_unlock_bh(&hdev->hw.cmq.csq.lock);
+
 	ret = hclge_cmd_query_firmware_version(&hdev->hw, &version);
 	if (ret) {
 		dev_err(&hdev->pdev->dev,
-- 
GitLab


From 5faaf0752a0c43735b7c508dcf3c4c7b36a032db Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Tue, 30 Oct 2018 21:50:47 +0800
Subject: [PATCH 0391/1890] net: hns3: remove unnecessary queue reset in the
 hns3_uninit_all_ring()

It is not necessary to reset the queue in the hns3_uninit_all_ring(),
since the queue is stopped in the down operation, and will be reset
in the up operation. And the judgment of the HCLGE_STATE_RST_HANDLING
flag in the hclge_reset_tqp() is not correct, because we need to reset
tqp during pf reset, otherwise it may cause queue not being reset to
working state problem.

Fixes: 76ad4f0ee747 ("net: hns3: Add support of HNS3 Ethernet Driver for hip08 SoC")
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c         | 3 ---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 3 ---
 2 files changed, 6 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index b767ff96b5370..bf71c23be4092 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -3250,9 +3250,6 @@ int hns3_uninit_all_ring(struct hns3_nic_priv *priv)
 	int i;
 
 	for (i = 0; i < h->kinfo.num_tqps; i++) {
-		if (h->ae_algo->ops->reset_queue)
-			h->ae_algo->ops->reset_queue(h, i);
-
 		hns3_fini_ring(priv->ring_data[i].ring);
 		hns3_fini_ring(priv->ring_data[i + h->kinfo.num_tqps].ring);
 	}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 2a6314784c8a0..4dd0506885496 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -6116,9 +6116,6 @@ void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
 	u16 queue_gid;
 	int ret;
 
-	if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
-		return;
-
 	queue_gid = hclge_covert_handle_qid_global(handle, queue_id);
 
 	ret = hclge_tqp_enable(hdev, queue_id, 0, false);
-- 
GitLab


From 6d71ec6cbf74ac9c2823ef751b1baa5b889bb3ac Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Tue, 30 Oct 2018 21:50:48 +0800
Subject: [PATCH 0392/1890] net: hns3: bugfix for is_valid_csq_clean_head()

The HEAD pointer of the hardware command queue maybe equal to the command
queue's next_to_use in the driver, so that does not belong to the invalid
HEAD pointer, since the hardware may not process the command in time,
causing the HEAD pointer to be too late to update. The variables' name
in this function is unreadable, so give them a more readable one.

Fixes: 3ff504908f95 ("net: hns3: fix a dead loop in hclge_cmd_csq_clean")
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c   | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
index 68026a5ad7e77..690f62ed87dca 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
@@ -24,15 +24,15 @@ static int hclge_ring_space(struct hclge_cmq_ring *ring)
 	return ring->desc_num - used - 1;
 }
 
-static int is_valid_csq_clean_head(struct hclge_cmq_ring *ring, int h)
+static int is_valid_csq_clean_head(struct hclge_cmq_ring *ring, int head)
 {
-	int u = ring->next_to_use;
-	int c = ring->next_to_clean;
+	int ntu = ring->next_to_use;
+	int ntc = ring->next_to_clean;
 
-	if (unlikely(h >= ring->desc_num))
-		return 0;
+	if (ntu > ntc)
+		return head >= ntc && head <= ntu;
 
-	return u > c ? (h > c && h <= u) : (h > c || h <= u);
+	return head >= ntc || head <= ntu;
 }
 
 static int hclge_alloc_cmd_desc(struct hclge_cmq_ring *ring)
-- 
GitLab


From 1c12493809924deda6c0834cb2f2c5a6dc786390 Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Tue, 30 Oct 2018 21:50:49 +0800
Subject: [PATCH 0393/1890] net: hns3: bugfix for hclge_mdio_write and
 hclge_mdio_read

When there is a PHY, the driver needs to complete some operations through
MDIO during reset reinitialization, so HCLGE_STATE_CMD_DISABLE is more
suitable than HCLGE_STATE_RST_HANDLING to prevent the MDIO operation from
being sent during the hardware reset.

Fixes: b50ae26c57cb ("net: hns3: never send command queue message to IMP when reset)
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
index 24b1f2a0c32af..03018638f701b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
@@ -52,7 +52,7 @@ static int hclge_mdio_write(struct mii_bus *bus, int phyid, int regnum,
 	struct hclge_desc desc;
 	int ret;
 
-	if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
+	if (test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state))
 		return 0;
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MDIO_CONFIG, false);
@@ -90,7 +90,7 @@ static int hclge_mdio_read(struct mii_bus *bus, int phyid, int regnum)
 	struct hclge_desc desc;
 	int ret;
 
-	if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
+	if (test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state))
 		return 0;
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MDIO_CONFIG, true);
-- 
GitLab


From 7fa6be4fd2f60399f3f3370a87629e102407a724 Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Tue, 30 Oct 2018 21:50:50 +0800
Subject: [PATCH 0394/1890] net: hns3: fix incorrect return value/type of some
 functions

There are some functions that, when they fail to send the command,
need to return the corresponding error value to its caller.

Fixes: 46a3df9f9718 ("net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support")
Fixes: 681ec3999b3d ("net: hns3: fix for vlan table lost problem when resetting")
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h   |  6 +-
 .../net/ethernet/hisilicon/hns3/hns3_enet.c   | 80 +++++++++++++------
 .../net/ethernet/hisilicon/hns3/hns3_enet.h   |  2 +-
 .../hisilicon/hns3/hns3pf/hclge_main.c        | 34 ++++----
 .../hisilicon/hns3/hns3pf/hclge_main.h        |  2 +-
 .../hisilicon/hns3/hns3vf/hclgevf_main.c      | 14 ++--
 6 files changed, 85 insertions(+), 53 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index e82e4ca206205..055b40606dbc2 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -316,8 +316,8 @@ struct hnae3_ae_ops {
 	int (*set_loopback)(struct hnae3_handle *handle,
 			    enum hnae3_loop loop_mode, bool en);
 
-	void (*set_promisc_mode)(struct hnae3_handle *handle, bool en_uc_pmc,
-				 bool en_mc_pmc);
+	int (*set_promisc_mode)(struct hnae3_handle *handle, bool en_uc_pmc,
+				bool en_mc_pmc);
 	int (*set_mtu)(struct hnae3_handle *handle, int new_mtu);
 
 	void (*get_pauseparam)(struct hnae3_handle *handle,
@@ -391,7 +391,7 @@ struct hnae3_ae_ops {
 				      int vector_num,
 				      struct hnae3_ring_chain_node *vr_chain);
 
-	void (*reset_queue)(struct hnae3_handle *handle, u16 queue_id);
+	int (*reset_queue)(struct hnae3_handle *handle, u16 queue_id);
 	u32 (*get_fw_version)(struct hnae3_handle *handle);
 	void (*get_mdix_mode)(struct hnae3_handle *handle,
 			      u8 *tp_mdix_ctrl, u8 *tp_mdix);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index bf71c23be4092..3f96aa30068ec 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -509,16 +509,18 @@ static void hns3_nic_set_rx_mode(struct net_device *netdev)
 	h->netdev_flags = new_flags;
 }
 
-void hns3_update_promisc_mode(struct net_device *netdev, u8 promisc_flags)
+int hns3_update_promisc_mode(struct net_device *netdev, u8 promisc_flags)
 {
 	struct hns3_nic_priv *priv = netdev_priv(netdev);
 	struct hnae3_handle *h = priv->ae_handle;
 
 	if (h->ae_algo->ops->set_promisc_mode) {
-		h->ae_algo->ops->set_promisc_mode(h,
-						  promisc_flags & HNAE3_UPE,
-						  promisc_flags & HNAE3_MPE);
+		return h->ae_algo->ops->set_promisc_mode(h,
+						promisc_flags & HNAE3_UPE,
+						promisc_flags & HNAE3_MPE);
 	}
+
+	return 0;
 }
 
 void hns3_enable_vlan_filter(struct net_device *netdev, bool enable)
@@ -1494,18 +1496,22 @@ static int hns3_vlan_rx_kill_vid(struct net_device *netdev,
 	return ret;
 }
 
-static void hns3_restore_vlan(struct net_device *netdev)
+static int hns3_restore_vlan(struct net_device *netdev)
 {
 	struct hns3_nic_priv *priv = netdev_priv(netdev);
+	int ret = 0;
 	u16 vid;
-	int ret;
 
 	for_each_set_bit(vid, priv->active_vlans, VLAN_N_VID) {
 		ret = hns3_vlan_rx_add_vid(netdev, htons(ETH_P_8021Q), vid);
-		if (ret)
-			netdev_warn(netdev, "Restore vlan: %d filter, ret:%d\n",
-				    vid, ret);
+		if (ret) {
+			netdev_err(netdev, "Restore vlan: %d filter, ret:%d\n",
+				   vid, ret);
+			return ret;
+		}
 	}
+
+	return ret;
 }
 
 static int hns3_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan,
@@ -3257,11 +3263,12 @@ int hns3_uninit_all_ring(struct hns3_nic_priv *priv)
 }
 
 /* Set mac addr if it is configured. or leave it to the AE driver */
-static void hns3_init_mac_addr(struct net_device *netdev, bool init)
+static int hns3_init_mac_addr(struct net_device *netdev, bool init)
 {
 	struct hns3_nic_priv *priv = netdev_priv(netdev);
 	struct hnae3_handle *h = priv->ae_handle;
 	u8 mac_addr_temp[ETH_ALEN];
+	int ret = 0;
 
 	if (h->ae_algo->ops->get_mac_addr && init) {
 		h->ae_algo->ops->get_mac_addr(h, mac_addr_temp);
@@ -3276,8 +3283,9 @@ static void hns3_init_mac_addr(struct net_device *netdev, bool init)
 	}
 
 	if (h->ae_algo->ops->set_mac_addr)
-		h->ae_algo->ops->set_mac_addr(h, netdev->dev_addr, true);
+		ret = h->ae_algo->ops->set_mac_addr(h, netdev->dev_addr, true);
 
+	return ret;
 }
 
 static int hns3_restore_fd_rules(struct net_device *netdev)
@@ -3490,20 +3498,29 @@ static int hns3_client_setup_tc(struct hnae3_handle *handle, u8 tc)
 	return ret;
 }
 
-static void hns3_recover_hw_addr(struct net_device *ndev)
+static int hns3_recover_hw_addr(struct net_device *ndev)
 {
 	struct netdev_hw_addr_list *list;
 	struct netdev_hw_addr *ha, *tmp;
+	int ret = 0;
 
 	/* go through and sync uc_addr entries to the device */
 	list = &ndev->uc;
-	list_for_each_entry_safe(ha, tmp, &list->list, list)
-		hns3_nic_uc_sync(ndev, ha->addr);
+	list_for_each_entry_safe(ha, tmp, &list->list, list) {
+		ret = hns3_nic_uc_sync(ndev, ha->addr);
+		if (ret)
+			return ret;
+	}
 
 	/* go through and sync mc_addr entries to the device */
 	list = &ndev->mc;
-	list_for_each_entry_safe(ha, tmp, &list->list, list)
-		hns3_nic_mc_sync(ndev, ha->addr);
+	list_for_each_entry_safe(ha, tmp, &list->list, list) {
+		ret = hns3_nic_mc_sync(ndev, ha->addr);
+		if (ret)
+			return ret;
+	}
+
+	return ret;
 }
 
 static void hns3_remove_hw_addr(struct net_device *netdev)
@@ -3630,7 +3647,10 @@ int hns3_nic_reset_all_ring(struct hnae3_handle *h)
 	int ret;
 
 	for (i = 0; i < h->kinfo.num_tqps; i++) {
-		h->ae_algo->ops->reset_queue(h, i);
+		ret = h->ae_algo->ops->reset_queue(h, i);
+		if (ret)
+			return ret;
+
 		hns3_init_ring_hw(priv->ring_data[i].ring);
 
 		/* We need to clear tx ring here because self test will
@@ -3722,18 +3742,30 @@ static int hns3_reset_notify_init_enet(struct hnae3_handle *handle)
 	bool vlan_filter_enable;
 	int ret;
 
-	hns3_init_mac_addr(netdev, false);
-	hns3_recover_hw_addr(netdev);
-	hns3_update_promisc_mode(netdev, handle->netdev_flags);
+	ret = hns3_init_mac_addr(netdev, false);
+	if (ret)
+		return ret;
+
+	ret = hns3_recover_hw_addr(netdev);
+	if (ret)
+		return ret;
+
+	ret = hns3_update_promisc_mode(netdev, handle->netdev_flags);
+	if (ret)
+		return ret;
+
 	vlan_filter_enable = netdev->flags & IFF_PROMISC ? false : true;
 	hns3_enable_vlan_filter(netdev, vlan_filter_enable);
 
-
 	/* Hardware table is only clear when pf resets */
-	if (!(handle->flags & HNAE3_SUPPORT_VF))
-		hns3_restore_vlan(netdev);
+	if (!(handle->flags & HNAE3_SUPPORT_VF)) {
+		ret = hns3_restore_vlan(netdev);
+		return ret;
+	}
 
-	hns3_restore_fd_rules(netdev);
+	ret = hns3_restore_fd_rules(netdev);
+	if (ret)
+		return ret;
 
 	/* Carrier off reporting is important to ethtool even BEFORE open */
 	netif_carrier_off(netdev);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index 71cfca132d0bd..d3636d088aa3d 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -640,7 +640,7 @@ void hns3_set_vector_coalesce_rl(struct hns3_enet_tqp_vector *tqp_vector,
 				 u32 rl_value);
 
 void hns3_enable_vlan_filter(struct net_device *netdev, bool enable);
-void hns3_update_promisc_mode(struct net_device *netdev, u8 promisc_flags);
+int hns3_update_promisc_mode(struct net_device *netdev, u8 promisc_flags);
 
 #ifdef CONFIG_HNS3_DCB
 void hns3_dcbnl_setup(struct hnae3_handle *handle);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 4dd0506885496..f3212c96bb646 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -3314,8 +3314,8 @@ void hclge_promisc_param_init(struct hclge_promisc_param *param, bool en_uc,
 	param->vf_id = vport_id;
 }
 
-static void hclge_set_promisc_mode(struct hnae3_handle *handle, bool en_uc_pmc,
-				   bool en_mc_pmc)
+static int hclge_set_promisc_mode(struct hnae3_handle *handle, bool en_uc_pmc,
+				  bool en_mc_pmc)
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
@@ -3323,7 +3323,7 @@ static void hclge_set_promisc_mode(struct hnae3_handle *handle, bool en_uc_pmc,
 
 	hclge_promisc_param_init(&param, en_uc_pmc, en_mc_pmc, true,
 				 vport->vport_id);
-	hclge_cmd_set_promisc_mode(hdev, &param);
+	return hclge_cmd_set_promisc_mode(hdev, &param);
 }
 
 static int hclge_get_fd_mode(struct hclge_dev *hdev, u8 *fd_mode)
@@ -6107,28 +6107,28 @@ static u16 hclge_covert_handle_qid_global(struct hnae3_handle *handle,
 	return tqp->index;
 }
 
-void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
+int hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
 	int reset_try_times = 0;
 	int reset_status;
 	u16 queue_gid;
-	int ret;
+	int ret = 0;
 
 	queue_gid = hclge_covert_handle_qid_global(handle, queue_id);
 
 	ret = hclge_tqp_enable(hdev, queue_id, 0, false);
 	if (ret) {
-		dev_warn(&hdev->pdev->dev, "Disable tqp fail, ret = %d\n", ret);
-		return;
+		dev_err(&hdev->pdev->dev, "Disable tqp fail, ret = %d\n", ret);
+		return ret;
 	}
 
 	ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, true);
 	if (ret) {
-		dev_warn(&hdev->pdev->dev,
-			 "Send reset tqp cmd fail, ret = %d\n", ret);
-		return;
+		dev_err(&hdev->pdev->dev,
+			"Send reset tqp cmd fail, ret = %d\n", ret);
+		return ret;
 	}
 
 	reset_try_times = 0;
@@ -6141,16 +6141,16 @@ void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
 	}
 
 	if (reset_try_times >= HCLGE_TQP_RESET_TRY_TIMES) {
-		dev_warn(&hdev->pdev->dev, "Reset TQP fail\n");
-		return;
+		dev_err(&hdev->pdev->dev, "Reset TQP fail\n");
+		return ret;
 	}
 
 	ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, false);
-	if (ret) {
-		dev_warn(&hdev->pdev->dev,
-			 "Deassert the soft reset fail, ret = %d\n", ret);
-		return;
-	}
+	if (ret)
+		dev_err(&hdev->pdev->dev,
+			"Deassert the soft reset fail, ret = %d\n", ret);
+
+	return ret;
 }
 
 void hclge_reset_vf_queue(struct hclge_vport *vport, u16 queue_id)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index e3dfd654eca9a..0d92154042699 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -778,7 +778,7 @@ int hclge_rss_init_hw(struct hclge_dev *hdev);
 void hclge_rss_indir_init_cfg(struct hclge_dev *hdev);
 
 void hclge_mbx_handler(struct hclge_dev *hdev);
-void hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id);
+int hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id);
 void hclge_reset_vf_queue(struct hclge_vport *vport, u16 queue_id);
 int hclge_cfg_flowctrl(struct hclge_dev *hdev);
 int hclge_func_reset_cmd(struct hclge_dev *hdev, int func_id);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index e0a86a58342c2..b224f6a340300 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -925,12 +925,12 @@ static int hclgevf_cmd_set_promisc_mode(struct hclgevf_dev *hdev,
 	return status;
 }
 
-static void hclgevf_set_promisc_mode(struct hnae3_handle *handle,
-				     bool en_uc_pmc, bool en_mc_pmc)
+static int hclgevf_set_promisc_mode(struct hnae3_handle *handle,
+				    bool en_uc_pmc, bool en_mc_pmc)
 {
 	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
 
-	hclgevf_cmd_set_promisc_mode(hdev, en_uc_pmc, en_mc_pmc);
+	return hclgevf_cmd_set_promisc_mode(hdev, en_uc_pmc, en_mc_pmc);
 }
 
 static int hclgevf_tqp_enable(struct hclgevf_dev *hdev, int tqp_id,
@@ -1080,7 +1080,7 @@ static int hclgevf_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable)
 				    1, false, NULL, 0);
 }
 
-static void hclgevf_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
+static int hclgevf_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
 {
 	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
 	u8 msg_data[2];
@@ -1091,10 +1091,10 @@ static void hclgevf_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
 	/* disable vf queue before send queue reset msg to PF */
 	ret = hclgevf_tqp_enable(hdev, queue_id, 0, false);
 	if (ret)
-		return;
+		return ret;
 
-	hclgevf_send_mbx_msg(hdev, HCLGE_MBX_QUEUE_RESET, 0, msg_data,
-			     2, true, NULL, 0);
+	return hclgevf_send_mbx_msg(hdev, HCLGE_MBX_QUEUE_RESET, 0, msg_data,
+				    2, true, NULL, 0);
 }
 
 static int hclgevf_notify_client(struct hclgevf_dev *hdev,
-- 
GitLab


From 3c88ed1d798da355859ca083d3884a16ce0841f2 Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Tue, 30 Oct 2018 21:50:51 +0800
Subject: [PATCH 0395/1890] net: hns3: bugfix for handling mailbox while the
 command queue reinitialized

In a multi-core machine, the mailbox service and reset service
will be executed at the same time. The reset service will re-initialize
the command queue, before that, the mailbox handler can only get some
invalid messages.

The HCLGE_STATE_CMD_DISABLE flag means that the command queue is not
available and needs to be reinitialized. Therefore, when the mailbox
handler recognizes this flag, it should not process the command.

Fixes: dde1a86e93ca ("net: hns3: Add mailbox support to PF driver")
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index 04462a347a940..f890022938d9a 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -400,6 +400,12 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
 
 	/* handle all the mailbox requests in the queue */
 	while (!hclge_cmd_crq_empty(&hdev->hw)) {
+		if (test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state)) {
+			dev_warn(&hdev->pdev->dev,
+				 "command queue needs re-initializing\n");
+			return;
+		}
+
 		desc = &crq->desc[crq->next_to_use];
 		req = (struct hclge_mbx_vf_to_pf_cmd *)desc->data;
 
-- 
GitLab


From a963052e539887df481d4d3a6ad4c92ca6461852 Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Tue, 30 Oct 2018 21:50:52 +0800
Subject: [PATCH 0396/1890] net: hns3: bugfix for rtnl_lock's range in the
 hclge_reset()

Since hclge_reset_wait() is used to wait for the hardware to complete
the reset, it is not necessary to hold the rtnl_lock during
hclge_reset_wait(). So this patch releases the lock for the duration
of hclge_reset_wait().

Fixes: 6d4fab39533f ("net: hns3: Reset net device with rtnl_lock")
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index f3212c96bb646..ffdd96020860d 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -2470,14 +2470,17 @@ static void hclge_reset(struct hclge_dev *hdev)
 	handle = &hdev->vport[0].nic;
 	rtnl_lock();
 	hclge_notify_client(hdev, HNAE3_DOWN_CLIENT);
+	rtnl_unlock();
 
 	if (!hclge_reset_wait(hdev)) {
+		rtnl_lock();
 		hclge_notify_client(hdev, HNAE3_UNINIT_CLIENT);
 		hclge_reset_ae_dev(hdev->ae_dev);
 		hclge_notify_client(hdev, HNAE3_INIT_CLIENT);
 
 		hclge_clear_reset_cause(hdev);
 	} else {
+		rtnl_lock();
 		/* schedule again to check pending resets later */
 		set_bit(hdev->reset_type, &hdev->reset_pending);
 		hclge_reset_task_schedule(hdev);
-- 
GitLab


From 29118ab962d5476fdc65fae312ac38db68092d78 Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Tue, 30 Oct 2018 21:50:53 +0800
Subject: [PATCH 0397/1890] net: hns3: bugfix for rtnl_lock's range in the
 hclgevf_reset()

Since hclgevf_reset_wait() is used to wait for the hardware to complete
the reset, it is not necessary to hold the rtnl_lock during
hclgevf_reset_wait(). So this patch releases the lock for the duration
of hclgevf_reset_wait().

Fixes: 6988eb2a9b77 ("net: hns3: Add support to reset the enet/ring mgmt layer")
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index b224f6a340300..085edb945389c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -1170,6 +1170,8 @@ static int hclgevf_reset(struct hclgevf_dev *hdev)
 	/* bring down the nic to stop any ongoing TX/RX */
 	hclgevf_notify_client(hdev, HNAE3_DOWN_CLIENT);
 
+	rtnl_unlock();
+
 	/* check if VF could successfully fetch the hardware reset completion
 	 * status from the hardware
 	 */
@@ -1181,12 +1183,15 @@ static int hclgevf_reset(struct hclgevf_dev *hdev)
 			ret);
 
 		dev_warn(&hdev->pdev->dev, "VF reset failed, disabling VF!\n");
+		rtnl_lock();
 		hclgevf_notify_client(hdev, HNAE3_UNINIT_CLIENT);
 
 		rtnl_unlock();
 		return ret;
 	}
 
+	rtnl_lock();
+
 	/* now, re-initialize the nic client and ae device*/
 	ret = hclgevf_reset_stack(hdev);
 	if (ret)
-- 
GitLab


From 8b931821aa04823e2e5df0ae93937baabbd23286 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Wed, 31 Oct 2018 09:56:42 +0000
Subject: [PATCH 0398/1890] mlxsw: reg: QEEC: Add minimum shaper fields

Add QEEC.mise (minimum shaper enable) and QEEC.min_shaper_rate to enable
configuration of minimum shaper.

Increase the QEEC length to 0x20 as well: that's the length that the
register has had for a long time now, but with the configurations that
mlxsw typically exercises, the firmware tolerated 0x1C-sized packets.
With mise=true however, FW rejects packets unless they have the full
required length.

Fixes: b9b7cee40579 ("mlxsw: reg: Add QoS ETS Element Configuration register")
Signed-off-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 32cb6718bb173..db3d2790aeecf 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -3284,7 +3284,7 @@ static inline void mlxsw_reg_qtct_pack(char *payload, u8 local_port,
  * Configures the ETS elements.
  */
 #define MLXSW_REG_QEEC_ID 0x400D
-#define MLXSW_REG_QEEC_LEN 0x1C
+#define MLXSW_REG_QEEC_LEN 0x20
 
 MLXSW_REG_DEFINE(qeec, MLXSW_REG_QEEC_ID, MLXSW_REG_QEEC_LEN);
 
@@ -3326,6 +3326,15 @@ MLXSW_ITEM32(reg, qeec, element_index, 0x04, 0, 8);
  */
 MLXSW_ITEM32(reg, qeec, next_element_index, 0x08, 0, 8);
 
+/* reg_qeec_mise
+ * Min shaper configuration enable. Enables configuration of the min
+ * shaper on this ETS element
+ * 0 - Disable
+ * 1 - Enable
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qeec, mise, 0x0C, 31, 1);
+
 enum {
 	MLXSW_REG_QEEC_BYTES_MODE,
 	MLXSW_REG_QEEC_PACKETS_MODE,
@@ -3342,6 +3351,17 @@ enum {
  */
 MLXSW_ITEM32(reg, qeec, pb, 0x0C, 28, 1);
 
+/* The smallest permitted min shaper rate. */
+#define MLXSW_REG_QEEC_MIS_MIN	200000		/* Kbps */
+
+/* reg_qeec_min_shaper_rate
+ * Min shaper information rate.
+ * For CPU port, can only be configured for port hierarchy.
+ * When in bytes mode, value is specified in units of 1000bps.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qeec, min_shaper_rate, 0x0C, 0, 28);
+
 /* reg_qeec_mase
  * Max shaper configuration enable. Enables configuration of the max
  * shaper on this ETS element.
-- 
GitLab


From 0fe64023162aef123de2f1993ba13a35a786e1de Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Wed, 31 Oct 2018 09:56:44 +0000
Subject: [PATCH 0399/1890] mlxsw: spectrum: Set minimum shaper on MC TCs

An MC-aware mode was introduced in commit 7b8195306694 ("mlxsw:
spectrum: Configure MC-aware mode on mlxsw ports"). In MC-aware mode,
BUM traffic gets a special treatment by being assigned to a separate set
of traffic classes 8..15. Pairs of TCs 0 and 8, 1 and 9, etc., are then
configured to strictly prioritize the lower-numbered ones. The intention
is to prevent BUM traffic from flooding the switch and push out all UC
traffic, which would otherwise happen, and instead give UC traffic
precedence.

However strictly prioritizing UC traffic has the effect that UC overload
pushes out all BUM traffic, such as legitimate ARP queries. These
packets are kept in queues for a while, but under sustained UC overload,
their lifetime eventually expires and these packets are dropped. That is
detrimental to network performance as well.

Therefore configure the MC TCs (8..15) with minimum shaper of 200Mbps (a
minimum permitted value) to allow a trickle of necessary control traffic
to get through.

Fixes: 7b8195306694 ("mlxsw: spectrum: Configure MC-aware mode on mlxsw ports")
Signed-off-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum.c    | 25 +++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 8a4983adae940..a2df12b79f8e9 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -2740,6 +2740,21 @@ int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port,
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qeec), qeec_pl);
 }
 
+static int mlxsw_sp_port_min_bw_set(struct mlxsw_sp_port *mlxsw_sp_port,
+				    enum mlxsw_reg_qeec_hr hr, u8 index,
+				    u8 next_index, u32 minrate)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	char qeec_pl[MLXSW_REG_QEEC_LEN];
+
+	mlxsw_reg_qeec_pack(qeec_pl, mlxsw_sp_port->local_port, hr, index,
+			    next_index);
+	mlxsw_reg_qeec_mise_set(qeec_pl, true);
+	mlxsw_reg_qeec_min_shaper_rate_set(qeec_pl, minrate);
+
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qeec), qeec_pl);
+}
+
 int mlxsw_sp_port_prio_tc_set(struct mlxsw_sp_port *mlxsw_sp_port,
 			      u8 switch_prio, u8 tclass)
 {
@@ -2817,6 +2832,16 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port)
 			return err;
 	}
 
+	/* Configure the min shaper for multicast TCs. */
+	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+		err = mlxsw_sp_port_min_bw_set(mlxsw_sp_port,
+					       MLXSW_REG_QEEC_HIERARCY_TC,
+					       i + 8, i,
+					       MLXSW_REG_QEEC_MIS_MIN);
+		if (err)
+			return err;
+	}
+
 	/* Map all priorities to traffic class 0. */
 	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
 		err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, 0);
-- 
GitLab


From 8f3f09358c81248109463b3cae254b7db4ea9af0 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Wed, 31 Oct 2018 09:56:45 +0000
Subject: [PATCH 0400/1890] selftests: mlxsw: qos_mc_aware: Tweak for min
 shaper

Since the minimum shaper is now being enabled for MC TCs, it's
unreasonable to expect no UC traffic loss. Minimal min shaper value is
200Mbps, which is 20% of the 1Gbps that this test configures on egress.
To cover for glitches, tolerate up to 25% UC degradation under MC
overload.

Fixes: b5638d46c90a ("selftests: mlxsw: Add a test for UC behavior under MC flood")
Signed-off-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
index 0150bb2741eb1..a8fc36d670e15 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
@@ -311,7 +311,7 @@ test_mc_aware()
 			ret = 100 * ($ucth1 - $ucth2) / $ucth1
 			if (ret > 0) { ret } else { 0 }
 		    ")
-	check_err $(bc <<< "$deg > 10")
+	check_err $(bc <<< "$deg > 25")
 
 	local interval=$((d1 - d0))
 	local mc_ir=$(rate $u0 $u1 $interval)
-- 
GitLab


From a5ee171d087ee632a5f190bb3ce1c0f98e06ec0a Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Wed, 31 Oct 2018 09:56:46 +0000
Subject: [PATCH 0401/1890] selftests: mlxsw: qos_mc_aware: Add a test for UC
 awareness

In a previous patch, mlxsw was updated to configure a minimum bandwidth
allowance on MC TCs. Test that this indeed fixes the problem of UC
traffic overload pushing out all MC traffic.

Fixes: b5638d46c90a ("selftests: mlxsw: Add a test for UC behavior under MC flood")
Signed-off-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../drivers/net/mlxsw/qos_mc_aware.sh         | 93 ++++++++++++++-----
 1 file changed, 70 insertions(+), 23 deletions(-)

diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
index a8fc36d670e15..117f6f35d72fa 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
@@ -25,24 +25,24 @@
 # Thus we set MTU to 10K on all involved interfaces. Then both unicast and
 # multicast traffic uses 8K frames.
 #
-# +-----------------------+                +----------------------------------+
-# | H1                    |                |                               H2 |
-# |                       |                |  unicast --> + $h2.111           |
-# |                       |                |  traffic     | 192.0.2.129/28    |
-# |          multicast    |                |              | e-qos-map 0:1     |
-# |          traffic      |                |              |                   |
-# | $h1 + <-----          |                |              + $h2               |
-# +-----|-----------------+                +--------------|-------------------+
-#       |                                                 |
-# +-----|-------------------------------------------------|-------------------+
-# |     + $swp1                                           + $swp2             |
-# |     | >1Gbps                                          | >1Gbps            |
-# | +---|----------------+                     +----------|----------------+  |
-# | |   + $swp1.1        |                     |          + $swp2.111      |  |
+# +---------------------------+            +----------------------------------+
+# | H1                        |            |                               H2 |
+# |                           |            |  unicast --> + $h2.111           |
+# |                 multicast |            |  traffic     | 192.0.2.129/28    |
+# |                 traffic   |            |              | e-qos-map 0:1     |
+# |           $h1 + <-----    |            |              |                   |
+# | 192.0.2.65/28 |           |            |              + $h2               |
+# +---------------|-----------+            +--------------|-------------------+
+#                 |                                       |
+# +---------------|---------------------------------------|-------------------+
+# |         $swp1 +                                       + $swp2             |
+# |        >1Gbps |                                       | >1Gbps            |
+# | +-------------|------+                     +----------|----------------+  |
+# | |     $swp1.1 +      |                     |          + $swp2.111      |  |
 # | |                BR1 |             SW      | BR111                     |  |
-# | |   + $swp3.1        |                     |          + $swp3.111      |  |
-# | +---|----------------+                     +----------|----------------+  |
-# |     \_________________________________________________/                   |
+# | |     $swp3.1 +      |                     |          + $swp3.111      |  |
+# | +-------------|------+                     +----------|----------------+  |
+# |               \_______________________________________/                   |
 # |                                    |                                      |
 # |                                    + $swp3                                |
 # |                                    | 1Gbps bottleneck                     |
@@ -51,6 +51,7 @@
 #                                      |
 #                                   +--|-----------------+
 #                                   |  + $h3          H3 |
+#                                   |  | 192.0.2.66/28   |
 #                                   |  |                 |
 #                                   |  + $h3.111         |
 #                                   |    192.0.2.130/28  |
@@ -59,6 +60,7 @@
 ALL_TESTS="
 	ping_ipv4
 	test_mc_aware
+	test_uc_aware
 "
 
 lib_dir=$(dirname $0)/../../../net/forwarding
@@ -68,14 +70,14 @@ source $lib_dir/lib.sh
 
 h1_create()
 {
-	simple_if_init $h1
+	simple_if_init $h1 192.0.2.65/28
 	mtu_set $h1 10000
 }
 
 h1_destroy()
 {
 	mtu_restore $h1
-	simple_if_fini $h1
+	simple_if_fini $h1 192.0.2.65/28
 }
 
 h2_create()
@@ -97,7 +99,7 @@ h2_destroy()
 
 h3_create()
 {
-	simple_if_init $h3
+	simple_if_init $h3 192.0.2.66/28
 	mtu_set $h3 10000
 
 	vlan_create $h3 111 v$h3 192.0.2.130/28
@@ -108,7 +110,7 @@ h3_destroy()
 	vlan_destroy $h3 111
 
 	mtu_restore $h3
-	simple_if_fini $h3
+	simple_if_fini $h3 192.0.2.66/28
 }
 
 switch_create()
@@ -251,7 +253,7 @@ measure_uc_rate()
 	# average ingress rate to somewhat mitigate this.
 	local min_ingress=2147483648
 
-	mausezahn $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \
+	$MZ $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \
 		-a own -b $h3mac -t udp -q &
 	sleep 1
 
@@ -291,7 +293,7 @@ test_mc_aware()
 	check_err $? "Could not get high enough UC-only ingress rate"
 	local ucth1=${uc_rate[1]}
 
-	mausezahn $h1 -p 8000 -c 0 -a own -b bc -t udp -q &
+	$MZ $h1 -p 8000 -c 0 -a own -b bc -t udp -q &
 
 	local d0=$(date +%s)
 	local t0=$(ethtool_stats_get $h3 rx_octets_prio_0)
@@ -335,6 +337,51 @@ test_mc_aware()
 	echo "    egress UC throughput  $(humanize ${uc_rate_2[1]})"
 	echo "    ingress MC throughput $(humanize $mc_ir)"
 	echo "    egress MC throughput  $(humanize $mc_er)"
+	echo
+}
+
+test_uc_aware()
+{
+	RET=0
+
+	$MZ $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \
+		-a own -b $h3mac -t udp -q &
+
+	local d0=$(date +%s)
+	local t0=$(ethtool_stats_get $h3 rx_octets_prio_1)
+	local u0=$(ethtool_stats_get $swp2 rx_octets_prio_1)
+	sleep 1
+
+	local attempts=50
+	local passes=0
+	local i
+
+	for ((i = 0; i < attempts; ++i)); do
+		if $ARPING -c 1 -I $h1 -b 192.0.2.66 -q -w 0.1; then
+			((passes++))
+		fi
+
+		sleep 0.1
+	done
+
+	local d1=$(date +%s)
+	local t1=$(ethtool_stats_get $h3 rx_octets_prio_1)
+	local u1=$(ethtool_stats_get $swp2 rx_octets_prio_1)
+
+	local interval=$((d1 - d0))
+	local uc_ir=$(rate $u0 $u1 $interval)
+	local uc_er=$(rate $t0 $t1 $interval)
+
+	((attempts == passes))
+	check_err $?
+
+	# Suppress noise from killing mausezahn.
+	{ kill %% && wait; } 2>/dev/null
+
+	log_test "MC performace under UC overload"
+	echo "    ingress UC throughput $(humanize ${uc_ir})"
+	echo "    egress UC throughput  $(humanize ${uc_er})"
+	echo "    sent $attempts BC ARPs, got $passes responses"
 }
 
 trap cleanup EXIT
-- 
GitLab


From a861594b1b7ffd630f335b351c4e9f938feadb8e Mon Sep 17 00:00:00 2001
From: Jon Mason <jdmason@kudzu.us>
Date: Mon, 11 Jun 2018 16:13:12 -0400
Subject: [PATCH 0402/1890] ntb_netdev: fix sleep time mismatch

The tx_time should be in usecs (according to the comment above the
variable), but the setting of the timer during the rearming is done in
msecs.  Change it to match the expected units.

Fixes: e74bfeedad08 ("NTB: Add flow control to the ntb_netdev")
Suggested-by: Gerd W. Haeussler <gerd.haeussler@cesys-it.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
Acked-by: Dave Jiang <dave.jiang@intel.com>
---
 drivers/net/ntb_netdev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c
index b12023bc2cab5..df8d49ad48c38 100644
--- a/drivers/net/ntb_netdev.c
+++ b/drivers/net/ntb_netdev.c
@@ -236,7 +236,7 @@ static void ntb_netdev_tx_timer(struct timer_list *t)
 	struct net_device *ndev = dev->ndev;
 
 	if (ntb_transport_tx_free_entry(dev->qp) < tx_stop) {
-		mod_timer(&dev->tx_timer, jiffies + msecs_to_jiffies(tx_time));
+		mod_timer(&dev->tx_timer, jiffies + usecs_to_jiffies(tx_time));
 	} else {
 		/* Make sure anybody stopping the queue after this sees the new
 		 * value of ntb_transport_tx_free_entry()
-- 
GitLab


From 7756e2b5d68c36e170a111dceea22f7365f83256 Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Tue, 28 Aug 2018 17:13:59 -0700
Subject: [PATCH 0403/1890] ntb: intel: fix return value for ndev_vec_mask()

ndev_vec_mask() should be returning u64 mask value instead of int.
Otherwise the mask value returned can be incorrect for larger
vectors.

Fixes: e26a5843f7f5 ("NTB: Split ntb_hw_intel and ntb_transport drivers")

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Tested-by: Lucas Van <lucas.van@intel.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 drivers/ntb/hw/intel/ntb_hw_gen1.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/ntb/hw/intel/ntb_hw_gen1.c b/drivers/ntb/hw/intel/ntb_hw_gen1.c
index 6aa5732272791..2ad263f708da7 100644
--- a/drivers/ntb/hw/intel/ntb_hw_gen1.c
+++ b/drivers/ntb/hw/intel/ntb_hw_gen1.c
@@ -265,7 +265,7 @@ static inline int ndev_db_clear_mask(struct intel_ntb_dev *ndev, u64 db_bits,
 	return 0;
 }
 
-static inline int ndev_vec_mask(struct intel_ntb_dev *ndev, int db_vector)
+static inline u64 ndev_vec_mask(struct intel_ntb_dev *ndev, int db_vector)
 {
 	u64 shift, mask;
 
-- 
GitLab


From 1b7619828d0c341612f58683e73f279c37e70bbc Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Mon, 27 Aug 2018 17:13:06 -0500
Subject: [PATCH 0404/1890] NTB: ntb_hw_idt: replace IS_ERR_OR_NULL with
 regular NULL checks

Both devm_kcalloc() and devm_kzalloc() return NULL on error. They
never return error pointers.

The use of IS_ERR_OR_NULL is currently applied to the wrong
context.

Fix this by replacing IS_ERR_OR_NULL with regular NULL checks.

Fixes: bf2a952d31d2 ("NTB: Add IDT 89HPESxNTx PCIe-switches support")
Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 drivers/ntb/hw/idt/ntb_hw_idt.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/ntb/hw/idt/ntb_hw_idt.c b/drivers/ntb/hw/idt/ntb_hw_idt.c
index dbe72f116017a..a67ef23e81bca 100644
--- a/drivers/ntb/hw/idt/ntb_hw_idt.c
+++ b/drivers/ntb/hw/idt/ntb_hw_idt.c
@@ -1105,9 +1105,9 @@ static struct idt_mw_cfg *idt_scan_mws(struct idt_ntb_dev *ndev, int port,
 	}
 
 	/* Allocate memory for memory window descriptors */
-	ret_mws = devm_kcalloc(&ndev->ntb.pdev->dev, *mw_cnt,
-				sizeof(*ret_mws), GFP_KERNEL);
-	if (IS_ERR_OR_NULL(ret_mws))
+	ret_mws = devm_kcalloc(&ndev->ntb.pdev->dev, *mw_cnt, sizeof(*ret_mws),
+			       GFP_KERNEL);
+	if (!ret_mws)
 		return ERR_PTR(-ENOMEM);
 
 	/* Copy the info of detected memory windows */
@@ -2390,7 +2390,7 @@ static struct idt_ntb_dev *idt_create_dev(struct pci_dev *pdev,
 
 	/* Allocate memory for the IDT PCIe-device descriptor */
 	ndev = devm_kzalloc(&pdev->dev, sizeof(*ndev), GFP_KERNEL);
-	if (IS_ERR_OR_NULL(ndev)) {
+	if (!ndev) {
 		dev_err(&pdev->dev, "Memory allocation failed for descriptor");
 		return ERR_PTR(-ENOMEM);
 	}
-- 
GitLab


From deee2cae27d1914850195e3fb219cc611e953560 Mon Sep 17 00:00:00 2001
From: Li Zhijian <lizhijian@cn.fujitsu.com>
Date: Tue, 30 Oct 2018 11:15:55 +0800
Subject: [PATCH 0405/1890] kselftests/bpf: use ping6 as the default ipv6 ping
 binary if it exists

ping binary on some distros doesn't support "ping -6" anymore.

Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_skb_cgroup_id.sh | 3 ++-
 tools/testing/selftests/bpf/test_sock_addr.sh     | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_skb_cgroup_id.sh b/tools/testing/selftests/bpf/test_skb_cgroup_id.sh
index 42544a969abc6..a9bc6f82abc16 100755
--- a/tools/testing/selftests/bpf/test_skb_cgroup_id.sh
+++ b/tools/testing/selftests/bpf/test_skb_cgroup_id.sh
@@ -10,7 +10,7 @@ wait_for_ip()
 	echo -n "Wait for testing link-local IP to become available "
 	for _i in $(seq ${MAX_PING_TRIES}); do
 		echo -n "."
-		if ping -6 -q -c 1 -W 1 ff02::1%${TEST_IF} >/dev/null 2>&1; then
+		if $PING6 -c 1 -W 1 ff02::1%${TEST_IF} >/dev/null 2>&1; then
 			echo " OK"
 			return
 		fi
@@ -58,5 +58,6 @@ BPF_PROG_OBJ="${DIR}/test_skb_cgroup_id_kern.o"
 BPF_PROG_SECTION="cgroup_id_logger"
 BPF_PROG_ID=0
 PROG="${DIR}/test_skb_cgroup_id_user"
+type ping6 >/dev/null 2>&1 && PING6="ping6" || PING6="ping -6"
 
 main
diff --git a/tools/testing/selftests/bpf/test_sock_addr.sh b/tools/testing/selftests/bpf/test_sock_addr.sh
index 9832a875a8289..3b9fdb8094aa2 100755
--- a/tools/testing/selftests/bpf/test_sock_addr.sh
+++ b/tools/testing/selftests/bpf/test_sock_addr.sh
@@ -4,7 +4,8 @@ set -eu
 
 ping_once()
 {
-	ping -${1} -q -c 1 -W 1 ${2%%/*} >/dev/null 2>&1
+	type ping${1} >/dev/null 2>&1 && PING="ping${1}" || PING="ping -${1}"
+	$PING -q -c 1 -W 1 ${2%%/*} >/dev/null 2>&1
 }
 
 wait_for_ip()
-- 
GitLab


From 3615353218744bb60f55170c620ce4dce1a008c7 Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Wed, 31 Oct 2018 12:57:18 -0700
Subject: [PATCH 0406/1890] libbpf: Fix compile error in
 libbpf_attach_type_by_name

Arnaldo Carvalho de Melo reported build error in libbpf when clang
version 3.8.1-24 (tags/RELEASE_381/final) is used:

libbpf.c:2201:36: error: comparison of constant -22 with expression of
type 'const enum bpf_attach_type' is always false
[-Werror,-Wtautological-constant-out-of-range-compare]
                if (section_names[i].attach_type == -EINVAL)
                    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ^  ~~~~~~~
1 error generated.

Fix the error by keeping "is_attachable" property of a program in a
separate struct field instead of trying to use attach_type itself.

Fixes: 956b620fcf0b ("libbpf: Introduce libbpf_attach_type_by_name")
Reported-by: Arnaldo Carvalho de Melo <acme@kernel.org>
Signed-off-by: Andrey Ignatov <rdna@fb.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/lib/bpf/libbpf.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index b607be7236d3e..d6e62e90e8d44 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -2084,19 +2084,19 @@ void bpf_program__set_expected_attach_type(struct bpf_program *prog,
 	prog->expected_attach_type = type;
 }
 
-#define BPF_PROG_SEC_IMPL(string, ptype, eatype, atype) \
-	{ string, sizeof(string) - 1, ptype, eatype, atype }
+#define BPF_PROG_SEC_IMPL(string, ptype, eatype, is_attachable, atype) \
+	{ string, sizeof(string) - 1, ptype, eatype, is_attachable, atype }
 
 /* Programs that can NOT be attached. */
-#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, -EINVAL)
+#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, 0, 0)
 
 /* Programs that can be attached. */
 #define BPF_APROG_SEC(string, ptype, atype) \
-	BPF_PROG_SEC_IMPL(string, ptype, 0, atype)
+	BPF_PROG_SEC_IMPL(string, ptype, 0, 1, atype)
 
 /* Programs that must specify expected attach type at load time. */
 #define BPF_EAPROG_SEC(string, ptype, eatype) \
-	BPF_PROG_SEC_IMPL(string, ptype, eatype, eatype)
+	BPF_PROG_SEC_IMPL(string, ptype, eatype, 1, eatype)
 
 /* Programs that can be attached but attach type can't be identified by section
  * name. Kept for backward compatibility.
@@ -2108,6 +2108,7 @@ static const struct {
 	size_t len;
 	enum bpf_prog_type prog_type;
 	enum bpf_attach_type expected_attach_type;
+	int is_attachable;
 	enum bpf_attach_type attach_type;
 } section_names[] = {
 	BPF_PROG_SEC("socket",			BPF_PROG_TYPE_SOCKET_FILTER),
@@ -2198,7 +2199,7 @@ int libbpf_attach_type_by_name(const char *name,
 	for (i = 0; i < ARRAY_SIZE(section_names); i++) {
 		if (strncmp(name, section_names[i].sec, section_names[i].len))
 			continue;
-		if (section_names[i].attach_type == -EINVAL)
+		if (!section_names[i].is_attachable)
 			return -EINVAL;
 		*attach_type = section_names[i].attach_type;
 		return 0;
-- 
GitLab


From 1936f094e164cc13ebf17aba1d6b34e033e64188 Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Wed, 31 Oct 2018 22:48:13 +0530
Subject: [PATCH 0407/1890] selftests/powerpc: Fix compilation issue due to asm
 label

We are using 'dscr_insn' as a label in inline asm to identify if a
SIGILL was generated by the mtspr instruction at that point. However,
with inline assembly, the compiler is still free to duplicate the asm
statement for optimization purposes, which results in the label being
defined twice with the error:
	/tmp/ccerQCql.s:874: Error: symbol `dscr_insn' is already defined

With different compiler versions, we may also see:
	/tmp/ccJzLDlN.o:(.toc+0x0): undefined reference to `dscr_insn'

Remove the use of the label in the inline assembly. Instead, just look
for the offending instruction in the signal handler.

Fixes: d2bf793237b3 ("selftests/powerpc: Add test to verify rfi flush across a system call")
Reported-by: Breno Leitao <leitao@debian.org>
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Tested-by: Breno Leitao <leitao@debian.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 tools/testing/selftests/powerpc/utils.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/powerpc/utils.c b/tools/testing/selftests/powerpc/utils.c
index 43c342845be0e..ed62f4153d3eb 100644
--- a/tools/testing/selftests/powerpc/utils.c
+++ b/tools/testing/selftests/powerpc/utils.c
@@ -25,7 +25,6 @@
 #include "utils.h"
 
 static char auxv[4096];
-extern unsigned int dscr_insn[];
 
 int read_auxv(char *buf, ssize_t buf_size)
 {
@@ -247,7 +246,8 @@ static void sigill_handler(int signr, siginfo_t *info, void *unused)
 	ucontext_t *ctx = (ucontext_t *)unused;
 	unsigned long *pc = &UCONTEXT_NIA(ctx);
 
-	if (*pc == (unsigned long)&dscr_insn) {
+	/* mtspr 3,RS to check for move to DSCR below */
+	if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) {
 		if (!warned++)
 			printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n");
 		*pc += 4;
@@ -271,5 +271,5 @@ void set_dscr(unsigned long val)
 		init = 1;
 	}
 
-	asm volatile("dscr_insn: mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
+	asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
 }
-- 
GitLab


From 439cd39ea136d2c026805264d58a91f36b6b64ca Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Sat, 14 Jul 2018 21:59:43 +0200
Subject: [PATCH 0408/1890] netfilter: ipset: list:set: Decrease refcount
 synchronously on deletion and replace

Commit 45040978c899 ("netfilter: ipset: Fix set:list type crash
when flush/dump set in parallel") postponed decreasing set
reference counters to the RCU callback.

An 'ipset del' command can terminate before the RCU grace period
is elapsed, and if sets are listed before then, the reference
counter shown in userspace will be wrong:

 # ipset create h hash:ip; ipset create l list:set; ipset add l
 # ipset del l h; ipset list h
 Name: h
 Type: hash:ip
 Revision: 4
 Header: family inet hashsize 1024 maxelem 65536
 Size in memory: 88
 References: 1
 Number of entries: 0
 Members:
 # sleep 1; ipset list h
 Name: h
 Type: hash:ip
 Revision: 4
 Header: family inet hashsize 1024 maxelem 65536
 Size in memory: 88
 References: 0
 Number of entries: 0
 Members:

Fix this by making the reference count update synchronous again.

As a result, when sets are listed, ip_set_name_byindex() might
now fetch a set whose reference count is already zero. Instead
of relying on the reference count to protect against concurrent
set renaming, grab ip_set_ref_lock as reader and copy the name,
while holding the same lock in ip_set_rename() as writer
instead.

Reported-by: Li Shuang <shuali@redhat.com>
Fixes: 45040978c899 ("netfilter: ipset: Fix set:list type crash when flush/dump set in parallel")
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/ipset/ip_set.h |  2 +-
 net/netfilter/ipset/ip_set_core.c      | 23 +++++++++++------------
 net/netfilter/ipset/ip_set_list_set.c  | 17 +++++++++++------
 3 files changed, 23 insertions(+), 19 deletions(-)

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 34fc80f3eb900..1d100efe74ec7 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -314,7 +314,7 @@ enum {
 extern ip_set_id_t ip_set_get_byname(struct net *net,
 				     const char *name, struct ip_set **set);
 extern void ip_set_put_byindex(struct net *net, ip_set_id_t index);
-extern const char *ip_set_name_byindex(struct net *net, ip_set_id_t index);
+extern void ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name);
 extern ip_set_id_t ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index);
 extern void ip_set_nfnl_put(struct net *net, ip_set_id_t index);
 
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index bc4bd247bb7d4..fa15a831aeee5 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -693,21 +693,20 @@ ip_set_put_byindex(struct net *net, ip_set_id_t index)
 EXPORT_SYMBOL_GPL(ip_set_put_byindex);
 
 /* Get the name of a set behind a set index.
- * We assume the set is referenced, so it does exist and
- * can't be destroyed. The set cannot be renamed due to
- * the referencing either.
- *
+ * Set itself is protected by RCU, but its name isn't: to protect against
+ * renaming, grab ip_set_ref_lock as reader (see ip_set_rename()) and copy the
+ * name.
  */
-const char *
-ip_set_name_byindex(struct net *net, ip_set_id_t index)
+void
+ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name)
 {
-	const struct ip_set *set = ip_set_rcu_get(net, index);
+	struct ip_set *set = ip_set_rcu_get(net, index);
 
 	BUG_ON(!set);
-	BUG_ON(set->ref == 0);
 
-	/* Referenced, so it's safe */
-	return set->name;
+	read_lock_bh(&ip_set_ref_lock);
+	strncpy(name, set->name, IPSET_MAXNAMELEN);
+	read_unlock_bh(&ip_set_ref_lock);
 }
 EXPORT_SYMBOL_GPL(ip_set_name_byindex);
 
@@ -1153,7 +1152,7 @@ static int ip_set_rename(struct net *net, struct sock *ctnl,
 	if (!set)
 		return -ENOENT;
 
-	read_lock_bh(&ip_set_ref_lock);
+	write_lock_bh(&ip_set_ref_lock);
 	if (set->ref != 0) {
 		ret = -IPSET_ERR_REFERENCED;
 		goto out;
@@ -1170,7 +1169,7 @@ static int ip_set_rename(struct net *net, struct sock *ctnl,
 	strncpy(set->name, name2, IPSET_MAXNAMELEN);
 
 out:
-	read_unlock_bh(&ip_set_ref_lock);
+	write_unlock_bh(&ip_set_ref_lock);
 	return ret;
 }
 
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 072a658fde047..4eef55da0878e 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -148,9 +148,7 @@ __list_set_del_rcu(struct rcu_head * rcu)
 {
 	struct set_elem *e = container_of(rcu, struct set_elem, rcu);
 	struct ip_set *set = e->set;
-	struct list_set *map = set->data;
 
-	ip_set_put_byindex(map->net, e->id);
 	ip_set_ext_destroy(set, e);
 	kfree(e);
 }
@@ -158,15 +156,21 @@ __list_set_del_rcu(struct rcu_head * rcu)
 static inline void
 list_set_del(struct ip_set *set, struct set_elem *e)
 {
+	struct list_set *map = set->data;
+
 	set->elements--;
 	list_del_rcu(&e->list);
+	ip_set_put_byindex(map->net, e->id);
 	call_rcu(&e->rcu, __list_set_del_rcu);
 }
 
 static inline void
-list_set_replace(struct set_elem *e, struct set_elem *old)
+list_set_replace(struct ip_set *set, struct set_elem *e, struct set_elem *old)
 {
+	struct list_set *map = set->data;
+
 	list_replace_rcu(&old->list, &e->list);
+	ip_set_put_byindex(map->net, old->id);
 	call_rcu(&old->rcu, __list_set_del_rcu);
 }
 
@@ -298,7 +302,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 	INIT_LIST_HEAD(&e->list);
 	list_set_init_extensions(set, ext, e);
 	if (n)
-		list_set_replace(e, n);
+		list_set_replace(set, e, n);
 	else if (next)
 		list_add_tail_rcu(&e->list, &next->list);
 	else if (prev)
@@ -486,6 +490,7 @@ list_set_list(const struct ip_set *set,
 	const struct list_set *map = set->data;
 	struct nlattr *atd, *nested;
 	u32 i = 0, first = cb->args[IPSET_CB_ARG0];
+	char name[IPSET_MAXNAMELEN];
 	struct set_elem *e;
 	int ret = 0;
 
@@ -504,8 +509,8 @@ list_set_list(const struct ip_set *set,
 		nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
 		if (!nested)
 			goto nla_put_failure;
-		if (nla_put_string(skb, IPSET_ATTR_NAME,
-				   ip_set_name_byindex(map->net, e->id)))
+		ip_set_name_byindex(map->net, e->id, name);
+		if (nla_put_string(skb, IPSET_ATTR_NAME, name))
 			goto nla_put_failure;
 		if (ip_set_put_extensions(skb, set, e, true))
 			goto nla_put_failure;
-- 
GitLab


From 886503f34d63e681662057448819edb5b1057a97 Mon Sep 17 00:00:00 2001
From: Eric Westbrook <eric@westbrook.io>
Date: Tue, 28 Aug 2018 15:14:42 -0600
Subject: [PATCH 0409/1890] netfilter: ipset: actually allow allowable CIDR 0
 in hash:net,port,net

Allow /0 as advertised for hash:net,port,net sets.

For "hash:net,port,net", ipset(8) says that "either subnet
is permitted to be a /0 should you wish to match port
between all destinations."

Make that statement true.

Before:

    # ipset create cidrzero hash:net,port,net
    # ipset add cidrzero 0.0.0.0/0,12345,0.0.0.0/0
    ipset v6.34: The value of the CIDR parameter of the IP address is invalid

    # ipset create cidrzero6 hash:net,port,net family inet6
    # ipset add cidrzero6 ::/0,12345,::/0
    ipset v6.34: The value of the CIDR parameter of the IP address is invalid

After:

    # ipset create cidrzero hash:net,port,net
    # ipset add cidrzero 0.0.0.0/0,12345,0.0.0.0/0
    # ipset test cidrzero 192.168.205.129,12345,172.16.205.129
    192.168.205.129,tcp:12345,172.16.205.129 is in set cidrzero.

    # ipset create cidrzero6 hash:net,port,net family inet6
    # ipset add cidrzero6 ::/0,12345,::/0
    # ipset test cidrzero6 fe80::1,12345,ff00::1
    fe80::1,tcp:12345,ff00::1 is in set cidrzero6.

See also:

  https://bugzilla.kernel.org/show_bug.cgi?id=200897
  https://github.com/ewestbrook/linux/commit/df7ff6efb0934ab6acc11f003ff1a7580d6c1d9c

Signed-off-by: Eric Westbrook <linux@westbrook.io>
Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/ipset/ip_set_hash_netportnet.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
index d391485a6acdc..613e18e720a44 100644
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -213,13 +213,13 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	if (tb[IPSET_ATTR_CIDR]) {
 		e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]);
-		if (!e.cidr[0] || e.cidr[0] > HOST_MASK)
+		if (e.cidr[0] > HOST_MASK)
 			return -IPSET_ERR_INVALID_CIDR;
 	}
 
 	if (tb[IPSET_ATTR_CIDR2]) {
 		e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
-		if (!e.cidr[1] || e.cidr[1] > HOST_MASK)
+		if (e.cidr[1] > HOST_MASK)
 			return -IPSET_ERR_INVALID_CIDR;
 	}
 
@@ -493,13 +493,13 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	if (tb[IPSET_ATTR_CIDR]) {
 		e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]);
-		if (!e.cidr[0] || e.cidr[0] > HOST_MASK)
+		if (e.cidr[0] > HOST_MASK)
 			return -IPSET_ERR_INVALID_CIDR;
 	}
 
 	if (tb[IPSET_ATTR_CIDR2]) {
 		e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
-		if (!e.cidr[1] || e.cidr[1] > HOST_MASK)
+		if (e.cidr[1] > HOST_MASK)
 			return -IPSET_ERR_INVALID_CIDR;
 	}
 
-- 
GitLab


From ed956f3947a01ff9875cd908d7c1ef1fe7f47bf0 Mon Sep 17 00:00:00 2001
From: Andrey Ryabinin <aryabinin@virtuozzo.com>
Date: Mon, 22 Oct 2018 23:30:40 +0200
Subject: [PATCH 0410/1890] netfilter: ipset: fix ip_set_list allocation
 failure

ip_set_create() and ip_set_net_init() attempt to allocate physically
contiguous memory for ip_set_list. If memory is fragmented, the
allocations could easily fail:

        vzctl: page allocation failure: order:7, mode:0xc0d0

        Call Trace:
         dump_stack+0x19/0x1b
         warn_alloc_failed+0x110/0x180
         __alloc_pages_nodemask+0x7bf/0xc60
         alloc_pages_current+0x98/0x110
         kmalloc_order+0x18/0x40
         kmalloc_order_trace+0x26/0xa0
         __kmalloc+0x279/0x290
         ip_set_net_init+0x4b/0x90 [ip_set]
         ops_init+0x3b/0xb0
         setup_net+0xbb/0x170
         copy_net_ns+0xf1/0x1c0
         create_new_namespaces+0xf9/0x180
         copy_namespaces+0x8e/0xd0
         copy_process+0xb61/0x1a00
         do_fork+0x91/0x320

Use kvcalloc() to fallback to 0-order allocations if high order
page isn't available.

Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/ipset/ip_set_core.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index fa15a831aeee5..68db946df1511 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -960,7 +960,7 @@ static int ip_set_create(struct net *net, struct sock *ctnl,
 			/* Wraparound */
 			goto cleanup;
 
-		list = kcalloc(i, sizeof(struct ip_set *), GFP_KERNEL);
+		list = kvcalloc(i, sizeof(struct ip_set *), GFP_KERNEL);
 		if (!list)
 			goto cleanup;
 		/* nfnl mutex is held, both lists are valid */
@@ -972,7 +972,7 @@ static int ip_set_create(struct net *net, struct sock *ctnl,
 		/* Use new list */
 		index = inst->ip_set_max;
 		inst->ip_set_max = i;
-		kfree(tmp);
+		kvfree(tmp);
 		ret = 0;
 	} else if (ret) {
 		goto cleanup;
@@ -2058,7 +2058,7 @@ ip_set_net_init(struct net *net)
 	if (inst->ip_set_max >= IPSET_INVALID_ID)
 		inst->ip_set_max = IPSET_INVALID_ID - 1;
 
-	list = kcalloc(inst->ip_set_max, sizeof(struct ip_set *), GFP_KERNEL);
+	list = kvcalloc(inst->ip_set_max, sizeof(struct ip_set *), GFP_KERNEL);
 	if (!list)
 		return -ENOMEM;
 	inst->is_deleted = false;
@@ -2086,7 +2086,7 @@ ip_set_net_exit(struct net *net)
 		}
 	}
 	nfnl_unlock(NFNL_SUBSYS_IPSET);
-	kfree(rcu_dereference_protected(inst->ip_set_list, 1));
+	kvfree(rcu_dereference_protected(inst->ip_set_list, 1));
 }
 
 static struct pernet_operations ip_set_net_ops = {
-- 
GitLab


From b9bb3fdf4e870fb655064f5c3c81c1fee7fd89ce Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.cirrus.com>
Date: Fri, 19 Oct 2018 09:59:57 +0100
Subject: [PATCH 0411/1890] i2c: Remove unnecessary call to irq_find_mapping

irq_create_mapping calls irq_find_mapping internally and will use the
found mapping if one exists, so there is no need to manually call this
from i2c_smbus_host_notify_to_irq.

Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/i2c-core-base.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index dc78aa7369def..656f0a6fe3adf 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -306,10 +306,7 @@ static int i2c_smbus_host_notify_to_irq(const struct i2c_client *client)
 	if (client->flags & I2C_CLIENT_TEN)
 		return -EINVAL;
 
-	irq = irq_find_mapping(adap->host_notify_domain, client->addr);
-	if (!irq)
-		irq = irq_create_mapping(adap->host_notify_domain,
-					 client->addr);
+	irq = irq_create_mapping(adap->host_notify_domain, client->addr);
 
 	return irq > 0 ? irq : -ENXIO;
 }
-- 
GitLab


From 6f108dd70d3010c391c1e9f56f3f20d1f9e75450 Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.cirrus.com>
Date: Fri, 19 Oct 2018 09:59:58 +0100
Subject: [PATCH 0412/1890] i2c: Clear client->irq in i2c_device_remove

The IRQ will be mapped in i2c_device_probe only if client->irq is zero and
i2c_device_remove does not clear this. When rebinding an I2C device,
whos IRQ provider has also been rebound this means that an IRQ mapping
will never be created, causing the I2C device to fail to acquire its
IRQ. Fix this issue by clearing client->irq in i2c_device_remove,
forcing i2c_device_probe to lookup the mapping again.

Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/i2c-core-base.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index 656f0a6fe3adf..28460f6a60cc1 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -430,6 +430,8 @@ static int i2c_device_remove(struct device *dev)
 	dev_pm_clear_wake_irq(&client->dev);
 	device_init_wakeup(&client->dev, false);
 
+	client->irq = 0;
+
 	return status;
 }
 
-- 
GitLab


From 69819c7fc836e17a870a23a54b885a44afc89d85 Mon Sep 17 00:00:00 2001
From: "A.s. Dong" <aisheng.dong@nxp.com>
Date: Tue, 30 Oct 2018 15:28:11 +0000
Subject: [PATCH 0413/1890] dt-bindings: i2c: i2c-imx-lpi2c: add imx8qxp
 compatible string

Add imx8qxp compatible string

Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Dong Aisheng <aisheng.dong@nxp.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.txt b/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.txt
index 091c8dfd32291..b245363d6d60a 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.txt
+++ b/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.txt
@@ -3,6 +3,7 @@
 Required properties:
 - compatible :
   - "fsl,imx7ulp-lpi2c" for LPI2C compatible with the one integrated on i.MX7ULP soc
+  - "fsl,imx8qxp-lpi2c" for LPI2C compatible with the one integrated on i.MX8QXP soc
 - reg : address and length of the lpi2c master registers
 - interrupts : lpi2c interrupt
 - clocks : lpi2c clock specifier
-- 
GitLab


From 012ebc3b7801fcf424d0ebb4689c98f90a8593e0 Mon Sep 17 00:00:00 2001
From: "A.s. Dong" <aisheng.dong@nxp.com>
Date: Tue, 30 Oct 2018 15:28:14 +0000
Subject: [PATCH 0414/1890] MAINTAINERS: add maintainer for IMX LPI2C driver

The LPI2C is used in IMX7ULP/MX8 SoCs.

Signed-off-by: Dong Aisheng <aisheng.dong@nxp.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 MAINTAINERS | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 1c0f771b859ec..5d581e0dd0178 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5871,6 +5871,14 @@ L:	linux-i2c@vger.kernel.org
 S:	Maintained
 F:	drivers/i2c/busses/i2c-cpm.c
 
+FREESCALE IMX LPI2C DRIVER
+M:	Dong Aisheng <aisheng.dong@nxp.com>
+L:	linux-i2c@vger.kernel.org
+L:	linux-imx@nxp.com
+S:	Maintained
+F:	drivers/i2c/busses/i2c-imx-lpi2c.c
+F:	Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.txt
+
 FREESCALE IMX / MXC FEC DRIVER
 M:	Fugang Duan <fugang.duan@nxp.com>
 L:	netdev@vger.kernel.org
-- 
GitLab


From 0962590e553331db2cc0aef2dc35c57f6300dbbe Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 1 Nov 2018 00:05:52 +0100
Subject: [PATCH 0415/1890] bpf: fix partial copy of map_ptr when dst is scalar

ALU operations on pointers such as scalar_reg += map_value_ptr are
handled in adjust_ptr_min_max_vals(). Problem is however that map_ptr
and range in the register state share a union, so transferring state
through dst_reg->range = ptr_reg->range is just buggy as any new
map_ptr in the dst_reg is then truncated (or null) for subsequent
checks. Fix this by adding a raw member and use it for copying state
over to dst_reg.

Fixes: f1174f77b50c ("bpf/verifier: rework value tracking")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Edward Cree <ecree@solarflare.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_verifier.h |  3 +++
 kernel/bpf/verifier.c        | 10 ++++++----
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 9e8056ec20faa..d93e89761a8b4 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -51,6 +51,9 @@ struct bpf_reg_state {
 		 *   PTR_TO_MAP_VALUE_OR_NULL
 		 */
 		struct bpf_map *map_ptr;
+
+		/* Max size from any of the above. */
+		unsigned long raw;
 	};
 	/* Fixed part of pointer offset, pointer types only */
 	s32 off;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 171a2c88e77dd..774fa40a32aed 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3046,7 +3046,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 			dst_reg->umax_value = umax_ptr;
 			dst_reg->var_off = ptr_reg->var_off;
 			dst_reg->off = ptr_reg->off + smin_val;
-			dst_reg->range = ptr_reg->range;
+			dst_reg->raw = ptr_reg->raw;
 			break;
 		}
 		/* A new variable offset is created.  Note that off_reg->off
@@ -3076,10 +3076,11 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 		}
 		dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
 		dst_reg->off = ptr_reg->off;
+		dst_reg->raw = ptr_reg->raw;
 		if (reg_is_pkt_pointer(ptr_reg)) {
 			dst_reg->id = ++env->id_gen;
 			/* something was added to pkt_ptr, set range to zero */
-			dst_reg->range = 0;
+			dst_reg->raw = 0;
 		}
 		break;
 	case BPF_SUB:
@@ -3108,7 +3109,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 			dst_reg->var_off = ptr_reg->var_off;
 			dst_reg->id = ptr_reg->id;
 			dst_reg->off = ptr_reg->off - smin_val;
-			dst_reg->range = ptr_reg->range;
+			dst_reg->raw = ptr_reg->raw;
 			break;
 		}
 		/* A new variable offset is created.  If the subtrahend is known
@@ -3134,11 +3135,12 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 		}
 		dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
 		dst_reg->off = ptr_reg->off;
+		dst_reg->raw = ptr_reg->raw;
 		if (reg_is_pkt_pointer(ptr_reg)) {
 			dst_reg->id = ++env->id_gen;
 			/* something was added to pkt_ptr, set range to zero */
 			if (smin_val < 0)
-				dst_reg->range = 0;
+				dst_reg->raw = 0;
 		}
 		break;
 	case BPF_AND:
-- 
GitLab


From 4d31f30148cea6e97e42616231eed55295117fe7 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 1 Nov 2018 00:05:53 +0100
Subject: [PATCH 0416/1890] bpf: don't set id on after map lookup with
 ptr_to_map_val return

In the verifier there is no such semantics where registers with
PTR_TO_MAP_VALUE type have an id assigned to them. This is only
used in PTR_TO_MAP_VALUE_OR_NULL and later on nullified once the
test against NULL has been pattern matched and type transformed
into PTR_TO_MAP_VALUE.

Fixes: 3e6a4b3e0289 ("bpf/verifier: introduce BPF_PTR_TO_MAP_VALUE")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Roman Gushchin <guro@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/verifier.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 774fa40a32aed..1971ca325fb4e 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2852,10 +2852,6 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 		regs[BPF_REG_0].type = NOT_INIT;
 	} else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL ||
 		   fn->ret_type == RET_PTR_TO_MAP_VALUE) {
-		if (fn->ret_type == RET_PTR_TO_MAP_VALUE)
-			regs[BPF_REG_0].type = PTR_TO_MAP_VALUE;
-		else
-			regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
 		/* There is no offset yet applied, variable or fixed */
 		mark_reg_known_zero(env, regs, BPF_REG_0);
 		/* remember map_ptr, so that check_map_access()
@@ -2868,7 +2864,12 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 			return -EINVAL;
 		}
 		regs[BPF_REG_0].map_ptr = meta.map_ptr;
-		regs[BPF_REG_0].id = ++env->id_gen;
+		if (fn->ret_type == RET_PTR_TO_MAP_VALUE) {
+			regs[BPF_REG_0].type = PTR_TO_MAP_VALUE;
+		} else {
+			regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
+			regs[BPF_REG_0].id = ++env->id_gen;
+		}
 	} else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
 		int id = acquire_reference_state(env, insn_idx);
 		if (id < 0)
-- 
GitLab


From 2683f4128c8730699296827d3209d2de80fa1d6c Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 1 Nov 2018 00:05:54 +0100
Subject: [PATCH 0417/1890] bpf: add various test cases to test_verifier

Add some more map related test cases to test_verifier kselftest
to improve test coverage. Summary: 1012 PASSED, 0 SKIPPED, 0 FAILED

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_verifier.c | 250 ++++++++++++++++++++
 1 file changed, 250 insertions(+)

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 36f3d3009d1a0..4c7445d4b3e63 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -6454,6 +6454,256 @@ static struct bpf_test tests[] = {
 		.errstr = "R1 min value is negative",
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
+	{
+		"map access: known scalar += value_ptr",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+			BPF_MOV64_IMM(BPF_REG_1, 4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_array_48b = { 3 },
+		.result = ACCEPT,
+		.retval = 1,
+	},
+	{
+		"map access: value_ptr += known scalar",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+			BPF_MOV64_IMM(BPF_REG_1, 4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_array_48b = { 3 },
+		.result = ACCEPT,
+		.retval = 1,
+	},
+	{
+		"map access: unknown scalar += value_ptr",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_array_48b = { 3 },
+		.result = ACCEPT,
+		.retval = 1,
+	},
+	{
+		"map access: value_ptr += unknown scalar",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_array_48b = { 3 },
+		.result = ACCEPT,
+		.retval = 1,
+	},
+	{
+		"map access: value_ptr += value_ptr",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_0),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_array_48b = { 3 },
+		.result = REJECT,
+		.errstr = "R0 pointer += pointer prohibited",
+	},
+	{
+		"map access: known scalar -= value_ptr",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+			BPF_MOV64_IMM(BPF_REG_1, 4),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_array_48b = { 3 },
+		.result = REJECT,
+		.errstr = "R1 tried to subtract pointer from scalar",
+	},
+	{
+		"map access: value_ptr -= known scalar",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+			BPF_MOV64_IMM(BPF_REG_1, 4),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_array_48b = { 3 },
+		.result = REJECT,
+		.errstr = "R0 min value is outside of the array range",
+	},
+	{
+		"map access: value_ptr -= known scalar, 2",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+			BPF_MOV64_IMM(BPF_REG_1, 6),
+			BPF_MOV64_IMM(BPF_REG_2, 4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_2),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_array_48b = { 3 },
+		.result = ACCEPT,
+		.retval = 1,
+	},
+	{
+		"map access: unknown scalar -= value_ptr",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_array_48b = { 3 },
+		.result = REJECT,
+		.errstr = "R1 tried to subtract pointer from scalar",
+	},
+	{
+		"map access: value_ptr -= unknown scalar",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_array_48b = { 3 },
+		.result = REJECT,
+		.errstr = "R0 min value is negative",
+	},
+	{
+		"map access: value_ptr -= unknown scalar, 2",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf),
+			BPF_ALU64_IMM(BPF_OR, BPF_REG_1, 0x7),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_array_48b = { 3 },
+		.result = ACCEPT,
+		.retval = 1,
+	},
+	{
+		"map access: value_ptr -= value_ptr",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_0),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_array_48b = { 3 },
+		.result = REJECT,
+		.errstr = "R0 invalid mem access 'inv'",
+		.errstr_unpriv = "R0 pointer -= pointer prohibited",
+	},
 	{
 		"map lookup helper access to map",
 		.insns = {
-- 
GitLab


From 832c6f2c29ec519b766923937f4f93fb1008b47d Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 1 Nov 2018 00:05:55 +0100
Subject: [PATCH 0418/1890] bpf: test make sure to run unpriv test cases in
 test_verifier

Right now unprivileged tests are never executed as a BPF test run,
only loaded. Allow for running them as well so that we can check
the outcome and probe for regressions.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_verifier.c | 71 ++++++++++++---------
 1 file changed, 40 insertions(+), 31 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 4c7445d4b3e63..6f61df62f690c 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -76,7 +76,7 @@ struct bpf_test {
 	int fixup_percpu_cgroup_storage[MAX_FIXUPS];
 	const char *errstr;
 	const char *errstr_unpriv;
-	uint32_t retval;
+	uint32_t retval, retval_unpriv;
 	enum {
 		UNDEF,
 		ACCEPT,
@@ -3084,6 +3084,8 @@ static struct bpf_test tests[] = {
 		.fixup_prog1 = { 2 },
 		.result = ACCEPT,
 		.retval = 42,
+		/* Verifier rewrite for unpriv skips tail call here. */
+		.retval_unpriv = 2,
 	},
 	{
 		"stack pointer arithmetic",
@@ -14149,6 +14151,33 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_map_type prog_type,
 	}
 }
 
+static int set_admin(bool admin)
+{
+	cap_t caps;
+	const cap_value_t cap_val = CAP_SYS_ADMIN;
+	int ret = -1;
+
+	caps = cap_get_proc();
+	if (!caps) {
+		perror("cap_get_proc");
+		return -1;
+	}
+	if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_val,
+				admin ? CAP_SET : CAP_CLEAR)) {
+		perror("cap_set_flag");
+		goto out;
+	}
+	if (cap_set_proc(caps)) {
+		perror("cap_set_proc");
+		goto out;
+	}
+	ret = 0;
+out:
+	if (cap_free(caps))
+		perror("cap_free");
+	return ret;
+}
+
 static void do_test_single(struct bpf_test *test, bool unpriv,
 			   int *passes, int *errors)
 {
@@ -14157,6 +14186,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 	struct bpf_insn *prog = test->insns;
 	int map_fds[MAX_NR_MAPS];
 	const char *expected_err;
+	uint32_t expected_val;
 	uint32_t retval;
 	int i, err;
 
@@ -14176,6 +14206,8 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 		       test->result_unpriv : test->result;
 	expected_err = unpriv && test->errstr_unpriv ?
 		       test->errstr_unpriv : test->errstr;
+	expected_val = unpriv && test->retval_unpriv ?
+		       test->retval_unpriv : test->retval;
 
 	reject_from_alignment = fd_prog < 0 &&
 				(test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS) &&
@@ -14209,16 +14241,20 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 		__u8 tmp[TEST_DATA_LEN << 2];
 		__u32 size_tmp = sizeof(tmp);
 
+		if (unpriv)
+			set_admin(true);
 		err = bpf_prog_test_run(fd_prog, 1, test->data,
 					sizeof(test->data), tmp, &size_tmp,
 					&retval, NULL);
+		if (unpriv)
+			set_admin(false);
 		if (err && errno != 524/*ENOTSUPP*/ && errno != EPERM) {
 			printf("Unexpected bpf_prog_test_run error\n");
 			goto fail_log;
 		}
-		if (!err && retval != test->retval &&
-		    test->retval != POINTER_VALUE) {
-			printf("FAIL retval %d != %d\n", retval, test->retval);
+		if (!err && retval != expected_val &&
+		    expected_val != POINTER_VALUE) {
+			printf("FAIL retval %d != %d\n", retval, expected_val);
 			goto fail_log;
 		}
 	}
@@ -14261,33 +14297,6 @@ static bool is_admin(void)
 	return (sysadmin == CAP_SET);
 }
 
-static int set_admin(bool admin)
-{
-	cap_t caps;
-	const cap_value_t cap_val = CAP_SYS_ADMIN;
-	int ret = -1;
-
-	caps = cap_get_proc();
-	if (!caps) {
-		perror("cap_get_proc");
-		return -1;
-	}
-	if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_val,
-				admin ? CAP_SET : CAP_CLEAR)) {
-		perror("cap_set_flag");
-		goto out;
-	}
-	if (cap_set_proc(caps)) {
-		perror("cap_set_proc");
-		goto out;
-	}
-	ret = 0;
-out:
-	if (cap_free(caps))
-		perror("cap_free");
-	return ret;
-}
-
 static void get_unpriv_disabled()
 {
 	char buf[2];
-- 
GitLab


From 37a3e9693d444701e4b8f8b98ee0480450bb5982 Mon Sep 17 00:00:00 2001
From: Serge Semin <fancer.lancer@gmail.com>
Date: Wed, 11 Jul 2018 23:40:51 +0300
Subject: [PATCH 0419/1890] ntb: idt: Set PCIe bus address to BARLIMITx

IDT NTB driver sets the upper limit of actual translation address
being written to the corresponding memory window setup. It is achieved
by BARLIMITx register initialization. Needless to say, that the register
works within PCIe bus address space.

In general CPU and PCIe address spaces are different. It means,
that addresses used for Memory TLPs routine can be different from
CPU addresses. While in most of cases they are the same, there are
exceptions when the proper mapping must be performed to have the
portable driver code. There used to be a virt_to_bus()/bus_to_virt()
interface for this purpose. But it's deprecated now. It was also a
mistake to use pci_resource_start() since the return address of the
method is at the CPU address space. In order to achieve the desired
purpose we need to use pci_bus_address() helper. This method shall
return a PCIe bus base address of the corresponding BAR resource.

Signed-off-by: Serge Semin <fancer.lancer@gmail.com>
Acked-by: Allen Hubbe <allenbh@gmail.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 drivers/ntb/hw/idt/ntb_hw_idt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/ntb/hw/idt/ntb_hw_idt.c b/drivers/ntb/hw/idt/ntb_hw_idt.c
index a67ef23e81bca..54485a8d2d7f8 100644
--- a/drivers/ntb/hw/idt/ntb_hw_idt.c
+++ b/drivers/ntb/hw/idt/ntb_hw_idt.c
@@ -1320,7 +1320,7 @@ static int idt_ntb_peer_mw_set_trans(struct ntb_dev *ntb, int pidx, int widx,
 		idt_nt_write(ndev, bar->ltbase, (u32)addr);
 		idt_nt_write(ndev, bar->utbase, (u32)(addr >> 32));
 		/* Set the custom BAR aperture limit */
-		limit = pci_resource_start(ntb->pdev, mw_cfg->bar) + size;
+		limit = pci_bus_address(ntb->pdev, mw_cfg->bar) + size;
 		idt_nt_write(ndev, bar->limit, (u32)limit);
 		if (IS_FLD_SET(BARSETUP_TYPE, data, 64))
 			idt_nt_write(ndev, (bar + 1)->limit, (limit >> 32));
-- 
GitLab


From 846429bc998f4c42d9961c139ce82d0f76a51d35 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Fri, 5 Oct 2018 09:12:34 +0200
Subject: [PATCH 0420/1890] ntb: ntb_transport: Mark expected switch
 fall-throughs

In preparation to enabling -Wimplicit-fallthrough, mark switch cases
where we are expecting to fall through.

Addresses-Coverity-ID: 1373888 ("Missing break in switch")
Addresses-Coverity-ID: 1373889 ("Missing break in switch")
Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Acked-by: Allen Hubbe <allenbh@gmail.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 drivers/ntb/ntb_transport.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
index 9398959664769..c643b9cf750bd 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c
@@ -1278,6 +1278,7 @@ static void ntb_rx_copy_callback(void *data,
 		case DMA_TRANS_READ_FAILED:
 		case DMA_TRANS_WRITE_FAILED:
 			entry->errors++;
+			/* fall through */
 		case DMA_TRANS_ABORTED:
 		{
 			struct ntb_transport_qp *qp = entry->qp;
@@ -1533,6 +1534,7 @@ static void ntb_tx_copy_callback(void *data,
 		case DMA_TRANS_READ_FAILED:
 		case DMA_TRANS_WRITE_FAILED:
 			entry->errors++;
+			/* fall through */
 		case DMA_TRANS_ABORTED:
 		{
 			void __iomem *offset =
-- 
GitLab


From fc5d1829f9bf3d8275322727c0e9a8baf268b7c6 Mon Sep 17 00:00:00 2001
From: Aaron Sierra <asierra@xes-inc.com>
Date: Fri, 12 Oct 2018 15:35:03 -0500
Subject: [PATCH 0421/1890] NTB: transport: Try harder to alloc an aligned MW
 buffer

Be a little wasteful if the (likely CMA) message window buffer is not
suitably aligned after our first attempt; allocate a buffer twice as big
as we need and manually align our MW buffer within it.

This was needed on Intel Broadwell DE platforms with intel_iommu=off

Signed-off-by: Aaron Sierra <asierra@xes-inc.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 drivers/ntb/ntb_transport.c | 86 +++++++++++++++++++++++++++----------
 1 file changed, 63 insertions(+), 23 deletions(-)

diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
index c643b9cf750bd..3bfdb45624088 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c
@@ -194,6 +194,8 @@ struct ntb_transport_mw {
 	void __iomem *vbase;
 	size_t xlat_size;
 	size_t buff_size;
+	size_t alloc_size;
+	void *alloc_addr;
 	void *virt_addr;
 	dma_addr_t dma_addr;
 };
@@ -672,13 +674,59 @@ static void ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw)
 		return;
 
 	ntb_mw_clear_trans(nt->ndev, PIDX, num_mw);
-	dma_free_coherent(&pdev->dev, mw->buff_size,
-			  mw->virt_addr, mw->dma_addr);
+	dma_free_coherent(&pdev->dev, mw->alloc_size,
+			  mw->alloc_addr, mw->dma_addr);
 	mw->xlat_size = 0;
 	mw->buff_size = 0;
+	mw->alloc_size = 0;
+	mw->alloc_addr = NULL;
 	mw->virt_addr = NULL;
 }
 
+static int ntb_alloc_mw_buffer(struct ntb_transport_mw *mw,
+			       struct device *dma_dev, size_t align)
+{
+	dma_addr_t dma_addr;
+	void *alloc_addr, *virt_addr;
+	int rc;
+
+	alloc_addr = dma_alloc_coherent(dma_dev, mw->alloc_size,
+					&dma_addr, GFP_KERNEL);
+	if (!alloc_addr) {
+		dev_err(dma_dev, "Unable to alloc MW buff of size %zu\n",
+			mw->alloc_size);
+		return -ENOMEM;
+	}
+	virt_addr = alloc_addr;
+
+	/*
+	 * we must ensure that the memory address allocated is BAR size
+	 * aligned in order for the XLAT register to take the value. This
+	 * is a requirement of the hardware. It is recommended to setup CMA
+	 * for BAR sizes equal or greater than 4MB.
+	 */
+	if (!IS_ALIGNED(dma_addr, align)) {
+		if (mw->alloc_size > mw->buff_size) {
+			virt_addr = PTR_ALIGN(alloc_addr, align);
+			dma_addr = ALIGN(dma_addr, align);
+		} else {
+			rc = -ENOMEM;
+			goto err;
+		}
+	}
+
+	mw->alloc_addr = alloc_addr;
+	mw->virt_addr = virt_addr;
+	mw->dma_addr = dma_addr;
+
+	return 0;
+
+err:
+	dma_free_coherent(dma_dev, mw->alloc_size, alloc_addr, dma_addr);
+
+	return rc;
+}
+
 static int ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw,
 		      resource_size_t size)
 {
@@ -710,28 +758,20 @@ static int ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw,
 	/* Alloc memory for receiving data.  Must be aligned */
 	mw->xlat_size = xlat_size;
 	mw->buff_size = buff_size;
+	mw->alloc_size = buff_size;
 
-	mw->virt_addr = dma_alloc_coherent(&pdev->dev, buff_size,
-					   &mw->dma_addr, GFP_KERNEL);
-	if (!mw->virt_addr) {
-		mw->xlat_size = 0;
-		mw->buff_size = 0;
-		dev_err(&pdev->dev, "Unable to alloc MW buff of size %zu\n",
-			buff_size);
-		return -ENOMEM;
-	}
-
-	/*
-	 * we must ensure that the memory address allocated is BAR size
-	 * aligned in order for the XLAT register to take the value. This
-	 * is a requirement of the hardware. It is recommended to setup CMA
-	 * for BAR sizes equal or greater than 4MB.
-	 */
-	if (!IS_ALIGNED(mw->dma_addr, xlat_align)) {
-		dev_err(&pdev->dev, "DMA memory %pad is not aligned\n",
-			&mw->dma_addr);
-		ntb_free_mw(nt, num_mw);
-		return -ENOMEM;
+	rc = ntb_alloc_mw_buffer(mw, &pdev->dev, xlat_align);
+	if (rc) {
+		mw->alloc_size *= 2;
+		rc = ntb_alloc_mw_buffer(mw, &pdev->dev, xlat_align);
+		if (rc) {
+			dev_err(&pdev->dev,
+				"Unable to alloc aligned MW buff\n");
+			mw->xlat_size = 0;
+			mw->buff_size = 0;
+			mw->alloc_size = 0;
+			return rc;
+		}
 	}
 
 	/* Notify HW the memory location of the receive buffer */
-- 
GitLab


From 906e86b22dba61cb1baaed9be7fdcbf0afd74d7b Mon Sep 17 00:00:00 2001
From: Aaron Sierra <asierra@xes-inc.com>
Date: Mon, 15 Oct 2018 15:32:47 -0500
Subject: [PATCH 0422/1890] ntb_netdev: Simplify remove with client device
 drvdata

Replace the elaborate private structure global linked-list used in
ntb_netdev_probe() and ntb_netdev_remove() by stashing our private
data in the NTB transport client device.

Signed-off-by: Aaron Sierra <asierra@xes-inc.com>
Reviewed-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 drivers/net/ntb_netdev.c | 28 +++-------------------------
 1 file changed, 3 insertions(+), 25 deletions(-)

diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c
index df8d49ad48c38..a5bab614ff845 100644
--- a/drivers/net/ntb_netdev.c
+++ b/drivers/net/ntb_netdev.c
@@ -71,7 +71,6 @@ static unsigned int tx_start = 10;
 static unsigned int tx_stop = 5;
 
 struct ntb_netdev {
-	struct list_head list;
 	struct pci_dev *pdev;
 	struct net_device *ndev;
 	struct ntb_transport_qp *qp;
@@ -81,8 +80,6 @@ struct ntb_netdev {
 #define	NTB_TX_TIMEOUT_MS	1000
 #define	NTB_RXQ_SIZE		100
 
-static LIST_HEAD(dev_list);
-
 static void ntb_netdev_event_handler(void *data, int link_is_up)
 {
 	struct net_device *ndev = data;
@@ -452,7 +449,7 @@ static int ntb_netdev_probe(struct device *client_dev)
 	if (rc)
 		goto err1;
 
-	list_add(&dev->list, &dev_list);
+	dev_set_drvdata(client_dev, ndev);
 	dev_info(&pdev->dev, "%s created\n", ndev->name);
 	return 0;
 
@@ -465,27 +462,8 @@ static int ntb_netdev_probe(struct device *client_dev)
 
 static void ntb_netdev_remove(struct device *client_dev)
 {
-	struct ntb_dev *ntb;
-	struct net_device *ndev;
-	struct pci_dev *pdev;
-	struct ntb_netdev *dev;
-	bool found = false;
-
-	ntb = dev_ntb(client_dev->parent);
-	pdev = ntb->pdev;
-
-	list_for_each_entry(dev, &dev_list, list) {
-		if (dev->pdev == pdev) {
-			found = true;
-			break;
-		}
-	}
-	if (!found)
-		return;
-
-	list_del(&dev->list);
-
-	ndev = dev->ndev;
+	struct net_device *ndev = dev_get_drvdata(client_dev);
+	struct ntb_netdev *dev = netdev_priv(ndev);
 
 	unregister_netdev(ndev);
 	ntb_transport_free_queue(dev->qp);
-- 
GitLab


From 1f2b5b8e2df4591fbca430aff9c5a072dcc0f408 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 31 Oct 2018 18:30:21 -0700
Subject: [PATCH 0423/1890] sparc64: Wire up compat getpeername and
 getsockname.

Fixes: 8b30ca73b7cc ("sparc: Add all necessary direct socket system calls.")
Reported-by: Joseph Myers  <joseph@codesourcery.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/kernel/systbls_64.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S
index bb68c805b8918..ff9389a1c9f3f 100644
--- a/arch/sparc/kernel/systbls_64.S
+++ b/arch/sparc/kernel/systbls_64.S
@@ -47,9 +47,9 @@ sys_call_table32:
 	.word sys_recvfrom, sys_setreuid16, sys_setregid16, sys_rename, compat_sys_truncate
 /*130*/	.word compat_sys_ftruncate, sys_flock, compat_sys_lstat64, sys_sendto, sys_shutdown
 	.word sys_socketpair, sys_mkdir, sys_rmdir, compat_sys_utimes, compat_sys_stat64
-/*140*/	.word sys_sendfile64, sys_nis_syscall, compat_sys_futex, sys_gettid, compat_sys_getrlimit
+/*140*/	.word sys_sendfile64, sys_getpeername, compat_sys_futex, sys_gettid, compat_sys_getrlimit
 	.word compat_sys_setrlimit, sys_pivot_root, sys_prctl, sys_pciconfig_read, sys_pciconfig_write
-/*150*/	.word sys_nis_syscall, sys_inotify_init, sys_inotify_add_watch, sys_poll, sys_getdents64
+/*150*/	.word sys_getsockname, sys_inotify_init, sys_inotify_add_watch, sys_poll, sys_getdents64
 	.word compat_sys_fcntl64, sys_inotify_rm_watch, compat_sys_statfs, compat_sys_fstatfs, sys_oldumount
 /*160*/	.word compat_sys_sched_setaffinity, compat_sys_sched_getaffinity, sys_getdomainname, sys_setdomainname, sys_nis_syscall
 	.word sys_quotactl, sys_set_tid_address, compat_sys_mount, compat_sys_ustat, sys_setxattr
-- 
GitLab


From 30549aab146ccb1275230c3b4b4bc6b4181fd54e Mon Sep 17 00:00:00 2001
From: Niklas Cassel <niklas.cassel@linaro.org>
Date: Wed, 31 Oct 2018 16:08:10 +0100
Subject: [PATCH 0424/1890] net: stmmac: Fix stmmac_mdio_reset() when building
 stmmac as modules

When building stmmac, it is only possible to select CONFIG_DWMAC_GENERIC,
or any of the glue drivers, when CONFIG_STMMAC_PLATFORM is set.
The only exception is CONFIG_STMMAC_PCI.

When calling of_mdiobus_register(), it will call our ->reset()
callback, which is set to stmmac_mdio_reset().

Most of the code in stmmac_mdio_reset() is protected by a
"#if defined(CONFIG_STMMAC_PLATFORM)", which will evaluate
to false when CONFIG_STMMAC_PLATFORM=m.

Because of this, the phy reset gpio will only be pulled when
stmmac is built as built-in, but not when built as modules.

Fix this by using "#if IS_ENABLED()" instead of "#if defined()".

Signed-off-by: Niklas Cassel <niklas.cassel@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
index b72ef171477e0..bdd351597b552 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
@@ -243,7 +243,7 @@ static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg,
  */
 int stmmac_mdio_reset(struct mii_bus *bus)
 {
-#if defined(CONFIG_STMMAC_PLATFORM)
+#if IS_ENABLED(CONFIG_STMMAC_PLATFORM)
 	struct net_device *ndev = bus->priv;
 	struct stmmac_priv *priv = netdev_priv(ndev);
 	unsigned int mii_address = priv->hw->mii.addr;
-- 
GitLab


From 46ebe2834ba5b541f28ee72e556a3fed42c47570 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Caama=C3=B1o=20Ruiz?= <jcaamano@suse.com>
Date: Wed, 31 Oct 2018 18:52:03 +0100
Subject: [PATCH 0425/1890] openvswitch: Fix push/pop ethernet validation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When there are both pop and push ethernet header actions among the
actions to be applied to a packet, an unexpected EINVAL (Invalid
argument) error is obtained. This is due to mac_proto not being reset
correctly when those actions are validated.

Reported-at:
https://mail.openvswitch.org/pipermail/ovs-discuss/2018-October/047554.html
Fixes: 91820da6ae85 ("openvswitch: add Ethernet push and pop actions")
Signed-off-by: Jaime CaamaÃ±o Ruiz <jcaamano@suse.com>
Tested-by: Greg Rose <gvrose8192@gmail.com>
Reviewed-by: Greg Rose <gvrose8192@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/flow_netlink.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index a70097ecf33c2..865ecef681969 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -3030,7 +3030,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
 			 * is already present */
 			if (mac_proto != MAC_PROTO_NONE)
 				return -EINVAL;
-			mac_proto = MAC_PROTO_NONE;
+			mac_proto = MAC_PROTO_ETHERNET;
 			break;
 
 		case OVS_ACTION_ATTR_POP_ETH:
@@ -3038,7 +3038,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
 				return -EINVAL;
 			if (vlan_tci & htons(VLAN_TAG_PRESENT))
 				return -EINVAL;
-			mac_proto = MAC_PROTO_ETHERNET;
+			mac_proto = MAC_PROTO_NONE;
 			break;
 
 		case OVS_ACTION_ATTR_PUSH_NSH:
-- 
GitLab


From c4c14c3bd177ea769fee938674f73a8ec0cdd47a Mon Sep 17 00:00:00 2001
From: Guo Ren <ren_guo@c-sky.com>
Date: Tue, 30 Oct 2018 18:42:32 +0800
Subject: [PATCH 0426/1890] csky: remove builtin-dtb Kbuild

Remove the builtin-dtb implementation in arch/csky.

Signed-off-by: Guo Ren <ren_guo@c-sky.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/csky/Kconfig.debug     | 10 +---------
 arch/csky/Makefile          |  2 --
 arch/csky/boot/dts/Makefile |  7 -------
 3 files changed, 1 insertion(+), 18 deletions(-)

diff --git a/arch/csky/Kconfig.debug b/arch/csky/Kconfig.debug
index 48cf6ff9df4a3..22a162cd99e81 100644
--- a/arch/csky/Kconfig.debug
+++ b/arch/csky/Kconfig.debug
@@ -1,9 +1 @@
-menu "C-SKY Debug Options"
-config CSKY_BUILTIN_DTB
-	string "Use kernel builtin dtb"
-	help
-	  User could define the dtb instead of the one which is passed from
-	  bootloader.
-	  Sometimes for debug, we want to use a built-in dtb and then we needn't
-	  modify bootloader at all.
-endmenu
+# dummy file, do not delete
diff --git a/arch/csky/Makefile b/arch/csky/Makefile
index 67a4ae1fba2ba..91f72224f4b99 100644
--- a/arch/csky/Makefile
+++ b/arch/csky/Makefile
@@ -65,9 +65,7 @@ libs-y += arch/csky/lib/ \
 	$(shell $(CC) $(KBUILD_CFLAGS) $(KCFLAGS) -print-libgcc-file-name)
 
 boot := arch/csky/boot
-ifneq '$(CONFIG_CSKY_BUILTIN_DTB)' '""'
 core-y += $(boot)/dts/
-endif
 
 all: zImage
 
diff --git a/arch/csky/boot/dts/Makefile b/arch/csky/boot/dts/Makefile
index 305e81a5e91e1..19a7f028ee20a 100644
--- a/arch/csky/boot/dts/Makefile
+++ b/arch/csky/boot/dts/Makefile
@@ -1,13 +1,6 @@
 dtstree	:= $(srctree)/$(src)
 
-ifneq '$(CONFIG_CSKY_BUILTIN_DTB)' '""'
-builtindtb-y := $(patsubst "%",%,$(CONFIG_CSKY_BUILTIN_DTB))
-dtb-y += $(builtindtb-y).dtb
-obj-y += $(builtindtb-y).dtb.o
-.SECONDARY: $(obj)/$(builtindtb-y).dtb.S
-else
 dtb-y := $(patsubst $(dtstree)/%.dts,%.dtb, $(wildcard $(dtstree)/*.dts))
-endif
 
 always += $(dtb-y)
 clean-files += *.dtb *.dtb.S
-- 
GitLab


From e8d0c9a726cd1eae410148fcc6a1db23019f0334 Mon Sep 17 00:00:00 2001
From: Guo Ren <ren_guo@c-sky.com>
Date: Tue, 30 Oct 2018 18:46:47 +0800
Subject: [PATCH 0427/1890] csky: use common dtb build rules

Remove the Kbuild rules in arch/csky and use common dtb build rules.

This modification is based on:

commit 37c8a5fafa3b ("kbuild: consolidate Devicetree dtb build rules")

Signed-off-by: Guo Ren <ren_guo@c-sky.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/csky/Makefile          | 11 +----------
 arch/csky/boot/dts/Makefile |  3 ---
 2 files changed, 1 insertion(+), 13 deletions(-)

diff --git a/arch/csky/Makefile b/arch/csky/Makefile
index 91f72224f4b99..c639fc167895d 100644
--- a/arch/csky/Makefile
+++ b/arch/csky/Makefile
@@ -69,20 +69,11 @@ core-y += $(boot)/dts/
 
 all: zImage
 
-
-dtbs: scripts
-	$(Q)$(MAKE) $(build)=$(boot)/dts
-
-%.dtb %.dtb.S %.dtb.o: scripts
-	$(Q)$(MAKE) $(build)=$(boot)/dts $(boot)/dts/$@
-
-zImage Image uImage: vmlinux dtbs
+zImage Image uImage: vmlinux
 	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
 
 archclean:
 	$(Q)$(MAKE) $(clean)=$(boot)
-	$(Q)$(MAKE) $(clean)=$(boot)/dts
-	rm -rf arch/csky/include/generated
 
 define archhelp
   echo  '* zImage       - Compressed kernel image (arch/$(ARCH)/boot/zImage)'
diff --git a/arch/csky/boot/dts/Makefile b/arch/csky/boot/dts/Makefile
index 19a7f028ee20a..c57ad3c880bfb 100644
--- a/arch/csky/boot/dts/Makefile
+++ b/arch/csky/boot/dts/Makefile
@@ -1,6 +1,3 @@
 dtstree	:= $(srctree)/$(src)
 
 dtb-y := $(patsubst $(dtstree)/%.dts,%.dtb, $(wildcard $(dtstree)/*.dts))
-
-always += $(dtb-y)
-clean-files += *.dtb *.dtb.S
-- 
GitLab


From bcb6fb5da77c2a228adf07cc9cb1a0c2aa2001c6 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Wed, 31 Oct 2018 21:57:30 -0500
Subject: [PATCH 0428/1890] objtool: Support GCC 9 cold subfunction naming
 scheme

Starting with GCC 8, a lot of unlikely code was moved out of line to
"cold" subfunctions in .text.unlikely.

For example, the unlikely bits of:

  irq_do_set_affinity()

are moved out to the following subfunction:

  irq_do_set_affinity.cold.49()

Starting with GCC 9, the numbered suffix has been removed.  So in the
above example, the cold subfunction is instead:

  irq_do_set_affinity.cold()

Tweak the objtool subfunction detection logic so that it detects both
GCC 8 and GCC 9 naming schemes.

Reported-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/015e9544b1f188d36a7f02fa31e9e95629aa5f50.1541040800.git.jpoimboe@redhat.com
---
 tools/objtool/elf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 7ec85d567598c..f8cef271dfc79 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -301,7 +301,7 @@ static int read_symbols(struct elf *elf)
 			if (sym->type != STT_FUNC)
 				continue;
 			sym->pfunc = sym->cfunc = sym;
-			coldstr = strstr(sym->name, ".cold.");
+			coldstr = strstr(sym->name, ".cold");
 			if (!coldstr)
 				continue;
 
-- 
GitLab


From 57f01796f14fecf00d330fe39c8d2477ced9cd79 Mon Sep 17 00:00:00 2001
From: Michael Kelley <mikelley@microsoft.com>
Date: Thu, 1 Nov 2018 00:35:05 +0000
Subject: [PATCH 0429/1890] irq/matrix: Fix memory overallocation

IRQ_MATRIX_SIZE is the number of longs needed for a bitmap, multiplied by
the size of a long, yielding a byte count. But it is used to size an array
of longs, which is way more memory than is needed.

Change IRQ_MATRIX_SIZE so it is just the number of longs needed and the
arrays come out the correct size.

Fixes: 2f75d9e1c905 ("genirq: Implement bitmap matrix allocator")
Signed-off-by: Michael Kelley <mikelley@microsoft.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: KY Srinivasan <kys@microsoft.com>
Link: https://lkml.kernel.org/r/1541032428-10392-1-git-send-email-mikelley@microsoft.com
---
 kernel/irq/matrix.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/irq/matrix.c b/kernel/irq/matrix.c
index 6e6d467f3dec5..1f0985adf1934 100644
--- a/kernel/irq/matrix.c
+++ b/kernel/irq/matrix.c
@@ -8,7 +8,7 @@
 #include <linux/cpu.h>
 #include <linux/irq.h>
 
-#define IRQ_MATRIX_SIZE	(BITS_TO_LONGS(IRQ_MATRIX_BITS) * sizeof(unsigned long))
+#define IRQ_MATRIX_SIZE	(BITS_TO_LONGS(IRQ_MATRIX_BITS))
 
 struct cpumap {
 	unsigned int		available;
-- 
GitLab


From 3424243e39e8ec138486926949e3668e7553125d Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 13 Oct 2018 13:22:46 +0300
Subject: [PATCH 0430/1890] irqchip/irq-mvebu-sei: Fix a NULL vs IS_ERR() bug
 in probe function

The devm_ioremap_resource() function never returns NULL, it returns
error pointers.

Fixes: 61ce8d8d8a81 ("irqchip/irq-mvebu-sei: Add new driver for Marvell SEI")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Miquel Raynal <miquel.raynal@bootlin.com>
Cc: Jason Cooper <jason@lakedaemon.net>
Cc: Andrew Lunn <andrew@lunn.ch>
Cc: Gregory Clement <gregory.clement@bootlin.com>
Cc: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: kernel-janitors@vger.kernel.org
Link: https://lkml.kernel.org/r/20181013102246.GD16086@mwanda
---
 drivers/irqchip/irq-mvebu-sei.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/irqchip/irq-mvebu-sei.c b/drivers/irqchip/irq-mvebu-sei.c
index 566d69a2edbc7..add4c9c934c8a 100644
--- a/drivers/irqchip/irq-mvebu-sei.c
+++ b/drivers/irqchip/irq-mvebu-sei.c
@@ -384,9 +384,9 @@ static int mvebu_sei_probe(struct platform_device *pdev)
 
 	sei->res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	sei->base = devm_ioremap_resource(sei->dev, sei->res);
-	if (!sei->base) {
+	if (IS_ERR(sei->base)) {
 		dev_err(sei->dev, "Failed to remap SEI resource\n");
-		return -ENODEV;
+		return PTR_ERR(sei->base);
 	}
 
 	/* Retrieve the SEI capabilities with the interrupt ranges */
-- 
GitLab


From a846446b1914d1e3d996d657754f43fde89bab51 Mon Sep 17 00:00:00 2001
From: Dmitry Safonov <dima@arista.com>
Date: Fri, 12 Oct 2018 14:42:52 +0100
Subject: [PATCH 0431/1890] x86/compat: Adjust in_compat_syscall() to generic
 code under !COMPAT

The result of in_compat_syscall() can be pictured as:

x86 platform:
    ---------------------------------------------------
    |  Arch\syscall  |  64-bit  |   ia32   |   x32    |
    |-------------------------------------------------|
    |     x86_64     |  false   |   true   |   true   |
    |-------------------------------------------------|
    |      i686      |          |  <true>  |          |
    ---------------------------------------------------

Other platforms:
    -------------------------------------------
    |  Arch\syscall  |  64-bit  |   compat    |
    |-----------------------------------------|
    |     64-bit     |  false   |    true     |
    |-----------------------------------------|
    |    32-bit(?)   |          |   <false>   |
    -------------------------------------------

As seen, the result of in_compat_syscall() on generic 32-bit platform
differs from i686.

There is no reason for in_compat_syscall() == true on native i686.  It also
easy to misread code if the result on native 32-bit platform differs
between arches.

Because of that non arch-specific code has many places with:
    if (IS_ENABLED(CONFIG_COMPAT) && in_compat_syscall())
in different variations.

It looks-like the only non-x86 code which uses in_compat_syscall() not
under CONFIG_COMPAT guard is in amd/amdkfd. But according to the commit
a18069c132cb ("amdkfd: Disable support for 32-bit user processes"), it
actually should be disabled on native i686.

Rename in_compat_syscall() to in_32bit_syscall() for x86-specific code
and make in_compat_syscall() false under !CONFIG_COMPAT.

A follow on patch will clean up generic users which were forced to check
IS_ENABLED(CONFIG_COMPAT) with in_compat_syscall().

Signed-off-by: Dmitry Safonov <dima@arista.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Andy Lutomirski <luto@kernel.org>
Cc: Dmitry Safonov <0x7f454c46@gmail.com>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Cc: Stephen Boyd <sboyd@kernel.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: linux-efi@vger.kernel.org
Cc: netdev@vger.kernel.org
Link: https://lkml.kernel.org/r/20181012134253.23266-2-dima@arista.com
---
 arch/x86/include/asm/compat.h |  9 ++++++++-
 arch/x86/include/asm/ftrace.h |  4 +---
 arch/x86/kernel/process_64.c  |  4 ++--
 arch/x86/kernel/sys_x86_64.c  | 11 ++++++-----
 arch/x86/mm/hugetlbpage.c     |  4 ++--
 arch/x86/mm/mmap.c            |  2 +-
 include/linux/compat.h        |  4 ++--
 7 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index fab4df16a3c43..22c4dfe659923 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -217,11 +217,18 @@ static inline bool in_x32_syscall(void)
 	return false;
 }
 
-static inline bool in_compat_syscall(void)
+static inline bool in_32bit_syscall(void)
 {
 	return in_ia32_syscall() || in_x32_syscall();
 }
+
+#ifdef CONFIG_COMPAT
+static inline bool in_compat_syscall(void)
+{
+	return in_32bit_syscall();
+}
 #define in_compat_syscall in_compat_syscall	/* override the generic impl */
+#endif
 
 struct compat_siginfo;
 int __copy_siginfo_to_user32(struct compat_siginfo __user *to,
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index c18ed65287d5e..cf350639e76d1 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -76,9 +76,7 @@ static inline bool arch_syscall_match_sym_name(const char *sym, const char *name
 #define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS 1
 static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs)
 {
-	if (in_compat_syscall())
-		return true;
-	return false;
+	return in_32bit_syscall();
 }
 #endif /* CONFIG_FTRACE_SYSCALLS && CONFIG_IA32_EMULATION */
 #endif /* !COMPILE_OFFSETS */
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 31b4755369f08..0e0b4288a4b2b 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -701,10 +701,10 @@ static void __set_personality_x32(void)
 		current->mm->context.ia32_compat = TIF_X32;
 	current->personality &= ~READ_IMPLIES_EXEC;
 	/*
-	 * in_compat_syscall() uses the presence of the x32 syscall bit
+	 * in_32bit_syscall() uses the presence of the x32 syscall bit
 	 * flag to determine compat status.  The x86 mmap() code relies on
 	 * the syscall bitness so set x32 syscall bit right here to make
-	 * in_compat_syscall() work during exec().
+	 * in_32bit_syscall() work during exec().
 	 *
 	 * Pretend to come from a x32 execve.
 	 */
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 6a78d4b36a797..f7476ce23b6e0 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -105,7 +105,7 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
 static void find_start_end(unsigned long addr, unsigned long flags,
 		unsigned long *begin, unsigned long *end)
 {
-	if (!in_compat_syscall() && (flags & MAP_32BIT)) {
+	if (!in_32bit_syscall() && (flags & MAP_32BIT)) {
 		/* This is usually used needed to map code in small
 		   model, so it needs to be in the first 31bit. Limit
 		   it to that.  This means we need to move the
@@ -122,7 +122,7 @@ static void find_start_end(unsigned long addr, unsigned long flags,
 	}
 
 	*begin	= get_mmap_base(1);
-	if (in_compat_syscall())
+	if (in_32bit_syscall())
 		*end = task_size_32bit();
 	else
 		*end = task_size_64bit(addr > DEFAULT_MAP_WINDOW);
@@ -193,7 +193,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 		return addr;
 
 	/* for MAP_32BIT mappings we force the legacy mmap base */
-	if (!in_compat_syscall() && (flags & MAP_32BIT))
+	if (!in_32bit_syscall() && (flags & MAP_32BIT))
 		goto bottomup;
 
 	/* requesting a specific address */
@@ -217,9 +217,10 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 	 * If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area
 	 * in the full address space.
 	 *
-	 * !in_compat_syscall() check to avoid high addresses for x32.
+	 * !in_32bit_syscall() check to avoid high addresses for x32
+	 * (and make it no op on native i386).
 	 */
-	if (addr > DEFAULT_MAP_WINDOW && !in_compat_syscall())
+	if (addr > DEFAULT_MAP_WINDOW && !in_32bit_syscall())
 		info.high_limit += TASK_SIZE_MAX - DEFAULT_MAP_WINDOW;
 
 	info.align_mask = 0;
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 00b296617ca43..92e4c4b85bbaa 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -92,7 +92,7 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
 	 * If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area
 	 * in the full address space.
 	 */
-	info.high_limit = in_compat_syscall() ?
+	info.high_limit = in_32bit_syscall() ?
 		task_size_32bit() : task_size_64bit(addr > DEFAULT_MAP_WINDOW);
 
 	info.align_mask = PAGE_MASK & ~huge_page_mask(h);
@@ -116,7 +116,7 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
 	 * If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area
 	 * in the full address space.
 	 */
-	if (addr > DEFAULT_MAP_WINDOW && !in_compat_syscall())
+	if (addr > DEFAULT_MAP_WINDOW && !in_32bit_syscall())
 		info.high_limit += TASK_SIZE_MAX - DEFAULT_MAP_WINDOW;
 
 	info.align_mask = PAGE_MASK & ~huge_page_mask(h);
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 1e95d57760cf7..db31657145214 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -166,7 +166,7 @@ unsigned long get_mmap_base(int is_legacy)
 	struct mm_struct *mm = current->mm;
 
 #ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
-	if (in_compat_syscall()) {
+	if (in_32bit_syscall()) {
 		return is_legacy ? mm->mmap_compat_legacy_base
 				 : mm->mmap_compat_base;
 	}
diff --git a/include/linux/compat.h b/include/linux/compat.h
index d30e4dbd4be24..f1ef24c68fcc6 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -1029,9 +1029,9 @@ int kcompat_sys_fstatfs64(unsigned int fd, compat_size_t sz,
 #else /* !CONFIG_COMPAT */
 
 #define is_compat_task() (0)
-#ifndef in_compat_syscall
+/* Ensure no one redefines in_compat_syscall() under !CONFIG_COMPAT */
+#define in_compat_syscall in_compat_syscall
 static inline bool in_compat_syscall(void) { return false; }
-#endif
 
 #endif /* CONFIG_COMPAT */
 
-- 
GitLab


From 98f76206b33504b934209d16196477dfa519a807 Mon Sep 17 00:00:00 2001
From: Dmitry Safonov <dima@arista.com>
Date: Fri, 12 Oct 2018 14:42:53 +0100
Subject: [PATCH 0432/1890] compat: Cleanup in_compat_syscall() callers

Now that in_compat_syscall() is consistent on all architectures and does
not longer report true on native i686, the workarounds (ifdeffery and
helpers) can be removed.

Signed-off-by: Dmitry Safonov <dima@arista.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Dmitry Safonov <0x7f454c46@gmail.com>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Andy Lutomirsky <luto@kernel.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Cc: Stephen Boyd <sboyd@kernel.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: linux-efi@vger.kernel.org
Cc: netdev@vger.kernel.org
Link: https://lkml.kernel.org/r/20181012134253.23266-3-dima@arista.com
---
 drivers/firmware/efi/efivars.c | 16 ++++------------
 kernel/time/time.c             |  2 +-
 net/xfrm/xfrm_state.c          |  2 --
 net/xfrm/xfrm_user.c           |  2 --
 4 files changed, 5 insertions(+), 17 deletions(-)

diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c
index 3e626fd9bd4e1..8061667a6765a 100644
--- a/drivers/firmware/efi/efivars.c
+++ b/drivers/firmware/efi/efivars.c
@@ -229,14 +229,6 @@ sanity_check(struct efi_variable *var, efi_char16_t *name, efi_guid_t vendor,
 	return 0;
 }
 
-static inline bool is_compat(void)
-{
-	if (IS_ENABLED(CONFIG_COMPAT) && in_compat_syscall())
-		return true;
-
-	return false;
-}
-
 static void
 copy_out_compat(struct efi_variable *dst, struct compat_efi_variable *src)
 {
@@ -263,7 +255,7 @@ efivar_store_raw(struct efivar_entry *entry, const char *buf, size_t count)
 	u8 *data;
 	int err;
 
-	if (is_compat()) {
+	if (in_compat_syscall()) {
 		struct compat_efi_variable *compat;
 
 		if (count != sizeof(*compat))
@@ -324,7 +316,7 @@ efivar_show_raw(struct efivar_entry *entry, char *buf)
 			     &entry->var.DataSize, entry->var.Data))
 		return -EIO;
 
-	if (is_compat()) {
+	if (in_compat_syscall()) {
 		compat = (struct compat_efi_variable *)buf;
 
 		size = sizeof(*compat);
@@ -418,7 +410,7 @@ static ssize_t efivar_create(struct file *filp, struct kobject *kobj,
 	struct compat_efi_variable *compat = (struct compat_efi_variable *)buf;
 	struct efi_variable *new_var = (struct efi_variable *)buf;
 	struct efivar_entry *new_entry;
-	bool need_compat = is_compat();
+	bool need_compat = in_compat_syscall();
 	efi_char16_t *name;
 	unsigned long size;
 	u32 attributes;
@@ -495,7 +487,7 @@ static ssize_t efivar_delete(struct file *filp, struct kobject *kobj,
 	if (!capable(CAP_SYS_ADMIN))
 		return -EACCES;
 
-	if (is_compat()) {
+	if (in_compat_syscall()) {
 		if (count != sizeof(*compat))
 			return -EINVAL;
 
diff --git a/kernel/time/time.c b/kernel/time/time.c
index e3a7f7fd3abc1..ad204cf6d0018 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -842,7 +842,7 @@ int get_timespec64(struct timespec64 *ts,
 	ts->tv_sec = kts.tv_sec;
 
 	/* Zero out the padding for 32 bit systems or in compat mode */
-	if (IS_ENABLED(CONFIG_64BIT_TIME) && (!IS_ENABLED(CONFIG_64BIT) || in_compat_syscall()))
+	if (IS_ENABLED(CONFIG_64BIT_TIME) && in_compat_syscall())
 		kts.tv_nsec &= 0xFFFFFFFFUL;
 
 	ts->tv_nsec = kts.tv_nsec;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index b669262682c97..dc4a9f1fb941a 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -2077,10 +2077,8 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen
 	struct xfrm_mgr *km;
 	struct xfrm_policy *pol = NULL;
 
-#ifdef CONFIG_COMPAT
 	if (in_compat_syscall())
 		return -EOPNOTSUPP;
-#endif
 
 	if (!optval && !optlen) {
 		xfrm_sk_policy_insert(sk, XFRM_POLICY_IN, NULL);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index ca7a207b81a95..c9a84e22f5d57 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -2621,10 +2621,8 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
 	const struct xfrm_link *link;
 	int type, err;
 
-#ifdef CONFIG_COMPAT
 	if (in_compat_syscall())
 		return -EOPNOTSUPP;
-#endif
 
 	type = nlh->nlmsg_type;
 	if (type > XFRM_MSG_MAX)
-- 
GitLab


From 51f5fd2e4615dcdc25cd7f9d19b7b27eb9ecdac7 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Wed, 31 Oct 2018 17:44:08 +0000
Subject: [PATCH 0433/1890] tools headers barrier: Fix arm64 tools build
 failure wrt smp_load_{acquire,release}

Cheers for reporting this. I managed to reproduce the build failure with
gcc version 6.3.0 20170516 (Debian 6.3.0-18+deb9u1).

The code in question is the arm64 versions of smp_load_acquire() and
smp_store_release(). Unlike other architectures, these are not built
around READ_ONCE() and WRITE_ONCE() since we have instructions we can
use instead of fences. Bringing our macros up-to-date with those (i.e.
tweaking the union initialisation and using the special "uXX_alias_t"
types) appears to fix the issue for me.

Committer notes:

Testing it in the systems previously failing:

  # time dm android-ndk:r12b-arm \
         android-ndk:r15c-arm \
         debian:experimental-x-arm64 \
         ubuntu:14.04.4-x-linaro-arm64 \
         ubuntu:16.04-x-arm \
         ubuntu:16.04-x-arm64 \
         ubuntu:18.04-x-arm \
         ubuntu:18.04-x-arm64
    1 android-ndk:r12b-arm          : Ok   arm-linux-androideabi-gcc (GCC) 4.9.x 20150123 (prerelease)
    2 android-ndk:r15c-arm          : Ok   arm-linux-androideabi-gcc (GCC) 4.9.x 20150123 (prerelease)
    3 debian:experimental-x-arm64   : Ok   aarch64-linux-gnu-gcc (Debian 8.2.0-7) 8.2.0
    4 ubuntu:14.04.4-x-linaro-arm64 : Ok   aarch64-linux-gnu-gcc (Linaro GCC 5.5-2017.10) 5.5.0
    5 ubuntu:16.04-x-arm            : Ok   arm-linux-gnueabihf-gcc (Ubuntu/Linaro 5.4.0-6ubuntu1~16.04.9) 5.4.0 20160609
    6 ubuntu:16.04-x-arm64          : Ok   aarch64-linux-gnu-gcc (Ubuntu/Linaro 5.4.0-6ubuntu1~16.04.9) 5.4.0 20160609
    7 ubuntu:18.04-x-arm            : Ok   arm-linux-gnueabihf-gcc (Ubuntu/Linaro 7.3.0-27ubuntu1~18.04) 7.3.0
    8 ubuntu:18.04-x-arm64          : Ok   aarch64-linux-gnu-gcc (Ubuntu/Linaro 7.3.0-27ubuntu1~18.04) 7.3.0

Reported-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Tested-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20181031174408.GA27871@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/arm64/include/asm/barrier.h | 133 +++++++++++++------------
 1 file changed, 67 insertions(+), 66 deletions(-)

diff --git a/tools/arch/arm64/include/asm/barrier.h b/tools/arch/arm64/include/asm/barrier.h
index 12835ea0e4173..378c051fa1776 100644
--- a/tools/arch/arm64/include/asm/barrier.h
+++ b/tools/arch/arm64/include/asm/barrier.h
@@ -14,74 +14,75 @@
 #define wmb()		asm volatile("dmb ishst" ::: "memory")
 #define rmb()		asm volatile("dmb ishld" ::: "memory")
 
-#define smp_store_release(p, v)					\
-do {								\
-	union { typeof(*p) __val; char __c[1]; } __u =		\
-		{ .__val = (__force typeof(*p)) (v) }; 		\
-								\
-	switch (sizeof(*p)) {					\
-	case 1:							\
-		asm volatile ("stlrb %w1, %0"			\
-				: "=Q" (*p)			\
-				: "r" (*(__u8 *)__u.__c)	\
-				: "memory");			\
-		break;						\
-	case 2:							\
-		asm volatile ("stlrh %w1, %0"			\
-				: "=Q" (*p)			\
-				: "r" (*(__u16 *)__u.__c)	\
-				: "memory");			\
-		break;						\
-	case 4:							\
-		asm volatile ("stlr %w1, %0"			\
-				: "=Q" (*p)			\
-				: "r" (*(__u32 *)__u.__c)	\
-				: "memory");			\
-		break;						\
-	case 8:							\
-		asm volatile ("stlr %1, %0"			\
-				: "=Q" (*p)			\
-				: "r" (*(__u64 *)__u.__c)	\
-				: "memory");			\
-		break;						\
-	default:						\
-		/* Only to shut up gcc ... */			\
-		mb();						\
-		break;						\
-	}							\
+#define smp_store_release(p, v)						\
+do {									\
+	union { typeof(*p) __val; char __c[1]; } __u =			\
+		{ .__val = (v) }; 					\
+									\
+	switch (sizeof(*p)) {						\
+	case 1:								\
+		asm volatile ("stlrb %w1, %0"				\
+				: "=Q" (*p)				\
+				: "r" (*(__u8_alias_t *)__u.__c)	\
+				: "memory");				\
+		break;							\
+	case 2:								\
+		asm volatile ("stlrh %w1, %0"				\
+				: "=Q" (*p)				\
+				: "r" (*(__u16_alias_t *)__u.__c)	\
+				: "memory");				\
+		break;							\
+	case 4:								\
+		asm volatile ("stlr %w1, %0"				\
+				: "=Q" (*p)				\
+				: "r" (*(__u32_alias_t *)__u.__c)	\
+				: "memory");				\
+		break;							\
+	case 8:								\
+		asm volatile ("stlr %1, %0"				\
+				: "=Q" (*p)				\
+				: "r" (*(__u64_alias_t *)__u.__c)	\
+				: "memory");				\
+		break;							\
+	default:							\
+		/* Only to shut up gcc ... */				\
+		mb();							\
+		break;							\
+	}								\
 } while (0)
 
-#define smp_load_acquire(p)					\
-({								\
-	union { typeof(*p) __val; char __c[1]; } __u;		\
-								\
-	switch (sizeof(*p)) {					\
-	case 1:							\
-		asm volatile ("ldarb %w0, %1"			\
-			: "=r" (*(__u8 *)__u.__c)		\
-			: "Q" (*p) : "memory");			\
-		break;						\
-	case 2:							\
-		asm volatile ("ldarh %w0, %1"			\
-			: "=r" (*(__u16 *)__u.__c)		\
-			: "Q" (*p) : "memory");			\
-		break;						\
-	case 4:							\
-		asm volatile ("ldar %w0, %1"			\
-			: "=r" (*(__u32 *)__u.__c)		\
-			: "Q" (*p) : "memory");			\
-		break;						\
-	case 8:							\
-		asm volatile ("ldar %0, %1"			\
-			: "=r" (*(__u64 *)__u.__c)		\
-			: "Q" (*p) : "memory");			\
-		break;						\
-	default:						\
-		/* Only to shut up gcc ... */			\
-		mb();						\
-		break;						\
-	}							\
-	__u.__val;						\
+#define smp_load_acquire(p)						\
+({									\
+	union { typeof(*p) __val; char __c[1]; } __u =			\
+		{ .__c = { 0 } };					\
+									\
+	switch (sizeof(*p)) {						\
+	case 1:								\
+		asm volatile ("ldarb %w0, %1"				\
+			: "=r" (*(__u8_alias_t *)__u.__c)		\
+			: "Q" (*p) : "memory");				\
+		break;							\
+	case 2:								\
+		asm volatile ("ldarh %w0, %1"				\
+			: "=r" (*(__u16_alias_t *)__u.__c)		\
+			: "Q" (*p) : "memory");				\
+		break;							\
+	case 4:								\
+		asm volatile ("ldar %w0, %1"				\
+			: "=r" (*(__u32_alias_t *)__u.__c)		\
+			: "Q" (*p) : "memory");				\
+		break;							\
+	case 8:								\
+		asm volatile ("ldar %0, %1"				\
+			: "=r" (*(__u64_alias_t *)__u.__c)		\
+			: "Q" (*p) : "memory");				\
+		break;							\
+	default:							\
+		/* Only to shut up gcc ... */				\
+		mb();							\
+		break;							\
+	}								\
+	__u.__val;							\
 })
 
 #endif /* _TOOLS_LINUX_ASM_AARCH64_BARRIER_H */
-- 
GitLab


From 40070408f54e76896d75ddccaabdd0c0714600f1 Mon Sep 17 00:00:00 2001
From: Serge Semin <fancer.lancer@gmail.com>
Date: Tue, 17 Jul 2018 12:24:34 +0300
Subject: [PATCH 0434/1890] ntb: idt: Alter temperature read method

In order to create a hwmon interface for the IDT PCIe-switch temperature
sensor the already available reader method should be improved. Particularly
we need to redesign it so one would be able to read temperature/offset
values from registers of the passed types. Since IDT sensor interface
provides temperature in unsigned format 0:7:1 (7 bits for real value
and one for fraction) we also need to have helpers for the typical sysfs
temperature data type conversion to and from this format. Even though
the IDT PCIe-switch provided temperature offset got the same but signed
type it can be translated by these methods too.

Signed-off-by: Serge Semin <fancer.lancer@gmail.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 drivers/ntb/hw/idt/ntb_hw_idt.c | 113 +++++++++++++++++++++++++++-----
 drivers/ntb/hw/idt/ntb_hw_idt.h |  56 ++++++++++++++++
 2 files changed, 152 insertions(+), 17 deletions(-)

diff --git a/drivers/ntb/hw/idt/ntb_hw_idt.c b/drivers/ntb/hw/idt/ntb_hw_idt.c
index 54485a8d2d7f8..adb71f7c10437 100644
--- a/drivers/ntb/hw/idt/ntb_hw_idt.c
+++ b/drivers/ntb/hw/idt/ntb_hw_idt.c
@@ -1828,23 +1828,100 @@ static int idt_ntb_peer_msg_write(struct ntb_dev *ntb, int pidx, int midx,
  *=============================================================================
  */
 
+/*
+ * idt_get_deg() - convert millidegree Celsius value to just degree
+ * @mdegC:	IN - millidegree Celsius value
+ *
+ * Return: Degree corresponding to the passed millidegree value
+ */
+static inline s8 idt_get_deg(long mdegC)
+{
+	return mdegC / 1000;
+}
+
+/*
+ * idt_get_frac() - retrieve 0/0.5 fraction of the millidegree Celsius value
+ * @mdegC:	IN - millidegree Celsius value
+ *
+ * Return: 0/0.5 degree fraction of the passed millidegree value
+ */
+static inline u8 idt_get_deg_frac(long mdegC)
+{
+	return (mdegC % 1000) >= 500 ? 5 : 0;
+}
+
+/*
+ * idt_get_temp_fmt() - convert millidegree Celsius value to 0:7:1 format
+ * @mdegC:	IN - millidegree Celsius value
+ *
+ * Return: 0:7:1 format acceptable by the IDT temperature sensor
+ */
+static inline u8 idt_temp_get_fmt(long mdegC)
+{
+	return (idt_get_deg(mdegC) << 1) | (idt_get_deg_frac(mdegC) ? 1 : 0);
+}
+
+/*
+ * idt_get_temp_sval() - convert temp sample to signed millidegree Celsius
+ * @data:	IN - shifted to LSB 8-bits temperature sample
+ *
+ * Return: signed millidegree Celsius
+ */
+static inline long idt_get_temp_sval(u32 data)
+{
+	return ((s8)data / 2) * 1000 + (data & 0x1 ? 500 : 0);
+}
+
+/*
+ * idt_get_temp_sval() - convert temp sample to unsigned millidegree Celsius
+ * @data:	IN - shifted to LSB 8-bits temperature sample
+ *
+ * Return: unsigned millidegree Celsius
+ */
+static inline long idt_get_temp_uval(u32 data)
+{
+	return (data / 2) * 1000 + (data & 0x1 ? 500 : 0);
+}
+
 /*
  * idt_read_temp() - read temperature from chip sensor
  * @ntb:	NTB device context.
- * @val:	OUT - integer value of temperature
- * @frac:	OUT - fraction
+ * @type:	IN - type of the temperature value to read
+ * @val:	OUT - integer value of temperature in millidegree Celsius
  */
-static void idt_read_temp(struct idt_ntb_dev *ndev, unsigned char *val,
-			  unsigned char *frac)
+static void idt_read_temp(struct idt_ntb_dev *ndev,
+			  const enum idt_temp_val type, long *val)
 {
 	u32 data;
 
-	/* Read the data from TEMP field of the TMPSTS register */
-	data = idt_sw_read(ndev, IDT_SW_TMPSTS);
-	data = GET_FIELD(TMPSTS_TEMP, data);
-	/* TEMP field has one fractional bit and seven integer bits */
-	*val = data >> 1;
-	*frac = ((data & 0x1) ? 5 : 0);
+	/* Alter the temperature field in accordance with the passed type */
+	switch (type) {
+	case IDT_TEMP_CUR:
+		data = GET_FIELD(TMPSTS_TEMP,
+				 idt_sw_read(ndev, IDT_SW_TMPSTS));
+		break;
+	case IDT_TEMP_LOW:
+		data = GET_FIELD(TMPSTS_LTEMP,
+				 idt_sw_read(ndev, IDT_SW_TMPSTS));
+		break;
+	case IDT_TEMP_HIGH:
+		data = GET_FIELD(TMPSTS_HTEMP,
+				 idt_sw_read(ndev, IDT_SW_TMPSTS));
+		break;
+	case IDT_TEMP_OFFSET:
+		/* This is the only field with signed 0:7:1 format */
+		data = GET_FIELD(TMPADJ_OFFSET,
+				 idt_sw_read(ndev, IDT_SW_TMPADJ));
+		*val = idt_get_temp_sval(data);
+		return;
+	default:
+		data = GET_FIELD(TMPSTS_TEMP,
+				 idt_sw_read(ndev, IDT_SW_TMPSTS));
+		break;
+	}
+
+	/* The rest of the fields accept unsigned 0:7:1 format */
+	*val = idt_get_temp_uval(data);
 }
 
 /*
@@ -1860,10 +1937,10 @@ static void idt_read_temp(struct idt_ntb_dev *ndev, unsigned char *val,
  */
 static void idt_temp_isr(struct idt_ntb_dev *ndev, u32 ntint_sts)
 {
-	unsigned char val, frac;
+	unsigned long mdeg;
 
 	/* Read the current temperature value */
-	idt_read_temp(ndev, &val, &frac);
+	idt_read_temp(ndev, IDT_TEMP_CUR, &mdeg);
 
 	/* Read the temperature alarm to clean the alarm status out */
 	/*(void)idt_sw_read(ndev, IDT_SW_TMPALARM);*/
@@ -1875,7 +1952,8 @@ static void idt_temp_isr(struct idt_ntb_dev *ndev, u32 ntint_sts)
 		"Temp sensor IRQ detected %#08x", ntint_sts);
 
 	/* Print temperature value to log */
-	dev_warn(&ndev->ntb.pdev->dev, "Temperature %hhu.%hhu", val, frac);
+	dev_warn(&ndev->ntb.pdev->dev, "Temperature %hhd.%hhuC",
+		idt_get_deg(mdeg), idt_get_deg_frac(mdeg));
 }
 
 /*=============================================================================
@@ -2123,9 +2201,9 @@ static ssize_t idt_dbgfs_info_read(struct file *filp, char __user *ubuf,
 				   size_t count, loff_t *offp)
 {
 	struct idt_ntb_dev *ndev = filp->private_data;
-	unsigned char temp, frac, idx, pidx, cnt;
+	unsigned char idx, pidx, cnt;
+	unsigned long irqflags, mdeg;
 	ssize_t ret = 0, off = 0;
-	unsigned long irqflags;
 	enum ntb_speed speed;
 	enum ntb_width width;
 	char *strbuf;
@@ -2274,9 +2352,10 @@ static ssize_t idt_dbgfs_info_read(struct file *filp, char __user *ubuf,
 	off += scnprintf(strbuf + off, size - off, "\n");
 
 	/* Current temperature */
-	idt_read_temp(ndev, &temp, &frac);
+	idt_read_temp(ndev, IDT_TEMP_CUR, &mdeg);
 	off += scnprintf(strbuf + off, size - off,
-		"Switch temperature\t\t- %hhu.%hhuC\n", temp, frac);
+		"Switch temperature\t\t- %hhd.%hhuC\n",
+		idt_get_deg(mdeg), idt_get_deg_frac(mdeg));
 
 	/* Copy the buffer to the User Space */
 	ret = simple_read_from_buffer(ubuf, count, offp, strbuf, off);
diff --git a/drivers/ntb/hw/idt/ntb_hw_idt.h b/drivers/ntb/hw/idt/ntb_hw_idt.h
index 856fd182f6f4f..9dfd6b11a621e 100644
--- a/drivers/ntb/hw/idt/ntb_hw_idt.h
+++ b/drivers/ntb/hw/idt/ntb_hw_idt.h
@@ -885,6 +885,24 @@
 #define IDT_SWPxMSGCTL_PART_MASK	0x00000070U
 #define IDT_SWPxMSGCTL_PART_FLD		4
 
+/*
+ * TMPCTL register fields related constants
+ * @IDT_TMPCTL_LTH_MASK:	Low temperature threshold field mask
+ * @IDT_TMPCTL_LTH_FLD:		Low temperature threshold field offset
+ * @IDT_TMPCTL_MTH_MASK:	Middle temperature threshold field mask
+ * @IDT_TMPCTL_MTH_FLD:		Middle temperature threshold field offset
+ * @IDT_TMPCTL_HTH_MASK:	High temperature threshold field mask
+ * @IDT_TMPCTL_HTH_FLD:		High temperature threshold field offset
+ * @IDT_TMPCTL_PDOWN:		Temperature sensor power down
+ */
+#define IDT_TMPCTL_LTH_MASK		0x000000FFU
+#define IDT_TMPCTL_LTH_FLD		0
+#define IDT_TMPCTL_MTH_MASK		0x0000FF00U
+#define IDT_TMPCTL_MTH_FLD		8
+#define IDT_TMPCTL_HTH_MASK		0x00FF0000U
+#define IDT_TMPCTL_HTH_FLD		16
+#define IDT_TMPCTL_PDOWN		0x80000000U
+
 /*
  * TMPSTS register fields related constants
  * @IDT_TMPSTS_TEMP_MASK:	Current temperature field mask
@@ -892,6 +910,18 @@
  */
 #define IDT_TMPSTS_TEMP_MASK		0x000000FFU
 #define IDT_TMPSTS_TEMP_FLD		0
+#define IDT_TMPSTS_LTEMP_MASK		0x0000FF00U
+#define IDT_TMPSTS_LTEMP_FLD		8
+#define IDT_TMPSTS_HTEMP_MASK		0x00FF0000U
+#define IDT_TMPSTS_HTEMP_FLD		16
+
+/*
+ * TMPADJ register fields related constants
+ * @IDT_TMPADJ_OFFSET_MASK:	Temperature value offset field mask
+ * @IDT_TMPADJ_OFFSET_FLD:	Temperature value offset field offset
+ */
+#define IDT_TMPADJ_OFFSET_MASK		0x000000FFU
+#define IDT_TMPADJ_OFFSET_FLD		0
 
 /*
  * Helper macro to get/set the corresponding field value
@@ -950,6 +980,32 @@
 #define IDT_TRANS_ALIGN		4
 #define IDT_DIR_SIZE_ALIGN	1
 
+/*
+ * IDT PCIe-switch temperature sensor value limits
+ * @IDT_TEMP_MIN_MDEG:	Minimal integer value of temperature
+ * @IDT_TEMP_MAX_MDEG:	Maximal integer value of temperature
+ * @IDT_TEMP_MIN_OFFSET:Minimal integer value of temperature offset
+ * @IDT_TEMP_MAX_OFFSET:Maximal integer value of temperature offset
+ */
+#define IDT_TEMP_MIN_MDEG	0
+#define IDT_TEMP_MAX_MDEG	127500
+#define IDT_TEMP_MIN_OFFSET	-64000
+#define IDT_TEMP_MAX_OFFSET	63500
+
+/*
+ * Temperature sensor values enumeration
+ * @IDT_TEMP_CUR:	Current temperature
+ * @IDT_TEMP_LOW:	Lowest historical temperature
+ * @IDT_TEMP_HIGH:	Highest historical temperature
+ * @IDT_TEMP_OFFSET:	Current temperature offset
+ */
+enum idt_temp_val {
+	IDT_TEMP_CUR,
+	IDT_TEMP_LOW,
+	IDT_TEMP_HIGH,
+	IDT_TEMP_OFFSET
+};
+
 /*
  * IDT Memory Windows type. Depending on the device settings, IDT supports
  * Direct Address Translation MW registers and Lookup Table registers
-- 
GitLab


From aed1b7b31154bdd6f2fccca0ab5cf8a6fe2f52eb Mon Sep 17 00:00:00 2001
From: Serge Semin <fancer.lancer@gmail.com>
Date: Tue, 17 Jul 2018 12:24:35 +0300
Subject: [PATCH 0435/1890] ntb: idt: Add basic hwmon sysfs interface

IDT PCIe switches provide an embedded temperature sensor working
within [0; 127.5]C with resolution of 0.5C. They also can generate
a PCIe upstream interrupt in case if the temperature passes through
specified thresholds. Since this thresholds interface is very broken
the created hwmon-sysfs interface exposes only the next set of hwmon
nodes: current input temperature, lowest and highest values measured,
history resetting, value offset. HWmon alarm interface isn't provided.

IDT PCIe switch also've got an ADC/filter settings of the sensor.
This driver doesn't expose them to the hwmon-sysfs interface at the
moment, except the offset node.

Signed-off-by: Serge Semin <fancer.lancer@gmail.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 drivers/ntb/hw/idt/Kconfig      |   1 +
 drivers/ntb/hw/idt/ntb_hw_idt.c | 182 ++++++++++++++++++++++++++++++++
 drivers/ntb/hw/idt/ntb_hw_idt.h |  24 ++++-
 3 files changed, 206 insertions(+), 1 deletion(-)

diff --git a/drivers/ntb/hw/idt/Kconfig b/drivers/ntb/hw/idt/Kconfig
index b360e5613b9f1..2ed147368fa84 100644
--- a/drivers/ntb/hw/idt/Kconfig
+++ b/drivers/ntb/hw/idt/Kconfig
@@ -1,6 +1,7 @@
 config NTB_IDT
 	tristate "IDT PCIe-switch Non-Transparent Bridge support"
 	depends on PCI
+	select HWMON
 	help
 	 This driver supports NTB of cappable IDT PCIe-switches.
 
diff --git a/drivers/ntb/hw/idt/ntb_hw_idt.c b/drivers/ntb/hw/idt/ntb_hw_idt.c
index adb71f7c10437..19425a2c60cd7 100644
--- a/drivers/ntb/hw/idt/ntb_hw_idt.c
+++ b/drivers/ntb/hw/idt/ntb_hw_idt.c
@@ -49,11 +49,14 @@
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/spinlock.h>
+#include <linux/mutex.h>
 #include <linux/pci.h>
 #include <linux/aer.h>
 #include <linux/slab.h>
 #include <linux/list.h>
 #include <linux/debugfs.h>
+#include <linux/hwmon.h>
+#include <linux/hwmon-sysfs.h>
 #include <linux/ntb.h>
 
 #include "ntb_hw_idt.h"
@@ -1924,6 +1927,153 @@ static void idt_read_temp(struct idt_ntb_dev *ndev,
 	*val = idt_get_temp_uval(data);
 }
 
+/*
+ * idt_write_temp() - write temperature to the chip sensor register
+ * @ntb:	NTB device context.
+ * @type:	IN - type of the temperature value to change
+ * @val:	IN - integer value of temperature in millidegree Celsius
+ */
+static void idt_write_temp(struct idt_ntb_dev *ndev,
+			   const enum idt_temp_val type, const long val)
+{
+	unsigned int reg;
+	u32 data;
+	u8 fmt;
+
+	/* Retrieve the properly formatted temperature value */
+	fmt = idt_temp_get_fmt(val);
+
+	mutex_lock(&ndev->hwmon_mtx);
+	switch (type) {
+	case IDT_TEMP_LOW:
+		reg = IDT_SW_TMPALARM;
+		data = SET_FIELD(TMPALARM_LTEMP, idt_sw_read(ndev, reg), fmt) &
+			~IDT_TMPALARM_IRQ_MASK;
+		break;
+	case IDT_TEMP_HIGH:
+		reg = IDT_SW_TMPALARM;
+		data = SET_FIELD(TMPALARM_HTEMP, idt_sw_read(ndev, reg), fmt) &
+			~IDT_TMPALARM_IRQ_MASK;
+		break;
+	case IDT_TEMP_OFFSET:
+		reg = IDT_SW_TMPADJ;
+		data = SET_FIELD(TMPADJ_OFFSET, idt_sw_read(ndev, reg), fmt);
+		break;
+	default:
+		goto inval_spin_unlock;
+	}
+
+	idt_sw_write(ndev, reg, data);
+
+inval_spin_unlock:
+	mutex_unlock(&ndev->hwmon_mtx);
+}
+
+/*
+ * idt_sysfs_show_temp() - printout corresponding temperature value
+ * @dev:	Pointer to the NTB device structure
+ * @da:		Sensor device attribute structure
+ * @buf:	Buffer to print temperature out
+ *
+ * Return: Number of written symbols or negative error
+ */
+static ssize_t idt_sysfs_show_temp(struct device *dev,
+				   struct device_attribute *da, char *buf)
+{
+	struct sensor_device_attribute *attr = to_sensor_dev_attr(da);
+	struct idt_ntb_dev *ndev = dev_get_drvdata(dev);
+	enum idt_temp_val type = attr->index;
+	long mdeg;
+
+	idt_read_temp(ndev, type, &mdeg);
+	return sprintf(buf, "%ld\n", mdeg);
+}
+
+/*
+ * idt_sysfs_set_temp() - set corresponding temperature value
+ * @dev:	Pointer to the NTB device structure
+ * @da:		Sensor device attribute structure
+ * @buf:	Buffer to print temperature out
+ * @count:	Size of the passed buffer
+ *
+ * Return: Number of written symbols or negative error
+ */
+static ssize_t idt_sysfs_set_temp(struct device *dev,
+				  struct device_attribute *da, const char *buf,
+				  size_t count)
+{
+	struct sensor_device_attribute *attr = to_sensor_dev_attr(da);
+	struct idt_ntb_dev *ndev = dev_get_drvdata(dev);
+	enum idt_temp_val type = attr->index;
+	long mdeg;
+	int ret;
+
+	ret = kstrtol(buf, 10, &mdeg);
+	if (ret)
+		return ret;
+
+	/* Clamp the passed value in accordance with the type */
+	if (type == IDT_TEMP_OFFSET)
+		mdeg = clamp_val(mdeg, IDT_TEMP_MIN_OFFSET,
+				 IDT_TEMP_MAX_OFFSET);
+	else
+		mdeg = clamp_val(mdeg, IDT_TEMP_MIN_MDEG, IDT_TEMP_MAX_MDEG);
+
+	idt_write_temp(ndev, type, mdeg);
+
+	return count;
+}
+
+/*
+ * idt_sysfs_reset_hist() - reset temperature history
+ * @dev:	Pointer to the NTB device structure
+ * @da:		Sensor device attribute structure
+ * @buf:	Buffer to print temperature out
+ * @count:	Size of the passed buffer
+ *
+ * Return: Number of written symbols or negative error
+ */
+static ssize_t idt_sysfs_reset_hist(struct device *dev,
+				    struct device_attribute *da,
+				    const char *buf, size_t count)
+{
+	struct idt_ntb_dev *ndev = dev_get_drvdata(dev);
+
+	/* Just set the maximal value to the lowest temperature field and
+	 * minimal value to the highest temperature field
+	 */
+	idt_write_temp(ndev, IDT_TEMP_LOW, IDT_TEMP_MAX_MDEG);
+	idt_write_temp(ndev, IDT_TEMP_HIGH, IDT_TEMP_MIN_MDEG);
+
+	return count;
+}
+
+/*
+ * Hwmon IDT sysfs attributes
+ */
+static SENSOR_DEVICE_ATTR(temp1_input, 0444, idt_sysfs_show_temp, NULL,
+			  IDT_TEMP_CUR);
+static SENSOR_DEVICE_ATTR(temp1_lowest, 0444, idt_sysfs_show_temp, NULL,
+			  IDT_TEMP_LOW);
+static SENSOR_DEVICE_ATTR(temp1_highest, 0444, idt_sysfs_show_temp, NULL,
+			  IDT_TEMP_HIGH);
+static SENSOR_DEVICE_ATTR(temp1_offset, 0644, idt_sysfs_show_temp,
+			  idt_sysfs_set_temp, IDT_TEMP_OFFSET);
+static DEVICE_ATTR(temp1_reset_history, 0200, NULL, idt_sysfs_reset_hist);
+
+/*
+ * Hwmon IDT sysfs attributes group
+ */
+static struct attribute *idt_temp_attrs[] = {
+	&sensor_dev_attr_temp1_input.dev_attr.attr,
+	&sensor_dev_attr_temp1_lowest.dev_attr.attr,
+	&sensor_dev_attr_temp1_highest.dev_attr.attr,
+	&sensor_dev_attr_temp1_offset.dev_attr.attr,
+	&dev_attr_temp1_reset_history.attr,
+	NULL
+};
+ATTRIBUTE_GROUPS(idt_temp);
+
 /*
  * idt_temp_isr() - temperature sensor alarm events ISR
  * @ndev:	IDT NTB hardware driver descriptor
@@ -1956,6 +2106,35 @@ static void idt_temp_isr(struct idt_ntb_dev *ndev, u32 ntint_sts)
 		idt_get_deg(mdeg), idt_get_deg_frac(mdeg));
 }
 
+/*
+ * idt_init_temp() - initialize temperature sensor interface
+ * @ndev:	IDT NTB hardware driver descriptor
+ *
+ * Simple sensor initializarion method is responsible for device switching
+ * on and resource management based hwmon interface registration. Note, that
+ * since the device is shared we won't disable it on remove, but leave it
+ * working until the system is powered off.
+ */
+static void idt_init_temp(struct idt_ntb_dev *ndev)
+{
+	struct device *hwmon;
+
+	/* Enable sensor if it hasn't been already */
+	idt_sw_write(ndev, IDT_SW_TMPCTL, 0x0);
+
+	/* Initialize hwmon interface fields */
+	mutex_init(&ndev->hwmon_mtx);
+
+	hwmon = devm_hwmon_device_register_with_groups(&ndev->ntb.pdev->dev,
+		ndev->swcfg->name, ndev, idt_temp_groups);
+	if (IS_ERR(hwmon)) {
+		dev_err(&ndev->ntb.pdev->dev, "Couldn't create hwmon device");
+		return;
+	}
+
+	dev_dbg(&ndev->ntb.pdev->dev, "Temperature HWmon interface registered");
+}
+
 /*=============================================================================
  *                           8. ISRs related operations
  *
@@ -2650,6 +2829,9 @@ static int idt_pci_probe(struct pci_dev *pdev,
 	/* Initialize Messaging subsystem */
 	idt_init_msg(ndev);
 
+	/* Initialize hwmon interface */
+	idt_init_temp(ndev);
+
 	/* Initialize IDT interrupts handler */
 	ret = idt_init_isr(ndev);
 	if (ret != 0)
diff --git a/drivers/ntb/hw/idt/ntb_hw_idt.h b/drivers/ntb/hw/idt/ntb_hw_idt.h
index 9dfd6b11a621e..032f81cb4d445 100644
--- a/drivers/ntb/hw/idt/ntb_hw_idt.h
+++ b/drivers/ntb/hw/idt/ntb_hw_idt.h
@@ -47,9 +47,9 @@
 #include <linux/pci_ids.h>
 #include <linux/interrupt.h>
 #include <linux/spinlock.h>
+#include <linux/mutex.h>
 #include <linux/ntb.h>
 
-
 /*
  * Macro is used to create the struct pci_device_id that matches
  * the supported IDT PCIe-switches
@@ -907,6 +907,10 @@
  * TMPSTS register fields related constants
  * @IDT_TMPSTS_TEMP_MASK:	Current temperature field mask
  * @IDT_TMPSTS_TEMP_FLD:	Current temperature field offset
+ * @IDT_TMPSTS_LTEMP_MASK:	Lowest temperature field mask
+ * @IDT_TMPSTS_LTEMP_FLD:	Lowest temperature field offset
+ * @IDT_TMPSTS_HTEMP_MASK:	Highest temperature field mask
+ * @IDT_TMPSTS_HTEMP_FLD:	Highest temperature field offset
  */
 #define IDT_TMPSTS_TEMP_MASK		0x000000FFU
 #define IDT_TMPSTS_TEMP_FLD		0
@@ -915,6 +919,20 @@
 #define IDT_TMPSTS_HTEMP_MASK		0x00FF0000U
 #define IDT_TMPSTS_HTEMP_FLD		16
 
+/*
+ * TMPALARM register fields related constants
+ * @IDT_TMPALARM_LTEMP_MASK:	Lowest temperature field mask
+ * @IDT_TMPALARM_LTEMP_FLD:	Lowest temperature field offset
+ * @IDT_TMPALARM_HTEMP_MASK:	Highest temperature field mask
+ * @IDT_TMPALARM_HTEMP_FLD:	Highest temperature field offset
+ * @IDT_TMPALARM_IRQ_MASK:	Alarm IRQ status mask
+ */
+#define IDT_TMPALARM_LTEMP_MASK		0x0000FF00U
+#define IDT_TMPALARM_LTEMP_FLD		8
+#define IDT_TMPALARM_HTEMP_MASK		0x00FF0000U
+#define IDT_TMPALARM_HTEMP_FLD		16
+#define IDT_TMPALARM_IRQ_MASK		0x3F000000U
+
 /*
  * TMPADJ register fields related constants
  * @IDT_TMPADJ_OFFSET_MASK:	Temperature value offset field mask
@@ -1100,6 +1118,8 @@ struct idt_ntb_peer {
  * @msg_mask_lock:	Message mask register lock
  * @gasa_lock:		GASA registers access lock
  *
+ * @hwmon_mtx:		Temperature sensor interface update mutex
+ *
  * @dbgfs_info:		DebugFS info node
  */
 struct idt_ntb_dev {
@@ -1127,6 +1147,8 @@ struct idt_ntb_dev {
 	spinlock_t msg_mask_lock;
 	spinlock_t gasa_lock;
 
+	struct mutex hwmon_mtx;
+
 	struct dentry *dbgfs_info;
 };
 #define to_ndev_ntb(__ntb) container_of(__ntb, struct idt_ntb_dev, ntb)
-- 
GitLab


From b8babacbae624da6d244d0721263edda56be3991 Mon Sep 17 00:00:00 2001
From: Serge Semin <fancer.lancer@gmail.com>
Date: Tue, 17 Jul 2018 12:24:36 +0300
Subject: [PATCH 0436/1890] ntb: idt: Discard temperature sensor IRQ handler

IDT PCIe-switch temperature sensor interface is very broken. First
of all only a few combinations of TMPCTL threshold enable bits
really cause the interrupts unmasked. Even if an individual bit
indicates the event unmasked, corresponding IRQ just isn't generated.
Most of the threshold enable bits combinations are in fact useless and
non of them can help to create a fully functional alarm interface.
So to speak, we can't create a well defined hwmon alarms based on
the IDT PCI-switch threshold IRQs.

Secondly a single threshold IRQ (not a combination of thresholds) can
be successfully enabled without the issue described above. But in this
case we experienced an enormous number of interrupts generated by
the chip if the temperature got near the enabled threshold value. Filter
adjustment didn't help much. It also doesn't provide a hysteresis settings.
Due to the temperature sample fluctuations near the threshold the
interrupts spate makes the system nearly unusable until the temperature
value finally settled so being pushed either to be fully higher or lower
the threshold.

All of these issues makes the temperature sensor alarm interface useless
and even at some point dangerous to be used in the driver. In this case
it is safer to completely discard it and disable the temperature alarm
interrupts.

Signed-off-by: Serge Semin <fancer.lancer@gmail.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 drivers/ntb/hw/idt/ntb_hw_idt.c | 41 +--------------------------------
 drivers/ntb/hw/idt/ntb_hw_idt.h |  5 ++--
 2 files changed, 3 insertions(+), 43 deletions(-)

diff --git a/drivers/ntb/hw/idt/ntb_hw_idt.c b/drivers/ntb/hw/idt/ntb_hw_idt.c
index 19425a2c60cd7..c4594a708b483 100644
--- a/drivers/ntb/hw/idt/ntb_hw_idt.c
+++ b/drivers/ntb/hw/idt/ntb_hw_idt.c
@@ -2074,38 +2074,6 @@ static struct attribute *idt_temp_attrs[] = {
 };
 ATTRIBUTE_GROUPS(idt_temp);
 
-/*
- * idt_temp_isr() - temperature sensor alarm events ISR
- * @ndev:	IDT NTB hardware driver descriptor
- * @ntint_sts:	NT-function interrupt status
- *
- * It handles events of temperature crossing alarm thresholds. Since reading
- * of TMPALARM register clears it up, the function doesn't analyze the
- * read value, instead the current temperature value just warningly printed to
- * log.
- * The method is called from PCIe ISR bottom-half routine.
- */
-static void idt_temp_isr(struct idt_ntb_dev *ndev, u32 ntint_sts)
-{
-	unsigned long mdeg;
-
-	/* Read the current temperature value */
-	idt_read_temp(ndev, IDT_TEMP_CUR, &mdeg);
-
-	/* Read the temperature alarm to clean the alarm status out */
-	/*(void)idt_sw_read(ndev, IDT_SW_TMPALARM);*/
-
-	/* Clean the corresponding interrupt bit */
-	idt_nt_write(ndev, IDT_NT_NTINTSTS, IDT_NTINTSTS_TMPSENSOR);
-
-	dev_dbg(&ndev->ntb.pdev->dev,
-		"Temp sensor IRQ detected %#08x", ntint_sts);
-
-	/* Print temperature value to log */
-	dev_warn(&ndev->ntb.pdev->dev, "Temperature %hhd.%hhuC",
-		idt_get_deg(mdeg), idt_get_deg_frac(mdeg));
-}
-
 /*
  * idt_init_temp() - initialize temperature sensor interface
  * @ndev:	IDT NTB hardware driver descriptor
@@ -2188,7 +2156,7 @@ static int idt_init_isr(struct idt_ntb_dev *ndev)
 		goto err_free_vectors;
 	}
 
-	/* Unmask Message/Doorbell/SE/Temperature interrupts */
+	/* Unmask Message/Doorbell/SE interrupts */
 	ntint_mask = idt_nt_read(ndev, IDT_NT_NTINTMSK) & ~IDT_NTINTMSK_ALL;
 	idt_nt_write(ndev, IDT_NT_NTINTMSK, ntint_mask);
 
@@ -2203,7 +2171,6 @@ static int idt_init_isr(struct idt_ntb_dev *ndev)
 	return ret;
 }
 
-
 /*
  * idt_deinit_ist() - deinitialize PCIe interrupt handler
  * @ndev:	IDT NTB hardware driver descriptor
@@ -2264,12 +2231,6 @@ static irqreturn_t idt_thread_isr(int irq, void *devid)
 		handled = true;
 	}
 
-	/* Handle temperature sensor interrupt */
-	if (ntint_sts & IDT_NTINTSTS_TMPSENSOR) {
-		idt_temp_isr(ndev, ntint_sts);
-		handled = true;
-	}
-
 	dev_dbg(&ndev->ntb.pdev->dev, "IDT IRQs 0x%08x handled", ntint_sts);
 
 	return handled ? IRQ_HANDLED : IRQ_NONE;
diff --git a/drivers/ntb/hw/idt/ntb_hw_idt.h b/drivers/ntb/hw/idt/ntb_hw_idt.h
index 032f81cb4d445..3517cd2e2baa2 100644
--- a/drivers/ntb/hw/idt/ntb_hw_idt.h
+++ b/drivers/ntb/hw/idt/ntb_hw_idt.h
@@ -688,15 +688,14 @@
  * @IDT_NTINTMSK_DBELL:		Doorbell interrupt mask bit
  * @IDT_NTINTMSK_SEVENT:	Switch Event interrupt mask bit
  * @IDT_NTINTMSK_TMPSENSOR:	Temperature sensor interrupt mask bit
- * @IDT_NTINTMSK_ALL:		All the useful interrupts mask
+ * @IDT_NTINTMSK_ALL:		NTB-related interrupts mask
  */
 #define IDT_NTINTMSK_MSG		0x00000001U
 #define IDT_NTINTMSK_DBELL		0x00000002U
 #define IDT_NTINTMSK_SEVENT		0x00000008U
 #define IDT_NTINTMSK_TMPSENSOR		0x00000080U
 #define IDT_NTINTMSK_ALL \
-	(IDT_NTINTMSK_MSG | IDT_NTINTMSK_DBELL | \
-	 IDT_NTINTMSK_SEVENT | IDT_NTINTMSK_TMPSENSOR)
+	(IDT_NTINTMSK_MSG | IDT_NTINTMSK_DBELL | IDT_NTINTMSK_SEVENT)
 
 /*
  * NTGSIGNAL register fields related constants
-- 
GitLab


From a662315d8ad9e687fe648b6eea9bd35017f565dd Mon Sep 17 00:00:00 2001
From: Serge Semin <fancer.lancer@gmail.com>
Date: Tue, 17 Jul 2018 12:24:37 +0300
Subject: [PATCH 0437/1890] ntb: idt: Alter the driver info comments

Since IDT PCIe-switch temperature sensor is now always available
irregardless of the EEPROM/BIOS settings, Kconfig and in-code
description should be properly altered. In addition lets update
the driver copyright lines.

Signed-off-by: Serge Semin <fancer.lancer@gmail.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 drivers/ntb/hw/idt/Kconfig      |  4 +---
 drivers/ntb/hw/idt/ntb_hw_idt.c | 11 ++++++-----
 drivers/ntb/hw/idt/ntb_hw_idt.h |  2 +-
 3 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/drivers/ntb/hw/idt/Kconfig b/drivers/ntb/hw/idt/Kconfig
index 2ed147368fa84..f8948cf515ce3 100644
--- a/drivers/ntb/hw/idt/Kconfig
+++ b/drivers/ntb/hw/idt/Kconfig
@@ -24,9 +24,7 @@ config NTB_IDT
 	 BAR settings of peer NT-functions, the BAR setups can't be done over
 	 kernel PCI fixups. That's why the alternative pre-initialization
 	 techniques like BIOS using SMBus interface or EEPROM should be
-	 utilized. Additionally if one needs to have temperature sensor
-	 information printed to system log, the corresponding registers must
-	 be initialized within BIOS/EEPROM as well.
+	 utilized.
 
 	 If unsure, say N.
 
diff --git a/drivers/ntb/hw/idt/ntb_hw_idt.c b/drivers/ntb/hw/idt/ntb_hw_idt.c
index c4594a708b483..1dede87dd54fa 100644
--- a/drivers/ntb/hw/idt/ntb_hw_idt.c
+++ b/drivers/ntb/hw/idt/ntb_hw_idt.c
@@ -4,7 +4,7 @@
  *
  *   GPL LICENSE SUMMARY
  *
- *   Copyright (C) 2016 T-Platforms All Rights Reserved.
+ *   Copyright (C) 2016-2018 T-Platforms JSC All Rights Reserved.
  *
  *   This program is free software; you can redistribute it and/or modify it
  *   under the terms and conditions of the GNU General Public License,
@@ -1824,10 +1824,11 @@ static int idt_ntb_peer_msg_write(struct ntb_dev *ntb, int pidx, int midx,
  *                      7. Temperature sensor operations
  *
  *    IDT PCIe-switch has an embedded temperature sensor, which can be used to
- * warn a user-space of possible chip overheating. Since workload temperature
- * can be different on different platforms, temperature thresholds as well as
- * general sensor settings must be setup in the framework of BIOS/EEPROM
- * initializations. It includes the actual sensor enabling as well.
+ * check current chip core temperature. Since a workload environment can be
+ * different on different platforms, an offset and ADC/filter settings can be
+ * specified. Although the offset configuration is only exposed to the sysfs
+ * hwmon interface at the moment. The rest of the settings can be adjusted
+ * for instance by the BIOS/EEPROM firmware.
  *=============================================================================
  */
 
diff --git a/drivers/ntb/hw/idt/ntb_hw_idt.h b/drivers/ntb/hw/idt/ntb_hw_idt.h
index 3517cd2e2baa2..2f1aa121b0cf3 100644
--- a/drivers/ntb/hw/idt/ntb_hw_idt.h
+++ b/drivers/ntb/hw/idt/ntb_hw_idt.h
@@ -4,7 +4,7 @@
  *
  *   GPL LICENSE SUMMARY
  *
- *   Copyright (C) 2016 T-Platforms All Rights Reserved.
+ *   Copyright (C) 2016-2018 T-Platforms JSC All Rights Reserved.
  *
  *   This program is free software; you can redistribute it and/or modify it
  *   under the terms and conditions of the GNU General Public License,
-- 
GitLab


From 734afd4b217f827e71b39abea10f5492c6f27606 Mon Sep 17 00:00:00 2001
From: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Date: Wed, 31 Oct 2018 10:23:05 -0400
Subject: [PATCH 0438/1890] drm/amdgpu: Fix skipping hangged job reset during
 gpu recover.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Problem:
During GPU recover DAL would hang in
amdgpu_pm_compute_clocks->amdgpu_fence_wait_empty

Fix:
Turns out there was a typo introduced by
3320b8d drm/amdgpu: remove job->ring which caused skipping
amdgpu_fence_driver_force_completion and so the hangged job
was never force signaled and this would cause the hang later in DAL.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Reviewed-by: Christian KÃ¶nig <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index d11489e8b3888..f06d068b8eb51 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3341,7 +3341,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 
 		kthread_park(ring->sched.thread);
 
-		if (job && job->base.sched == &ring->sched)
+		if (job && job->base.sched != &ring->sched)
 			continue;
 
 		drm_sched_hw_job_reset(&ring->sched, job ? &job->base : NULL);
-- 
GitLab


From 5be3bb6e92549f246f472d74bfaa54f56acbf998 Mon Sep 17 00:00:00 2001
From: Evan Quan <evan.quan@amd.com>
Date: Wed, 31 Oct 2018 14:15:04 +0800
Subject: [PATCH 0439/1890] drm/amd/powerplay: no MGPU fan boost enablement on
 DPM disabled

As MGPU fan boost feature will be definitely not needed when
DPM is disabled. So, there is no need to error out.

Signed-off-by: Evan Quan <evan.quan@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/amd_powerplay.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
index bf09735ea3ac7..d6aa1d414320b 100644
--- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
@@ -1318,12 +1318,12 @@ static int pp_enable_mgpu_fan_boost(void *handle)
 {
 	struct pp_hwmgr *hwmgr = handle;
 
-	if (!hwmgr || !hwmgr->pm_en)
+	if (!hwmgr)
 		return -EINVAL;
 
-	if (hwmgr->hwmgr_func->enable_mgpu_fan_boost == NULL) {
+	if (!hwmgr->pm_en ||
+	     hwmgr->hwmgr_func->enable_mgpu_fan_boost == NULL)
 		return 0;
-	}
 
 	mutex_lock(&hwmgr->smu_lock);
 	hwmgr->hwmgr_func->enable_mgpu_fan_boost(hwmgr);
-- 
GitLab


From 17c7c7e714624dd8f00fb6b85a214745fc2bae6b Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Mon, 22 Oct 2018 13:27:37 +0800
Subject: [PATCH 0440/1890] drm/amd/pp: Fix pp_sclk/mclk_od not work on smu7

not update the dpm table with user's setting

Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
index 6c99cbf51c08f..ed35ec0341e67 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
@@ -3588,9 +3588,10 @@ static int smu7_find_dpm_states_clocks_in_dpm_table(struct pp_hwmgr *hwmgr, cons
 			break;
 	}
 
-	if (i >= sclk_table->count)
+	if (i >= sclk_table->count) {
 		data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_SCLK;
-	else {
+		sclk_table->dpm_levels[i-1].value = sclk;
+	} else {
 	/* TODO: Check SCLK in DAL's minimum clocks
 	 * in case DeepSleep divider update is required.
 	 */
@@ -3605,9 +3606,10 @@ static int smu7_find_dpm_states_clocks_in_dpm_table(struct pp_hwmgr *hwmgr, cons
 			break;
 	}
 
-	if (i >= mclk_table->count)
+	if (i >= mclk_table->count) {
 		data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_MCLK;
-
+		mclk_table->dpm_levels[i-1].value = mclk;
+	}
 
 	if (data->display_timing.num_existing_displays != hwmgr->display_config->num_display)
 		data->need_update_smu7_dpm_table |= DPMTABLE_UPDATE_MCLK;
-- 
GitLab


From 47fdd897534704a8a3e77d0091e43479b64d3de1 Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Tue, 30 Oct 2018 20:01:59 +0800
Subject: [PATCH 0441/1890] drm/amd/pp: Fix pp_sclk/mclk_od not work on Vega10

not update dpm table with user's setting.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../drm/amd/powerplay/hwmgr/vega10_hwmgr.c    | 31 +++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index 419a1d77d661e..3fd68df23932b 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -3249,6 +3249,37 @@ static int vega10_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
 static int vega10_find_dpm_states_clocks_in_dpm_table(struct pp_hwmgr *hwmgr, const void *input)
 {
 	struct vega10_hwmgr *data = hwmgr->backend;
+	const struct phm_set_power_state_input *states =
+			(const struct phm_set_power_state_input *)input;
+	const struct vega10_power_state *vega10_ps =
+			cast_const_phw_vega10_power_state(states->pnew_state);
+	struct vega10_single_dpm_table *sclk_table = &(data->dpm_table.gfx_table);
+	uint32_t sclk = vega10_ps->performance_levels
+			[vega10_ps->performance_level_count - 1].gfx_clock;
+	struct vega10_single_dpm_table *mclk_table = &(data->dpm_table.mem_table);
+	uint32_t mclk = vega10_ps->performance_levels
+			[vega10_ps->performance_level_count - 1].mem_clock;
+	uint32_t i;
+
+	for (i = 0; i < sclk_table->count; i++) {
+		if (sclk == sclk_table->dpm_levels[i].value)
+			break;
+	}
+
+	if (i >= sclk_table->count) {
+		data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_SCLK;
+		sclk_table->dpm_levels[i-1].value = sclk;
+	}
+
+	for (i = 0; i < mclk_table->count; i++) {
+		if (mclk == mclk_table->dpm_levels[i].value)
+			break;
+	}
+
+	if (i >= mclk_table->count) {
+		data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_MCLK;
+		mclk_table->dpm_levels[i-1].value = mclk;
+	}
 
 	if (data->display_timing.num_existing_displays != hwmgr->display_config->num_display)
 		data->need_update_dpm_table |= DPMTABLE_UPDATE_MCLK;
-- 
GitLab


From a4c3f247ee94945ac949f3280b7c050c712857cd Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Wed, 31 Oct 2018 19:12:01 +0800
Subject: [PATCH 0442/1890] drm/amd/pp: Print warning if od_sclk/mclk out of
 range

print warning in dmesg to notify user the setting for
sclk_od/mclk_od out of range that vbios can support

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index 3fd68df23932b..8c4db86bb4b77 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -1333,7 +1333,6 @@ static int vega10_setup_default_dpm_tables(struct pp_hwmgr *hwmgr)
 	if (hwmgr->platform_descriptor.overdriveLimit.memoryClock == 0)
 		hwmgr->platform_descriptor.overdriveLimit.memoryClock =
 					dpm_table->dpm_levels[dpm_table->count-1].value;
-
 	vega10_init_dpm_state(&(dpm_table->dpm_state));
 
 	data->dpm_table.eclk_table.count = 0;
@@ -4560,11 +4559,13 @@ static int vega10_set_sclk_od(struct pp_hwmgr *hwmgr, uint32_t value)
 
 	if (vega10_ps->performance_levels
 			[vega10_ps->performance_level_count - 1].gfx_clock >
-			hwmgr->platform_descriptor.overdriveLimit.engineClock)
+			hwmgr->platform_descriptor.overdriveLimit.engineClock) {
 		vega10_ps->performance_levels
 		[vega10_ps->performance_level_count - 1].gfx_clock =
 				hwmgr->platform_descriptor.overdriveLimit.engineClock;
-
+		pr_warn("max sclk supported by vbios is %d\n",
+				hwmgr->platform_descriptor.overdriveLimit.engineClock);
+	}
 	return 0;
 }
 
@@ -4612,10 +4613,13 @@ static int vega10_set_mclk_od(struct pp_hwmgr *hwmgr, uint32_t value)
 
 	if (vega10_ps->performance_levels
 			[vega10_ps->performance_level_count - 1].mem_clock >
-			hwmgr->platform_descriptor.overdriveLimit.memoryClock)
+			hwmgr->platform_descriptor.overdriveLimit.memoryClock) {
 		vega10_ps->performance_levels
 		[vega10_ps->performance_level_count - 1].mem_clock =
 				hwmgr->platform_descriptor.overdriveLimit.memoryClock;
+		pr_warn("max mclk supported by vbios is %d\n",
+				hwmgr->platform_descriptor.overdriveLimit.memoryClock);
+	}
 
 	return 0;
 }
-- 
GitLab


From 9d064be1e6a195eaaa3762af5c7c6cd3f66aa6cc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <ckoenig.leichtzumerken@gmail.com>
Date: Wed, 24 Oct 2018 14:59:10 +0200
Subject: [PATCH 0443/1890] drm/amdgpu: revert "enable gfxoff in non-sriov and
 stutter mode by default"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is still completely breaking my Raven system.

This reverts commit cdf2f910fa969adca1b0e3ad2b487821233dc038.

Revert until we sort out the sbios and firmware combinations that work
correctly.

bug: https://bugs.freedesktop.org/show_bug.cgi?id=108606
Cc: stable@vger.kernel.org # v4.19

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Christian KÃ¶nig <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    | 4 ++--
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index f06d068b8eb51..30bc345d6fdf0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1493,8 +1493,6 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
 	}
 
 	adev->powerplay.pp_feature = amdgpu_pp_feature_mask;
-	if (amdgpu_sriov_vf(adev))
-		adev->powerplay.pp_feature &= ~PP_GFXOFF_MASK;
 
 	for (i = 0; i < adev->num_ip_blocks; i++) {
 		if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 28781414d71c8..943dbf3c5da12 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -114,8 +114,8 @@ uint amdgpu_pg_mask = 0xffffffff;
 uint amdgpu_sdma_phase_quantum = 32;
 char *amdgpu_disable_cu = NULL;
 char *amdgpu_virtual_display = NULL;
-/* OverDrive(bit 14) disabled by default*/
-uint amdgpu_pp_feature_mask = 0xffffbfff;
+/* OverDrive(bit 14),gfxoff(bit 15),stutter mode(bit 17) disabled by default*/
+uint amdgpu_pp_feature_mask = 0xfffd3fff;
 int amdgpu_ngg = 0;
 int amdgpu_prim_buf_per_se = 0;
 int amdgpu_pos_buf_per_se = 0;
-- 
GitLab


From 3eb2ebcb8e8d2e57d7a9f21ffe1dc82bf0322bdc Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 19 Oct 2018 14:09:44 +0200
Subject: [PATCH 0444/1890] scsi: aha152x: rename the PCMCIA define

We plan to enable building the PCMCIA core and drivers, and the
non-prefixed PCMCIA name clashes with some arch headers.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 drivers/scsi/aha152x.c             | 14 +++++++-------
 drivers/scsi/pcmcia/aha152x_core.c |  2 +-
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/scsi/aha152x.c b/drivers/scsi/aha152x.c
index 4d7b0e0adbf70..301b3cad15f88 100644
--- a/drivers/scsi/aha152x.c
+++ b/drivers/scsi/aha152x.c
@@ -269,7 +269,7 @@ static LIST_HEAD(aha152x_host_list);
 /* DEFINES */
 
 /* For PCMCIA cards, always use AUTOCONF */
-#if defined(PCMCIA) || defined(MODULE)
+#if defined(AHA152X_PCMCIA) || defined(MODULE)
 #if !defined(AUTOCONF)
 #define AUTOCONF
 #endif
@@ -297,7 +297,7 @@ CMD_INC_RESID(struct scsi_cmnd *cmd, int inc)
 
 #define DELAY_DEFAULT 1000
 
-#if defined(PCMCIA)
+#if defined(AHA152X_PCMCIA)
 #define IRQ_MIN 0
 #define IRQ_MAX 16
 #else
@@ -328,7 +328,7 @@ MODULE_AUTHOR("JÃ¼rgen Fischer");
 MODULE_DESCRIPTION(AHA152X_REVID);
 MODULE_LICENSE("GPL");
 
-#if !defined(PCMCIA)
+#if !defined(AHA152X_PCMCIA)
 #if defined(MODULE)
 static int io[] = {0, 0};
 module_param_hw_array(io, int, ioport, NULL, 0);
@@ -391,7 +391,7 @@ static struct isapnp_device_id id_table[] = {
 MODULE_DEVICE_TABLE(isapnp, id_table);
 #endif /* ISAPNP */
 
-#endif /* !PCMCIA */
+#endif /* !AHA152X_PCMCIA */
 
 static struct scsi_host_template aha152x_driver_template;
 
@@ -863,7 +863,7 @@ void aha152x_release(struct Scsi_Host *shpnt)
 	if (shpnt->irq)
 		free_irq(shpnt->irq, shpnt);
 
-#if !defined(PCMCIA)
+#if !defined(AHA152X_PCMCIA)
 	if (shpnt->io_port)
 		release_region(shpnt->io_port, IO_RANGE);
 #endif
@@ -2924,7 +2924,7 @@ static struct scsi_host_template aha152x_driver_template = {
 	.slave_alloc			= aha152x_adjust_queue,
 };
 
-#if !defined(PCMCIA)
+#if !defined(AHA152X_PCMCIA)
 static int setup_count;
 static struct aha152x_setup setup[2];
 
@@ -3392,4 +3392,4 @@ static int __init aha152x_setup(char *str)
 __setup("aha152x=", aha152x_setup);
 #endif
 
-#endif /* !PCMCIA */
+#endif /* !AHA152X_PCMCIA */
diff --git a/drivers/scsi/pcmcia/aha152x_core.c b/drivers/scsi/pcmcia/aha152x_core.c
index dba3716511c56..24b89228b2414 100644
--- a/drivers/scsi/pcmcia/aha152x_core.c
+++ b/drivers/scsi/pcmcia/aha152x_core.c
@@ -1,3 +1,3 @@
-#define PCMCIA	1
+#define AHA152X_PCMCIA 1
 #define AHA152X_STAT 1
 #include "aha152x.c"
-- 
GitLab


From 3905361b723ad9fbc74b3e449acc1d52bee1b4ce Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 19 Oct 2018 14:09:46 +0200
Subject: [PATCH 0445/1890] powerpc: remove CONFIG_PCI_QSPAN

This option isn't actually used anywhere.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 arch/powerpc/Kconfig | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index e84943d24e5c7..2ae8a46bce980 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -946,7 +946,6 @@ config PCI
 	bool "PCI support" if PPC_PCI_CHOICE
 	default y if !40x && !CPM2 && !PPC_8xx && !PPC_83xx \
 		&& !PPC_85xx && !PPC_86xx && !GAMECUBE_COMMON
-	default PCI_QSPAN if PPC_8xx
 	select GENERIC_PCI_IOMAP
 	help
 	  Find out whether your system includes a PCI bus. PCI is the name of
@@ -960,14 +959,6 @@ config PCI_DOMAINS
 config PCI_SYSCALL
 	def_bool PCI
 
-config PCI_QSPAN
-	bool "QSpan PCI"
-	depends on PPC_8xx
-	select PPC_I8259
-	help
-	  Say Y here if you have a system based on a Motorola 8xx-series
-	  embedded processor with a QSPAN PCI interface, otherwise say N.
-
 config PCI_8260
 	bool
 	depends on PCI && 8260
-- 
GitLab


From c8bf9212d1c2df2080e8870da5b791354c20376e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 19 Oct 2018 14:09:47 +0200
Subject: [PATCH 0446/1890] powerpc: remove CONFIG_MCA leftovers

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 arch/powerpc/Kconfig | 4 ----
 drivers/scsi/Kconfig | 6 +++---
 2 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 2ae8a46bce980..0db10c2792b2b 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -932,10 +932,6 @@ config FSL_GTM
 	help
 	  Freescale General-purpose Timers support
 
-# Yes MCA RS/6000s exist but Linux-PPC does not currently support any
-config MCA
-	bool
-
 # Platforms that what PCI turned unconditionally just do select PCI
 # in their config node.  Platforms that want to choose at config
 # time should select PPC_PCI_CHOICE
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 70988c3812684..f07444d30b216 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -538,7 +538,7 @@ config SCSI_HPTIOP
 
 config SCSI_BUSLOGIC
 	tristate "BusLogic SCSI support"
-	depends on (PCI || ISA || MCA) && SCSI && ISA_DMA_API && VIRT_TO_BUS
+	depends on (PCI || ISA) && SCSI && ISA_DMA_API && VIRT_TO_BUS
 	---help---
 	  This is support for BusLogic MultiMaster and FlashPoint SCSI Host
 	  Adapters. Consult the SCSI-HOWTO, available from
@@ -1175,12 +1175,12 @@ config SCSI_LPFC_DEBUG_FS
 
 config SCSI_SIM710
 	tristate "Simple 53c710 SCSI support (Compaq, NCR machines)"
-	depends on (EISA || MCA) && SCSI
+	depends on EISA && SCSI
 	select SCSI_SPI_ATTRS
 	---help---
 	  This driver is for NCR53c710 based SCSI host adapters.
 
-	  It currently supports Compaq EISA cards and NCR MCA cards
+	  It currently supports Compaq EISA cards.
 
 config SCSI_DC395x
 	tristate "Tekram DC395(U/UW/F) and DC315(U) SCSI support"
-- 
GitLab


From a15687ca7b927c02fde9ab68ecbbc3043fb72f25 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 19 Oct 2018 14:09:48 +0200
Subject: [PATCH 0447/1890] powerpc: PCI_MSI needs PCI

Various powerpc boards select the PCI_MSI config option without selecting
PCI, resulting in potentially not compilable configurations if the by
default enabled PCI option is disabled.  Explicitly select PCI to ensure
we always have valid configs.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 arch/powerpc/platforms/40x/Kconfig | 1 +
 arch/powerpc/platforms/44x/Kconfig | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/arch/powerpc/platforms/40x/Kconfig b/arch/powerpc/platforms/40x/Kconfig
index 2a9d66254ffc5..5326ece361204 100644
--- a/arch/powerpc/platforms/40x/Kconfig
+++ b/arch/powerpc/platforms/40x/Kconfig
@@ -29,6 +29,7 @@ config KILAUEA
 	select 405EX
 	select PPC40x_SIMPLE
 	select PPC4xx_PCI_EXPRESS
+	select PCI
 	select PCI_MSI
 	select PPC4xx_MSI
 	help
diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig
index f024efd5a4c20..9a85d350b1b6c 100644
--- a/arch/powerpc/platforms/44x/Kconfig
+++ b/arch/powerpc/platforms/44x/Kconfig
@@ -21,6 +21,7 @@ config BLUESTONE
 	depends on 44x
 	select PPC44x_SIMPLE
 	select APM821xx
+	select PCI
 	select PCI_MSI
 	select PPC4xx_MSI
 	select PPC4xx_PCI_EXPRESS
@@ -200,6 +201,7 @@ config AKEBONO
 	select SWIOTLB
 	select 476FPE
 	select PPC4xx_PCI_EXPRESS
+	select PCI
 	select PCI_MSI
 	select PPC4xx_HSTA_MSI
 	select I2C
-- 
GitLab


From 04c459d204484fa4747d29c24f00df11fe6334d4 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 30 Oct 2018 00:41:27 +0900
Subject: [PATCH 0448/1890] kconfig: remove oldnoconfig target

As commit 312ee68752fa ("kconfig: announce removal of oldnoconfig if
used") announced, it is time for the removal.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 scripts/kconfig/Makefile | 9 +--------
 scripts/kconfig/conf.c   | 7 -------
 2 files changed, 1 insertion(+), 15 deletions(-)

diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile
index 67ed9f6ccdf8f..5d37a604ecc84 100644
--- a/scripts/kconfig/Makefile
+++ b/scripts/kconfig/Makefile
@@ -68,14 +68,7 @@ PHONY += $(simple-targets)
 $(simple-targets): $(obj)/conf
 	$< $(silent) --$@ $(Kconfig)
 
-PHONY += oldnoconfig silentoldconfig savedefconfig defconfig
-
-# oldnoconfig is an alias of olddefconfig, because people already are dependent
-# on its behavior (sets new symbols to their default value but not 'n') with the
-# counter-intuitive name.
-oldnoconfig: olddefconfig
-	@echo "  WARNING: \"oldnoconfig\" target will be removed after Linux 4.19"
-	@echo "            Please use \"olddefconfig\" instead, which is an alias."
+PHONY += silentoldconfig savedefconfig defconfig
 
 # We do not expect manual invokcation of "silentoldcofig" (or "syncconfig").
 silentoldconfig: syncconfig
diff --git a/scripts/kconfig/conf.c b/scripts/kconfig/conf.c
index 7b2b37260669e..98e0c7a34699e 100644
--- a/scripts/kconfig/conf.c
+++ b/scripts/kconfig/conf.c
@@ -460,12 +460,6 @@ static struct option long_opts[] = {
 	{"randconfig",      no_argument,       NULL, randconfig},
 	{"listnewconfig",   no_argument,       NULL, listnewconfig},
 	{"olddefconfig",    no_argument,       NULL, olddefconfig},
-	/*
-	 * oldnoconfig is an alias of olddefconfig, because people already
-	 * are dependent on its behavior(sets new symbols to their default
-	 * value but not 'n') with the counter-intuitive name.
-	 */
-	{"oldnoconfig",     no_argument,       NULL, olddefconfig},
 	{NULL, 0, NULL, 0}
 };
 
@@ -480,7 +474,6 @@ static void conf_usage(const char *progname)
 	printf("  --syncconfig            Similar to oldconfig but generates configuration in\n"
 	       "                          include/{generated/,config/}\n");
 	printf("  --olddefconfig          Same as oldconfig but sets new symbols to their default value\n");
-	printf("  --oldnoconfig           An alias of olddefconfig\n");
 	printf("  --defconfig <file>      New config with default defined in <file>\n");
 	printf("  --savedefconfig <file>  Save the minimal current configuration to <file>\n");
 	printf("  --allnoconfig           New config where all options are answered with no\n");
-- 
GitLab


From 0085b4191f3e30ec94beef4103679f2828d3c54f Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 30 Oct 2018 00:41:28 +0900
Subject: [PATCH 0449/1890] kconfig: remove silentoldconfig target

As commit 911a91c39cab ("kconfig: rename silentoldconfig to
syncconfig") announced, it is time for the removal.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Acked-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 Documentation/networking/ice.rst | 2 +-
 scripts/kconfig/Makefile         | 9 +--------
 2 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/Documentation/networking/ice.rst b/Documentation/networking/ice.rst
index 1e4948c9e9897..4d118b827bbb7 100644
--- a/Documentation/networking/ice.rst
+++ b/Documentation/networking/ice.rst
@@ -20,7 +20,7 @@ Enabling the driver
 The driver is enabled via the standard kernel configuration system,
 using the make command::
 
-  make oldconfig/silentoldconfig/menuconfig/etc.
+  make oldconfig/menuconfig/etc.
 
 The driver is located in the menu structure at:
 
diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile
index 5d37a604ecc84..63b609243d037 100644
--- a/scripts/kconfig/Makefile
+++ b/scripts/kconfig/Makefile
@@ -68,14 +68,7 @@ PHONY += $(simple-targets)
 $(simple-targets): $(obj)/conf
 	$< $(silent) --$@ $(Kconfig)
 
-PHONY += silentoldconfig savedefconfig defconfig
-
-# We do not expect manual invokcation of "silentoldcofig" (or "syncconfig").
-silentoldconfig: syncconfig
-	@echo "  WARNING: \"silentoldconfig\" has been renamed to \"syncconfig\""
-	@echo "            and is now an internal implementation detail."
-	@echo "            What you want is probably \"oldconfig\"."
-	@echo "            \"silentoldconfig\" will be removed after Linux 4.19"
+PHONY += savedefconfig defconfig
 
 savedefconfig: $(obj)/conf
 	$< $(silent) --$@=defconfig $(Kconfig)
-- 
GitLab


From 3f80babd9ca477d19a027e0b25b95973cd7f7357 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 30 Oct 2018 01:03:54 +0900
Subject: [PATCH 0450/1890] kbuild: remove unused cc-fullversion variable

The last user of cc-fullversion was removed by commit f2910f0e6835
("powerpc: remove old GCC version checks").

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 Documentation/kbuild/makefiles.txt | 15 ---------------
 scripts/Kbuild.include             |  4 ----
 2 files changed, 19 deletions(-)

diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt
index 7b6a2b2bdc98d..8da26c6dd886a 100644
--- a/Documentation/kbuild/makefiles.txt
+++ b/Documentation/kbuild/makefiles.txt
@@ -537,21 +537,6 @@ more details, with real examples.
 	The third parameter may be a text as in this example, but it may also
 	be an expanded variable or a macro.
 
-    cc-fullversion
-	cc-fullversion is useful when the exact version of gcc is needed.
-	One typical use-case is when a specific GCC version is broken.
-	cc-fullversion points out a more specific version than cc-version does.
-
-	Example:
-		#arch/powerpc/Makefile
-		$(Q)if test "$(cc-fullversion)" = "040200" ; then \
-			echo -n '*** GCC-4.2.0 cannot compile the 64-bit powerpc ' ; \
-			false ; \
-		fi
-
-	In this example for a specific GCC version the build will error out
-	explaining to the user why it stops.
-
     cc-cross-prefix
 	cc-cross-prefix is used to check if there exists a $(CC) in path with
 	one of the listed prefixes. The first prefix where there exist a
diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
index ca21a35fa244e..19a63db62fb9f 100644
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include
@@ -147,10 +147,6 @@ cc-name = $(shell $(CC) -v 2>&1 | grep -q "clang version" && echo clang || echo
 # cc-version
 cc-version = $(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-version.sh $(CC))
 
-# cc-fullversion
-cc-fullversion = $(shell $(CONFIG_SHELL) \
-	$(srctree)/scripts/gcc-version.sh -p $(CC))
-
 # cc-ifversion
 # Usage:  EXTRA_CFLAGS += $(call cc-ifversion, -lt, 0402, -O1)
 cc-ifversion = $(shell [ $(cc-version) $(1) $(2) ] && echo $(3) || echo $(4))
-- 
GitLab


From 2cd3faf87d2d8f6123adf34741b9a7b98828a76f Mon Sep 17 00:00:00 2001
From: Petr Vorel <petr.vorel@gmail.com>
Date: Mon, 29 Oct 2018 22:10:58 +0100
Subject: [PATCH 0451/1890] merge_config.sh: Allow to define config prefix

with CONFIG_ environment variable.

merge_config.sh uses CONFIG_ which is used in kernel and other projects.
There are some projects which use kconfig with different prefixes (e.g.
buildroot: BR2_ prefix). CONFIG_ variable is already used for this
purpose in kconfig binary (scripts/kconfig/lkc.h), let's use the same
rule for in merge_config.sh.

Suggested-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Petr Vorel <petr.vorel@gmail.com>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 scripts/kconfig/merge_config.sh | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/scripts/kconfig/merge_config.sh b/scripts/kconfig/merge_config.sh
index 67d1314476314..da66e7742282a 100755
--- a/scripts/kconfig/merge_config.sh
+++ b/scripts/kconfig/merge_config.sh
@@ -33,12 +33,15 @@ usage() {
 	echo "  -n    use allnoconfig instead of alldefconfig"
 	echo "  -r    list redundant entries when merging fragments"
 	echo "  -O    dir to put generated output files.  Consider setting \$KCONFIG_CONFIG instead."
+	echo
+	echo "Used prefix: '$CONFIG_PREFIX'. You can redefine it with \$CONFIG_ environment variable."
 }
 
 RUNMAKE=true
 ALLTARGET=alldefconfig
 WARNREDUN=false
 OUTPUT=.
+CONFIG_PREFIX=${CONFIG_-CONFIG_}
 
 while true; do
 	case $1 in
@@ -99,7 +102,8 @@ if [ ! -r "$INITFILE" ]; then
 fi
 
 MERGE_LIST=$*
-SED_CONFIG_EXP="s/^\(# \)\{0,1\}\(CONFIG_[a-zA-Z0-9_]*\)[= ].*/\2/p"
+SED_CONFIG_EXP="s/^\(# \)\{0,1\}\(${CONFIG_PREFIX}[a-zA-Z0-9_]*\)[= ].*/\2/p"
+
 TMP_FILE=$(mktemp ./.tmp.config.XXXXXXXXXX)
 
 echo "Using $INITFILE as base"
-- 
GitLab


From febf8a3712e4209b7e650b37b3b240a2b387794d Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 1 Nov 2018 10:34:34 -0300
Subject: [PATCH 0452/1890] perf examples bpf: Start augmenting
 raw_syscalls:sys_{start,exit}

The previous approach of attaching to each syscall showed how it is
possible to augment tracepoints and use that augmentation, pointer
payloads, in the existing beautifiers in 'perf trace', but for a more
general solution we now will try to augment the main
raw_syscalls:sys_{enter,exit} syscalls, and then pass instructions in
maps so that it knows which syscalls and which pointer contents, and how
many bytes for each of the arguments should be copied.

Start with just the bare minimum to collect what is provided by those
two tracepoints via the __augmented_syscalls__ map + bpf-output perf
event, which results in perf trace showing them without connecting
enter+exit:

  # perf trace -e tools/perf/examples/bpf/augmented_raw_syscalls.c sleep 1
     0.000 sleep/11563 raw_syscalls:sys_exit:NR 59 = 0
     0.019 (         ): sleep/11563 brk() ...
     0.021 sleep/11563 raw_syscalls:sys_exit:NR 12 = 94682642325504
     0.033 (         ): sleep/11563 access(filename:, mode: R) ...
     0.037 sleep/11563 raw_syscalls:sys_exit:NR 21 = -2
     0.041 (         ): sleep/11563 openat(dfd: CWD, filename: , flags: CLOEXEC) ...
     0.044 sleep/11563 raw_syscalls:sys_exit:NR 257 = 3
     0.045 (         ): sleep/11563 fstat(fd: 3, statbuf: 0x7ffdbf7119b0) ...
     0.046 sleep/11563 raw_syscalls:sys_exit:NR 5 = 0
     0.047 (         ): sleep/11563 mmap(len: 103334, prot: READ, flags: PRIVATE, fd: 3) ...
     0.049 sleep/11563 raw_syscalls:sys_exit:NR 9 = 140196285493248
     0.050 (         ): sleep/11563 close(fd: 3) ...
     0.051 sleep/11563 raw_syscalls:sys_exit:NR 3 = 0
     0.059 (         ): sleep/11563 openat(dfd: CWD, filename: , flags: CLOEXEC) ...
     0.062 sleep/11563 raw_syscalls:sys_exit:NR 257 = 3
     0.063 (         ): sleep/11563 read(fd: 3, buf: 0x7ffdbf711b78, count: 832) ...
     0.065 sleep/11563 raw_syscalls:sys_exit:NR 0 = 832
     0.066 (         ): sleep/11563 fstat(fd: 3, statbuf: 0x7ffdbf711a10) ...
     0.067 sleep/11563 raw_syscalls:sys_exit:NR 5 = 0
     0.068 (         ): sleep/11563 mmap(len: 8192, prot: READ|WRITE, flags: PRIVATE|ANONYMOUS) ...
     0.070 sleep/11563 raw_syscalls:sys_exit:NR 9 = 140196285485056
     0.073 (         ): sleep/11563 mmap(len: 3889792, prot: EXEC|READ, flags: PRIVATE|DENYWRITE, fd: 3) ...
     0.076 sleep/11563 raw_syscalls:sys_exit:NR 9 = 140196279463936
     0.077 (         ): sleep/11563 mprotect(start: 0x7f81fd8a8000, len: 2093056) ...
     0.083 sleep/11563 raw_syscalls:sys_exit:NR 10 = 0
     0.084 (         ): sleep/11563 mmap(addr: 0x7f81fdaa7000, len: 24576, prot: READ|WRITE, flags: PRIVATE|FIXED|DENYWRITE, fd: 3, off: 1753088) ...
     0.088 sleep/11563 raw_syscalls:sys_exit:NR 9 = 140196283314176
     0.091 (         ): sleep/11563 mmap(addr: 0x7f81fdaad000, len: 14976, prot: READ|WRITE, flags: PRIVATE|FIXED|ANONYMOUS) ...
     0.093 sleep/11563 raw_syscalls:sys_exit:NR 9 = 140196283338752
     0.097 (         ): sleep/11563 close(fd: 3) ...
     0.098 sleep/11563 raw_syscalls:sys_exit:NR 3 = 0
     0.107 (         ): sleep/11563 arch_prctl(option: 4098, arg2: 140196285490432) ...
     0.108 sleep/11563 raw_syscalls:sys_exit:NR 158 = 0
     0.143 (         ): sleep/11563 mprotect(start: 0x7f81fdaa7000, len: 16384, prot: READ) ...
     0.146 sleep/11563 raw_syscalls:sys_exit:NR 10 = 0
     0.157 (         ): sleep/11563 mprotect(start: 0x561d037e7000, len: 4096, prot: READ) ...
     0.160 sleep/11563 raw_syscalls:sys_exit:NR 10 = 0
     0.163 (         ): sleep/11563 mprotect(start: 0x7f81fdcd5000, len: 4096, prot: READ) ...
     0.165 sleep/11563 raw_syscalls:sys_exit:NR 10 = 0
     0.166 (         ): sleep/11563 munmap(addr: 0x7f81fdcbb000, len: 103334) ...
     0.174 sleep/11563 raw_syscalls:sys_exit:NR 11 = 0
     0.216 (         ): sleep/11563 brk() ...
     0.217 sleep/11563 raw_syscalls:sys_exit:NR 12 = 94682642325504
     0.217 (         ): sleep/11563 brk(brk: 0x561d05453000) ...
     0.219 sleep/11563 raw_syscalls:sys_exit:NR 12 = 94682642460672
     0.220 (         ): sleep/11563 brk() ...
     0.221 sleep/11563 raw_syscalls:sys_exit:NR 12 = 94682642460672
     0.224 (         ): sleep/11563 open(filename: , flags: CLOEXEC) ...
     0.228 sleep/11563 raw_syscalls:sys_exit:NR 2 = 3
     0.229 (         ): sleep/11563 fstat(fd: 3, statbuf: 0x7f81fdaacaa0) ...
     0.230 sleep/11563 raw_syscalls:sys_exit:NR 5 = 0
     0.231 (         ): sleep/11563 mmap(len: 113045344, prot: READ, flags: PRIVATE, fd: 3) ...
     0.234 sleep/11563 raw_syscalls:sys_exit:NR 9 = 140196166418432
     0.237 (         ): sleep/11563 close(fd: 3) ...
     0.238 sleep/11563 raw_syscalls:sys_exit:NR 3 = 0
     0.262 (         ): sleep/11563 nanosleep(rqtp: 0x7ffdbf7126f0) ...
  1000.399 sleep/11563 raw_syscalls:sys_exit:NR 35 = 0
  1000.440 (         ): sleep/11563 close(fd: 1) ...
  1000.447 sleep/11563 raw_syscalls:sys_exit:NR 3 = 0
  1000.454 (         ): sleep/11563 close(fd: 2) ...
  1000.468 (         ): sleep/11563 exit_group(                                                           )
  #

In the next csets we'll connect those events to the existing enter/exit
raw_syscalls handlers in 'perf trace', just like we did with the
syscalls:sys_{enter,exit}_* tracepoints.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-5nl8l4hx1tl9pqdx65nkp6pw@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../examples/bpf/augmented_raw_syscalls.c     | 59 +++++++++++++++++++
 1 file changed, 59 insertions(+)
 create mode 100644 tools/perf/examples/bpf/augmented_raw_syscalls.c

diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c
new file mode 100644
index 0000000000000..cde91c34b1016
--- /dev/null
+++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Augment the raw_syscalls tracepoints with the contents of the pointer arguments.
+ *
+ * Test it with:
+ *
+ * perf trace -e tools/perf/examples/bpf/augmented_raw_syscalls.c cat /etc/passwd > /dev/null
+ *
+ * This exactly matches what is marshalled into the raw_syscall:sys_enter
+ * payload expected by the 'perf trace' beautifiers.
+ *
+ * For now it just uses the existing tracepoint augmentation code in 'perf
+ * trace', in the next csets we'll hook up these with the sys_enter/sys_exit
+ * code that will combine entry/exit in a strace like way.
+ */
+
+#include <stdio.h>
+#include <linux/socket.h>
+
+/* bpf-output associated map */
+struct bpf_map SEC("maps") __augmented_syscalls__ = {
+	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(u32),
+	.max_entries = __NR_CPUS__,
+};
+
+struct syscall_enter_args {
+	unsigned long long common_tp_fields;
+	long		   syscall_nr;
+	unsigned long	   args[6];
+};
+
+struct syscall_exit_args {
+	unsigned long long common_tp_fields;
+	long		   syscall_nr;
+	long		   ret;
+};
+
+SEC("raw_syscalls:sys_enter")
+int sys_enter(struct syscall_enter_args *args)
+{
+	struct {
+		struct syscall_enter_args args;
+	} augmented_args;
+	unsigned int len = sizeof(augmented_args);
+
+	probe_read(&augmented_args.args, sizeof(augmented_args.args), args);
+	perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len);
+	return 0;
+}
+
+SEC("raw_syscalls:sys_exit")
+int sys_exit(struct syscall_exit_args *args)
+{
+	return 1; /* 0 as soon as we start copying data returned by the kernel, e.g. 'read' */
+}
+
+license(GPL);
-- 
GitLab


From 3c5e3dabf3722a883227623a4adf61976c2224ff Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 1 Nov 2018 13:50:35 -0300
Subject: [PATCH 0453/1890] perf trace: When augmenting raw_syscalls plug
 raw_syscalls:sys_exit too

With just this commit we get to support all syscalls via hooking
raw_syscalls:sys_{enter,exit} to the trace__sys_{enter,exit} routines
to combine, strace-like, those tracepoints.

  # trace -e tools/perf/examples/bpf/augmented_raw_syscalls.c sleep 1
         ? (         ): sleep/31680  ... [continued]: execve()) = 0
     0.043 ( 0.004 ms): sleep/31680 brk() = 0x55652a851000
     0.070 ( 0.009 ms): sleep/31680 access(filename:, mode: R) = -1 ENOENT No such file or directory
     0.087 ( 0.006 ms): sleep/31680 openat(dfd: CWD, filename: , flags: CLOEXEC) = 3
     0.096 ( 0.003 ms): sleep/31680 fstat(fd: 3, statbuf: 0x7ffc5269e190) = 0
     0.101 ( 0.005 ms): sleep/31680 mmap(len: 103334, prot: READ, flags: PRIVATE, fd: 3) = 0x7f709c239000
     0.109 ( 0.002 ms): sleep/31680 close(fd: 3) = 0
     0.126 ( 0.006 ms): sleep/31680 openat(dfd: CWD, filename: , flags: CLOEXEC) = 3
     0.135 ( 0.003 ms): sleep/31680 read(fd: 3, buf: 0x7ffc5269e358, count: 832) = 832
     0.141 ( 0.002 ms): sleep/31680 fstat(fd: 3, statbuf: 0x7ffc5269e1f0) = 0
     0.146 ( 0.005 ms): sleep/31680 mmap(len: 8192, prot: READ|WRITE, flags: PRIVATE|ANONYMOUS) = 0x7f709c237000
     0.159 ( 0.007 ms): sleep/31680 mmap(len: 3889792, prot: EXEC|READ, flags: PRIVATE|DENYWRITE, fd: 3) = 0x7f709bc79000
     0.168 ( 0.009 ms): sleep/31680 mprotect(start: 0x7f709be26000, len: 2093056) = 0
     0.179 ( 0.010 ms): sleep/31680 mmap(addr: 0x7f709c025000, len: 24576, prot: READ|WRITE, flags: PRIVATE|FIXED|DENYWRITE, fd: 3, off: 1753088) = 0x7f709c025000
     0.196 ( 0.005 ms): sleep/31680 mmap(addr: 0x7f709c02b000, len: 14976, prot: READ|WRITE, flags: PRIVATE|FIXED|ANONYMOUS) = 0x7f709c02b000
     0.210 ( 0.002 ms): sleep/31680 close(fd: 3) = 0
     0.230 ( 0.002 ms): sleep/31680 arch_prctl(option: 4098, arg2: 140121632638208) = 0
     0.306 ( 0.009 ms): sleep/31680 mprotect(start: 0x7f709c025000, len: 16384, prot: READ) = 0
     0.338 ( 0.005 ms): sleep/31680 mprotect(start: 0x556529607000, len: 4096, prot: READ) = 0
     0.348 ( 0.005 ms): sleep/31680 mprotect(start: 0x7f709c253000, len: 4096, prot: READ) = 0
     0.356 ( 0.019 ms): sleep/31680 munmap(addr: 0x7f709c239000, len: 103334) = 0
     0.463 ( 0.002 ms): sleep/31680 brk() = 0x55652a851000
     0.468 ( 0.004 ms): sleep/31680 brk(brk: 0x55652a872000) = 0x55652a872000
     0.474 ( 0.002 ms): sleep/31680 brk() = 0x55652a872000
     0.484 ( 0.008 ms): sleep/31680 open(filename: , flags: CLOEXEC) = 3
     0.497 ( 0.002 ms): sleep/31680 fstat(fd: 3, statbuf: 0x7f709c02aaa0) = 0
     0.501 ( 0.006 ms): sleep/31680 mmap(len: 113045344, prot: READ, flags: PRIVATE, fd: 3) = 0x7f70950aa000
     0.514 ( 0.002 ms): sleep/31680 close(fd: 3) = 0
     0.554 (1000.140 ms): sleep/31680 nanosleep(rqtp: 0x7ffc5269eed0) = 0
  1000.734 ( 0.007 ms): sleep/31680 close(fd: 1) = 0
  1000.748 ( 0.004 ms): sleep/31680 close(fd: 2) = 0
  1000.769 (         ): sleep/31680 exit_group()
  #

Now to allow selecting which syscalls should be traced, using a map.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-votqqmqhag8e1i9mgyzfez3o@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-trace.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index dc8a6c4986ce2..f582ca575883e 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -3501,7 +3501,8 @@ int cmd_trace(int argc, const char **argv)
 		evsel->handler = trace__sys_enter;
 
 		evlist__for_each_entry(trace.evlist, evsel) {
-			if (strstarts(perf_evsel__name(evsel), "syscalls:sys_exit_")) {
+			if (strstarts(perf_evsel__name(evsel), "syscalls:sys_exit_") ||
+			    strcmp(perf_evsel__name(evsel), "raw_syscalls:sys_exit") == 0) {
 				perf_evsel__init_augmented_syscall_tp(evsel);
 				perf_evsel__init_augmented_syscall_tp_ret(evsel);
 				evsel->handler = trace__sys_exit;
-- 
GitLab


From c3be6577d82a9f0163eb1e2c37a477414d12a209 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@mips.com>
Date: Thu, 1 Nov 2018 17:51:34 +0000
Subject: [PATCH 0454/1890] SUNRPC: Use atomic(64)_t for seq_send(64)

The seq_send & seq_send64 fields in struct krb5_ctx are used as
atomically incrementing counters. This is implemented using cmpxchg() &
cmpxchg64() to implement what amount to custom versions of
atomic_fetch_inc() & atomic64_fetch_inc().

Besides the duplication, using cmpxchg64() has another major drawback in
that some 32 bit architectures don't provide it. As such commit
571ed1fd2390 ("SUNRPC: Replace krb5_seq_lock with a lockless scheme")
resulted in build failures for some architectures.

Change seq_send to be an atomic_t and seq_send64 to be an atomic64_t,
then use atomic(64)_* functions to manipulate the values. The atomic64_t
type & associated functions are provided even on architectures which
lack real 64 bit atomic memory access via CONFIG_GENERIC_ATOMIC64 which
uses spinlocks to serialize access. This fixes the build failures for
architectures lacking cmpxchg64().

A potential alternative that was raised would be to provide cmpxchg64()
on the 32 bit architectures that currently lack it, using spinlocks.
However this would provide a version of cmpxchg64() with semantics a
little different to the implementations on architectures with real 64
bit atomics - the spinlock-based implementation would only work if all
access to the memory used with cmpxchg64() is *always* performed using
cmpxchg64(). That is not currently a requirement for users of
cmpxchg64(), and making it one seems questionable. As such avoiding
cmpxchg64() outside of architecture-specific code seems best,
particularly in cases where atomic64_t seems like a better fit anyway.

The CONFIG_GENERIC_ATOMIC64 implementation of atomic64_* functions will
use spinlocks & so faces the same issue, but with the key difference
that the memory backing an atomic64_t ought to always be accessed via
the atomic64_* functions anyway making the issue moot.

Signed-off-by: Paul Burton <paul.burton@mips.com>
Fixes: 571ed1fd2390 ("SUNRPC: Replace krb5_seq_lock with a lockless scheme")
Cc: Trond Myklebust <trond.myklebust@hammerspace.com>
Cc: Anna Schumaker <anna.schumaker@netapp.com>
Cc: J. Bruce Fields <bfields@fieldses.org>
Cc: Jeff Layton <jlayton@kernel.org>
Cc: David S. Miller <davem@davemloft.net>
Cc: linux-nfs@vger.kernel.org
Cc: netdev@vger.kernel.org
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/gss_krb5.h     |  7 ++-----
 net/sunrpc/auth_gss/gss_krb5_mech.c | 16 ++++++++++------
 net/sunrpc/auth_gss/gss_krb5_seal.c | 28 ++--------------------------
 net/sunrpc/auth_gss/gss_krb5_wrap.c |  4 ++--
 4 files changed, 16 insertions(+), 39 deletions(-)

diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h
index 69f749afa617b..4162de72e95c5 100644
--- a/include/linux/sunrpc/gss_krb5.h
+++ b/include/linux/sunrpc/gss_krb5.h
@@ -107,8 +107,8 @@ struct krb5_ctx {
 	u8			Ksess[GSS_KRB5_MAX_KEYLEN]; /* session key */
 	u8			cksum[GSS_KRB5_MAX_KEYLEN];
 	s32			endtime;
-	u32			seq_send;
-	u64			seq_send64;
+	atomic_t		seq_send;
+	atomic64_t		seq_send64;
 	struct xdr_netobj	mech_used;
 	u8			initiator_sign[GSS_KRB5_MAX_KEYLEN];
 	u8			acceptor_sign[GSS_KRB5_MAX_KEYLEN];
@@ -118,9 +118,6 @@ struct krb5_ctx {
 	u8			acceptor_integ[GSS_KRB5_MAX_KEYLEN];
 };
 
-extern u32 gss_seq_send_fetch_and_inc(struct krb5_ctx *ctx);
-extern u64 gss_seq_send64_fetch_and_inc(struct krb5_ctx *ctx);
-
 /* The length of the Kerberos GSS token header */
 #define GSS_KRB5_TOK_HDR_LEN	(16)
 
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 7bb2514aadd9d..71cb29dc86c28 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -275,6 +275,7 @@ get_key(const void *p, const void *end,
 static int
 gss_import_v1_context(const void *p, const void *end, struct krb5_ctx *ctx)
 {
+	u32 seq_send;
 	int tmp;
 
 	p = simple_get_bytes(p, end, &ctx->initiate, sizeof(ctx->initiate));
@@ -316,9 +317,10 @@ gss_import_v1_context(const void *p, const void *end, struct krb5_ctx *ctx)
 	p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime));
 	if (IS_ERR(p))
 		goto out_err;
-	p = simple_get_bytes(p, end, &ctx->seq_send, sizeof(ctx->seq_send));
+	p = simple_get_bytes(p, end, &seq_send, sizeof(seq_send));
 	if (IS_ERR(p))
 		goto out_err;
+	atomic_set(&ctx->seq_send, seq_send);
 	p = simple_get_netobj(p, end, &ctx->mech_used);
 	if (IS_ERR(p))
 		goto out_err;
@@ -610,6 +612,7 @@ static int
 gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx,
 		gfp_t gfp_mask)
 {
+	u64 seq_send64;
 	int keylen;
 
 	p = simple_get_bytes(p, end, &ctx->flags, sizeof(ctx->flags));
@@ -620,14 +623,15 @@ gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx,
 	p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime));
 	if (IS_ERR(p))
 		goto out_err;
-	p = simple_get_bytes(p, end, &ctx->seq_send64, sizeof(ctx->seq_send64));
+	p = simple_get_bytes(p, end, &seq_send64, sizeof(seq_send64));
 	if (IS_ERR(p))
 		goto out_err;
+	atomic64_set(&ctx->seq_send64, seq_send64);
 	/* set seq_send for use by "older" enctypes */
-	ctx->seq_send = ctx->seq_send64;
-	if (ctx->seq_send64 != ctx->seq_send) {
-		dprintk("%s: seq_send64 %lx, seq_send %x overflow?\n", __func__,
-			(unsigned long)ctx->seq_send64, ctx->seq_send);
+	atomic_set(&ctx->seq_send, seq_send64);
+	if (seq_send64 != atomic_read(&ctx->seq_send)) {
+		dprintk("%s: seq_send64 %llx, seq_send %x overflow?\n", __func__,
+			seq_send64, atomic_read(&ctx->seq_send));
 		p = ERR_PTR(-EINVAL);
 		goto out_err;
 	}
diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c
index b4adeb06660b1..48fe4a591b543 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seal.c
@@ -123,30 +123,6 @@ setup_token_v2(struct krb5_ctx *ctx, struct xdr_netobj *token)
 	return krb5_hdr;
 }
 
-u32
-gss_seq_send_fetch_and_inc(struct krb5_ctx *ctx)
-{
-	u32 old, seq_send = READ_ONCE(ctx->seq_send);
-
-	do {
-		old = seq_send;
-		seq_send = cmpxchg(&ctx->seq_send, old, old + 1);
-	} while (old != seq_send);
-	return seq_send;
-}
-
-u64
-gss_seq_send64_fetch_and_inc(struct krb5_ctx *ctx)
-{
-	u64 old, seq_send = READ_ONCE(ctx->seq_send);
-
-	do {
-		old = seq_send;
-		seq_send = cmpxchg64(&ctx->seq_send64, old, old + 1);
-	} while (old != seq_send);
-	return seq_send;
-}
-
 static u32
 gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text,
 		struct xdr_netobj *token)
@@ -177,7 +153,7 @@ gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text,
 
 	memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len);
 
-	seq_send = gss_seq_send_fetch_and_inc(ctx);
+	seq_send = atomic_fetch_inc(&ctx->seq_send);
 
 	if (krb5_make_seq_num(ctx, ctx->seq, ctx->initiate ? 0 : 0xff,
 			      seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8))
@@ -205,7 +181,7 @@ gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text,
 
 	/* Set up the sequence number. Now 64-bits in clear
 	 * text and w/o direction indicator */
-	seq_send_be64 = cpu_to_be64(gss_seq_send64_fetch_and_inc(ctx));
+	seq_send_be64 = cpu_to_be64(atomic64_fetch_inc(&ctx->seq_send64));
 	memcpy(krb5_hdr + 8, (char *) &seq_send_be64, 8);
 
 	if (ctx->initiate) {
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index 41cb294cd0717..6af6f119d9c1a 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -228,7 +228,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
 
 	memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len);
 
-	seq_send = gss_seq_send_fetch_and_inc(kctx);
+	seq_send = atomic_fetch_inc(&kctx->seq_send);
 
 	/* XXX would probably be more efficient to compute checksum
 	 * and encrypt at the same time: */
@@ -475,7 +475,7 @@ gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset,
 	*be16ptr++ = 0;
 
 	be64ptr = (__be64 *)be16ptr;
-	*be64ptr = cpu_to_be64(gss_seq_send64_fetch_and_inc(kctx));
+	*be64ptr = cpu_to_be64(atomic64_fetch_inc(&kctx->seq_send64));
 
 	err = (*kctx->gk5e->encrypt_v2)(kctx, offset, buf, pages);
 	if (err)
-- 
GitLab


From d3787af289c85381cc048cb8c56b67260dbbc436 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 26 Oct 2018 19:10:31 +0100
Subject: [PATCH 0455/1890] NFS: fix spelling mistake, EACCESS -> EACCES

Trivial fix to a spelling mistake of the error access name EACCESS,
rename to EACCES

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/nfs4proc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index db84b4adbc491..867457d6dfbe5 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3788,7 +3788,7 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
 	}
 
 	/*
-	 * -EACCESS could mean that the user doesn't have correct permissions
+	 * -EACCES could mean that the user doesn't have correct permissions
 	 * to access the mount.  It could also mean that we tried to mount
 	 * with a gss auth flavor, but rpc.gssd isn't running.  Either way,
 	 * existing mount programs don't handle -EACCES very well so it should
-- 
GitLab


From d47748e5ae5af6572e520cc9767bbe70c22ea498 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Thu, 1 Nov 2018 21:31:39 +0100
Subject: [PATCH 0456/1890] ovl: automatically enable redirect_dir on
 metacopy=on

Current behavior is to automatically disable metacopy if redirect_dir is
not enabled and proceed with the mount.

If "metacopy=on" mount option was given, then this behavior can confuse the
user: no mount failure, yet metacopy is disabled.

This patch makes metacopy=on imply redirect_dir=on.

The converse is also true: turning off full redirect with redirect_dir=
{off|follow|nofollow} will disable metacopy.

If both metacopy=on and redirect_dir={off|follow|nofollow} is specified,
then mount will fail, since there's no way to correctly resolve the
conflict.

Reported-by: Daniel Walsh <dwalsh@redhat.com>
Fixes: d5791044d2e5 ("ovl: Provide a mount option metacopy=on/off...")
Cc: <stable@vger.kernel.org> # v4.19
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 Documentation/filesystems/overlayfs.txt |  6 +++++
 fs/overlayfs/super.c                    | 36 ++++++++++++++++++++-----
 2 files changed, 35 insertions(+), 7 deletions(-)

diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt
index 51c136c821bfb..eef7d9d259e85 100644
--- a/Documentation/filesystems/overlayfs.txt
+++ b/Documentation/filesystems/overlayfs.txt
@@ -286,6 +286,12 @@ pointed by REDIRECT. This should not be possible on local system as setting
 "trusted." xattrs will require CAP_SYS_ADMIN. But it should be possible
 for untrusted layers like from a pen drive.
 
+Note: redirect_dir={off|nofollow|follow(*)} conflicts with metacopy=on, and
+results in an error.
+
+(*) redirect_dir=follow only conflicts with metacopy=on if upperdir=... is
+given.
+
 Sharing and copying layers
 --------------------------
 
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 22ffb23ea44d9..0116735cc3214 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -472,6 +472,7 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
 {
 	char *p;
 	int err;
+	bool metacopy_opt = false, redirect_opt = false;
 
 	config->redirect_mode = kstrdup(ovl_redirect_mode_def(), GFP_KERNEL);
 	if (!config->redirect_mode)
@@ -516,6 +517,7 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
 			config->redirect_mode = match_strdup(&args[0]);
 			if (!config->redirect_mode)
 				return -ENOMEM;
+			redirect_opt = true;
 			break;
 
 		case OPT_INDEX_ON:
@@ -548,6 +550,7 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
 
 		case OPT_METACOPY_ON:
 			config->metacopy = true;
+			metacopy_opt = true;
 			break;
 
 		case OPT_METACOPY_OFF:
@@ -572,13 +575,32 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
 	if (err)
 		return err;
 
-	/* metacopy feature with upper requires redirect_dir=on */
-	if (config->upperdir && config->metacopy && !config->redirect_dir) {
-		pr_warn("overlayfs: metadata only copy up requires \"redirect_dir=on\", falling back to metacopy=off.\n");
-		config->metacopy = false;
-	} else if (config->metacopy && !config->redirect_follow) {
-		pr_warn("overlayfs: metadata only copy up requires \"redirect_dir=follow\" on non-upper mount, falling back to metacopy=off.\n");
-		config->metacopy = false;
+	/*
+	 * This is to make the logic below simpler.  It doesn't make any other
+	 * difference, since config->redirect_dir is only used for upper.
+	 */
+	if (!config->upperdir && config->redirect_follow)
+		config->redirect_dir = true;
+
+	/* Resolve metacopy -> redirect_dir dependency */
+	if (config->metacopy && !config->redirect_dir) {
+		if (metacopy_opt && redirect_opt) {
+			pr_err("overlayfs: conflicting options: metacopy=on,redirect_dir=%s\n",
+			       config->redirect_mode);
+			return -EINVAL;
+		}
+		if (redirect_opt) {
+			/*
+			 * There was an explicit redirect_dir=... that resulted
+			 * in this conflict.
+			 */
+			pr_info("overlayfs: disabling metacopy due to redirect_dir=%s\n",
+				config->redirect_mode);
+			config->metacopy = false;
+		} else {
+			/* Automatically enable redirect otherwise. */
+			config->redirect_follow = config->redirect_dir = true;
+		}
 	}
 
 	return 0;
-- 
GitLab


From 7de414a9dd91426318df7b63da024b2b07e53df5 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Thu, 1 Nov 2018 12:02:37 -0700
Subject: [PATCH 0457/1890] net: drop skb on failure in ip_check_defrag()

Most callers of pskb_trim_rcsum() simply drop the skb when
it fails, however, ip_check_defrag() still continues to pass
the skb up to stack. This is suspicious.

In ip_check_defrag(), after we learn the skb is an IP fragment,
passing the skb to callers makes no sense, because callers expect
fragments are defrag'ed on success. So, dropping the skb when we
can't defrag it is reasonable.

Note, prior to commit 88078d98d1bb, this is not a big problem as
checksum will be fixed up anyway. After it, the checksum is not
correct on failure.

Found this during code review.

Fixes: 88078d98d1bb ("net: pskb_trim_rcsum() and CHECKSUM_COMPLETE are friends")
Cc: Eric Dumazet <edumazet@google.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_fragment.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 9b0158fa431f2..d6ee343fdb864 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -722,10 +722,14 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user)
 	if (ip_is_fragment(&iph)) {
 		skb = skb_share_check(skb, GFP_ATOMIC);
 		if (skb) {
-			if (!pskb_may_pull(skb, netoff + iph.ihl * 4))
-				return skb;
-			if (pskb_trim_rcsum(skb, netoff + len))
-				return skb;
+			if (!pskb_may_pull(skb, netoff + iph.ihl * 4)) {
+				kfree_skb(skb);
+				return NULL;
+			}
+			if (pskb_trim_rcsum(skb, netoff + len)) {
+				kfree_skb(skb);
+				return NULL;
+			}
 			memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
 			if (ip_defrag(net, skb, user))
 				return NULL;
-- 
GitLab


From c961cb3be9064d1097ccc019390f8b5739daafc6 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Wed, 31 Oct 2018 09:28:06 -0500
Subject: [PATCH 0458/1890] of: Fix cpu node iterator to not ignore disabled
 cpu nodes

In most cases, nodes with 'status = "disabled";' are treated as if the
node is not present though it is a common bug to forget to check that.
However, cpu nodes are different in that "disabled" simply means offline
and the OS can bring the CPU core online. Commit f1f207e43b8a ("of: Add
cpu node iterator for_each_of_cpu_node()") followed the common behavior
of ignoring disabled cpu nodes. This breaks some powerpc systems (at
least NXP P50XX/e5500). Fix this by dropping the status check.

Fixes: 651d44f9679c ("of: use for_each_of_cpu_node iterator")
Fixes: f1f207e43b8a ("of: Add cpu node iterator for_each_of_cpu_node()")
Reported-by: Christian Zigotzky <chzigotzky@xenosoft.de>
Tested-by: Michael Ellerman <mpe@ellerman.id.au>
Cc: Frank Rowand <frowand.list@gmail.com>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 drivers/of/base.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/of/base.c b/drivers/of/base.c
index cc62da2786638..e47c5ce6cd589 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -776,8 +776,6 @@ struct device_node *of_get_next_cpu_node(struct device_node *prev)
 		if (!(of_node_name_eq(next, "cpu") ||
 		      (next->type && !of_node_cmp(next->type, "cpu"))))
 			continue;
-		if (!__of_device_is_available(next))
-			continue;
 		if (of_node_get(next))
 			break;
 	}
-- 
GitLab


From 0e9b4a82710220c04100892fb7277b78fd33a747 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 1 Nov 2018 18:19:03 -0400
Subject: [PATCH 0459/1890] missing bits of "iov_iter: Separate type from
 direction and use accessor functions"

sunrpc patches from nfs tree conflict with calling conventions change done
in iov_iter work.  Trivial fixup...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 net/sunrpc/xprtsock.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 1b51e04d35660..ae77c71c1f640 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -361,7 +361,7 @@ static ssize_t
 xs_read_kvec(struct socket *sock, struct msghdr *msg, int flags,
 		struct kvec *kvec, size_t count, size_t seek)
 {
-	iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, kvec, 1, count);
+	iov_iter_kvec(&msg->msg_iter, READ, kvec, 1, count);
 	return xs_sock_recvmsg(sock, msg, flags, seek);
 }
 
@@ -370,7 +370,7 @@ xs_read_bvec(struct socket *sock, struct msghdr *msg, int flags,
 		struct bio_vec *bvec, unsigned long nr, size_t count,
 		size_t seek)
 {
-	iov_iter_bvec(&msg->msg_iter, READ | ITER_BVEC, bvec, nr, count);
+	iov_iter_bvec(&msg->msg_iter, READ, bvec, nr, count);
 	return xs_sock_recvmsg(sock, msg, flags, seek);
 }
 
-- 
GitLab


From ba1f0d95576902c10930d3467e638bac38f942f1 Mon Sep 17 00:00:00 2001
From: Anup Patel <anup@brainfault.org>
Date: Thu, 1 Nov 2018 10:40:32 +0530
Subject: [PATCH 0460/1890] RISC-V: refresh defconfig

This patch updates defconfig using savedefconfig on Linux-4.19.  It is
intended to have no functional change.

Signed-off-by: Anup Patel <anup@brainfault.org>
Reviewed-by: Palmer Dabbelt <palmer@sifive.com>
Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
---
 arch/riscv/configs/defconfig | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index 36473d7dbaac4..07fa9ea75fea1 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -1,6 +1,3 @@
-CONFIG_SMP=y
-CONFIG_PCI=y
-CONFIG_PCIE_XILINX=y
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
 CONFIG_IKCONFIG=y
@@ -11,10 +8,15 @@ CONFIG_CFS_BANDWIDTH=y
 CONFIG_CGROUP_BPF=y
 CONFIG_NAMESPACES=y
 CONFIG_USER_NS=y
+CONFIG_CHECKPOINT_RESTORE=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_EXPERT=y
-CONFIG_CHECKPOINT_RESTORE=y
 CONFIG_BPF_SYSCALL=y
+CONFIG_SMP=y
+CONFIG_PCI=y
+CONFIG_PCIE_XILINX=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
@@ -59,6 +61,7 @@ CONFIG_USB_OHCI_HCD_PLATFORM=y
 CONFIG_USB_STORAGE=y
 CONFIG_USB_UAS=y
 CONFIG_VIRTIO_MMIO=y
+CONFIG_SIFIVE_PLIC=y
 CONFIG_RAS=y
 CONFIG_EXT4_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
@@ -72,8 +75,5 @@ CONFIG_NFS_V4=y
 CONFIG_NFS_V4_1=y
 CONFIG_NFS_V4_2=y
 CONFIG_ROOT_NFS=y
-# CONFIG_RCU_TRACE is not set
 CONFIG_CRYPTO_USER_API_HASH=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_SIFIVE_PLIC=y
+# CONFIG_RCU_TRACE is not set
-- 
GitLab


From 153fcd5f6d93b8e1e4040b1337f564a10f8d93af Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Fri, 2 Nov 2018 08:50:51 +0800
Subject: [PATCH 0461/1890] block: brd: associate with queue until adding disk

brd_free() may be called in failure path on one brd instance which
disk isn't added yet, so release handler of gendisk may free the
associated request_queue early and causes the following use-after-free[1].

This patch fixes this issue by associating gendisk with request_queue
just before adding disk.

[1] KASAN: use-after-free Read in del_timer_syncNon-volatile memory driver v1.3
Linux agpgart interface v0.103
[drm] Initialized vgem 1.0.0 20120112 for virtual device on minor 0
usbcore: registered new interface driver udl
==================================================================
BUG: KASAN: use-after-free in __lock_acquire+0x36d9/0x4c20
kernel/locking/lockdep.c:3218
Read of size 8 at addr ffff8801d1b6b540 by task swapper/0/1

CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.19.0+ #88
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
Google 01/01/2011
Call Trace:
  __dump_stack lib/dump_stack.c:77 [inline]
  dump_stack+0x244/0x39d lib/dump_stack.c:113
  print_address_description.cold.7+0x9/0x1ff mm/kasan/report.c:256
  kasan_report_error mm/kasan/report.c:354 [inline]
  kasan_report.cold.8+0x242/0x309 mm/kasan/report.c:412
  __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:433
  __lock_acquire+0x36d9/0x4c20 kernel/locking/lockdep.c:3218
  lock_acquire+0x1ed/0x520 kernel/locking/lockdep.c:3844
  del_timer_sync+0xb7/0x270 kernel/time/timer.c:1283
  blk_cleanup_queue+0x413/0x710 block/blk-core.c:809
  brd_free+0x5d/0x71 drivers/block/brd.c:422
  brd_init+0x2eb/0x393 drivers/block/brd.c:518
  do_one_initcall+0x145/0x957 init/main.c:890
  do_initcall_level init/main.c:958 [inline]
  do_initcalls init/main.c:966 [inline]
  do_basic_setup init/main.c:984 [inline]
  kernel_init_freeable+0x5c6/0x6b9 init/main.c:1148
  kernel_init+0x11/0x1ae init/main.c:1068
  ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:350

Reported-by: syzbot+3701447012fe951dabb2@syzkaller.appspotmail.com
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/brd.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index df8103dd40ac2..c18586fccb6f2 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -396,15 +396,14 @@ static struct brd_device *brd_alloc(int i)
 	disk->first_minor	= i * max_part;
 	disk->fops		= &brd_fops;
 	disk->private_data	= brd;
-	disk->queue		= brd->brd_queue;
 	disk->flags		= GENHD_FL_EXT_DEVT;
 	sprintf(disk->disk_name, "ram%d", i);
 	set_capacity(disk, rd_size * 2);
-	disk->queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO;
+	brd->brd_queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO;
 
 	/* Tell the block layer that this is not a rotational device */
-	blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
-	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
+	blk_queue_flag_set(QUEUE_FLAG_NONROT, brd->brd_queue);
+	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, brd->brd_queue);
 
 	return brd;
 
@@ -436,6 +435,7 @@ static struct brd_device *brd_init_one(int i, bool *new)
 
 	brd = brd_alloc(i);
 	if (brd) {
+		brd->brd_disk->queue = brd->brd_queue;
 		add_disk(brd->brd_disk);
 		list_add_tail(&brd->brd_list, &brd_devices);
 	}
@@ -503,8 +503,14 @@ static int __init brd_init(void)
 
 	/* point of no return */
 
-	list_for_each_entry(brd, &brd_devices, brd_list)
+	list_for_each_entry(brd, &brd_devices, brd_list) {
+		/*
+		 * associate with queue just before adding disk for
+		 * avoiding to mess up failure path
+		 */
+		brd->brd_disk->queue = brd->brd_queue;
 		add_disk(brd->brd_disk);
+	}
 
 	blk_register_region(MKDEV(RAMDISK_MAJOR, 0), 1UL << MINORBITS,
 				  THIS_MODULE, brd_probe, NULL, NULL);
-- 
GitLab


From b5f2954d30c77649bce9c27e7a0a94299d9cfdf8 Mon Sep 17 00:00:00 2001
From: Dennis Zhou <dennis@kernel.org>
Date: Thu, 1 Nov 2018 17:24:10 -0400
Subject: [PATCH 0462/1890] blkcg: revert blkcg cleanups series

This reverts a series committed earlier due to null pointer exception
bug report in [1]. It seems there are edge case interactions that I did
not consider and will need some time to understand what causes the
adverse interactions.

The original series can be found in [2] with a follow up series in [3].

[1] https://www.spinics.net/lists/cgroups/msg20719.html
[2] https://lore.kernel.org/lkml/20180911184137.35897-1-dennisszhou@gmail.com/
[3] https://lore.kernel.org/lkml/20181020185612.51587-1-dennis@kernel.org/

This reverts the following commits:
d459d853c2ed, b2c3fa546705, 101246ec02b5, b3b9f24f5fcc, e2b0989954ae,
f0fcb3ec89f3, c839e7a03f92, bdc2491708c4, 74b7c02a9bc1, 5bf9a1f3b4ef,
a7b39b4e961c, 07b05bcc3213, 49f4c2dc2b50, 27e6fa996c53

Signed-off-by: Dennis Zhou <dennis@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 Documentation/admin-guide/cgroup-v2.rst |   8 +-
 block/bfq-cgroup.c                      |   4 +-
 block/bfq-iosched.c                     |   2 +-
 block/bio.c                             | 174 +++++++-----------------
 block/blk-cgroup.c                      | 123 ++++++-----------
 block/blk-core.c                        |   1 -
 block/blk-iolatency.c                   |  26 +++-
 block/blk-throttle.c                    |  13 +-
 block/bounce.c                          |   4 +-
 block/cfq-iosched.c                     |   4 +-
 drivers/block/loop.c                    |   5 +-
 drivers/md/raid0.c                      |   2 +-
 fs/buffer.c                             |  10 +-
 fs/ext4/page-io.c                       |   2 +-
 include/linux/bio.h                     |  26 ++--
 include/linux/blk-cgroup.h              | 145 ++++++--------------
 include/linux/blk_types.h               |   1 +
 include/linux/cgroup.h                  |   2 -
 include/linux/writeback.h               |   5 +-
 kernel/cgroup/cgroup.c                  |  48 ++-----
 kernel/trace/blktrace.c                 |   4 +-
 mm/page_io.c                            |   2 +-
 22 files changed, 208 insertions(+), 403 deletions(-)

diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index caf36105a1c7b..184193bcb262a 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1857,10 +1857,8 @@ following two functions.
 
   wbc_init_bio(@wbc, @bio)
 	Should be called for each bio carrying writeback data and
-	associates the bio with the inode's owner cgroup and the
-	corresponding request queue.  This must be called after
-	a queue (device) has been associated with the bio and
-	before submission.
+	associates the bio with the inode's owner cgroup.  Can be
+	called anytime between bio allocation and submission.
 
   wbc_account_io(@wbc, @page, @bytes)
 	Should be called for each data segment being written out.
@@ -1879,7 +1877,7 @@ the configuration, the bio may be executed at a lower priority and if
 the writeback session is holding shared resources, e.g. a journal
 entry, may lead to priority inversion.  There is no one easy solution
 for the problem.  Filesystems can try to work around specific problem
-cases by skipping wbc_init_bio() or using bio_associate_create_blkg()
+cases by skipping wbc_init_bio() or using bio_associate_blkcg()
 directly.
 
 
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index d9a7916ff0ab6..9fe5952d117d5 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -642,7 +642,7 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
 	uint64_t serial_nr;
 
 	rcu_read_lock();
-	serial_nr = __bio_blkcg(bio)->css.serial_nr;
+	serial_nr = bio_blkcg(bio)->css.serial_nr;
 
 	/*
 	 * Check whether blkcg has changed.  The condition may trigger
@@ -651,7 +651,7 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
 	if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr))
 		goto out;
 
-	bfqg = __bfq_bic_change_cgroup(bfqd, bic, __bio_blkcg(bio));
+	bfqg = __bfq_bic_change_cgroup(bfqd, bic, bio_blkcg(bio));
 	/*
 	 * Update blkg_path for bfq_log_* functions. We cache this
 	 * path, and update it here, for the following
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 6075100f03a50..3a27d31fcda60 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -4384,7 +4384,7 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
 
 	rcu_read_lock();
 
-	bfqg = bfq_find_set_group(bfqd, __bio_blkcg(bio));
+	bfqg = bfq_find_set_group(bfqd, bio_blkcg(bio));
 	if (!bfqg) {
 		bfqq = &bfqd->oom_bfqq;
 		goto out;
diff --git a/block/bio.c b/block/bio.c
index bbfeb4ee2892f..4a5a036268fb8 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -609,9 +609,7 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
 	bio->bi_iter = bio_src->bi_iter;
 	bio->bi_io_vec = bio_src->bi_io_vec;
 
-	bio_clone_blkg_association(bio, bio_src);
-
-	blkcg_bio_issue_init(bio);
+	bio_clone_blkcg_association(bio, bio_src);
 }
 EXPORT_SYMBOL(__bio_clone_fast);
 
@@ -1956,151 +1954,69 @@ EXPORT_SYMBOL(bioset_init_from_src);
 
 #ifdef CONFIG_BLK_CGROUP
 
-/**
- * bio_associate_blkg - associate a bio with the a blkg
- * @bio: target bio
- * @blkg: the blkg to associate
- *
- * This tries to associate @bio with the specified blkg.  Association failure
- * is handled by walking up the blkg tree.  Therefore, the blkg associated can
- * be anything between @blkg and the root_blkg.  This situation only happens
- * when a cgroup is dying and then the remaining bios will spill to the closest
- * alive blkg.
- *
- * A reference will be taken on the @blkg and will be released when @bio is
- * freed.
- */
-int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg)
-{
-	if (unlikely(bio->bi_blkg))
-		return -EBUSY;
-	bio->bi_blkg = blkg_tryget_closest(blkg);
-	return 0;
-}
-
-/**
- * __bio_associate_blkg_from_css - internal blkg association function
- *
- * This in the core association function that all association paths rely on.
- * A blkg reference is taken which is released upon freeing of the bio.
- */
-static int __bio_associate_blkg_from_css(struct bio *bio,
-					 struct cgroup_subsys_state *css)
-{
-	struct request_queue *q = bio->bi_disk->queue;
-	struct blkcg_gq *blkg;
-	int ret;
-
-	rcu_read_lock();
-
-	if (!css || !css->parent)
-		blkg = q->root_blkg;
-	else
-		blkg = blkg_lookup_create(css_to_blkcg(css), q);
-
-	ret = bio_associate_blkg(bio, blkg);
-
-	rcu_read_unlock();
-	return ret;
-}
-
-/**
- * bio_associate_blkg_from_css - associate a bio with a specified css
- * @bio: target bio
- * @css: target css
- *
- * Associate @bio with the blkg found by combining the css's blkg and the
- * request_queue of the @bio.  This falls back to the queue's root_blkg if
- * the association fails with the css.
- */
-int bio_associate_blkg_from_css(struct bio *bio,
-				struct cgroup_subsys_state *css)
-{
-	if (unlikely(bio->bi_blkg))
-		return -EBUSY;
-	return __bio_associate_blkg_from_css(bio, css);
-}
-EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css);
-
 #ifdef CONFIG_MEMCG
 /**
- * bio_associate_blkg_from_page - associate a bio with the page's blkg
+ * bio_associate_blkcg_from_page - associate a bio with the page's blkcg
  * @bio: target bio
  * @page: the page to lookup the blkcg from
  *
- * Associate @bio with the blkg from @page's owning memcg and the respective
- * request_queue.  If cgroup_e_css returns NULL, fall back to the queue's
- * root_blkg.
- *
- * Note: this must be called after bio has an associated device.
+ * Associate @bio with the blkcg from @page's owning memcg.  This works like
+ * every other associate function wrt references.
  */
-int bio_associate_blkg_from_page(struct bio *bio, struct page *page)
+int bio_associate_blkcg_from_page(struct bio *bio, struct page *page)
 {
-	struct cgroup_subsys_state *css;
-	int ret;
+	struct cgroup_subsys_state *blkcg_css;
 
-	if (unlikely(bio->bi_blkg))
+	if (unlikely(bio->bi_css))
 		return -EBUSY;
 	if (!page->mem_cgroup)
 		return 0;
-
-	rcu_read_lock();
-
-	css = cgroup_e_css(page->mem_cgroup->css.cgroup, &io_cgrp_subsys);
-
-	ret = __bio_associate_blkg_from_css(bio, css);
-
-	rcu_read_unlock();
-	return ret;
+	blkcg_css = cgroup_get_e_css(page->mem_cgroup->css.cgroup,
+				     &io_cgrp_subsys);
+	bio->bi_css = blkcg_css;
+	return 0;
 }
 #endif /* CONFIG_MEMCG */
 
 /**
- * bio_associate_create_blkg - associate a bio with a blkg from q
- * @q: request_queue where bio is going
+ * bio_associate_blkcg - associate a bio with the specified blkcg
  * @bio: target bio
+ * @blkcg_css: css of the blkcg to associate
+ *
+ * Associate @bio with the blkcg specified by @blkcg_css.  Block layer will
+ * treat @bio as if it were issued by a task which belongs to the blkcg.
  *
- * Associate @bio with the blkg found from the bio's css and the request_queue.
- * If one is not found, bio_lookup_blkg creates the blkg.  This falls back to
- * the queue's root_blkg if association fails.
+ * This function takes an extra reference of @blkcg_css which will be put
+ * when @bio is released.  The caller must own @bio and is responsible for
+ * synchronizing calls to this function.
  */
-int bio_associate_create_blkg(struct request_queue *q, struct bio *bio)
+int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css)
 {
-	struct cgroup_subsys_state *css;
-	int ret = 0;
-
-	/* someone has already associated this bio with a blkg */
-	if (bio->bi_blkg)
-		return ret;
-
-	rcu_read_lock();
-
-	css = blkcg_css();
-
-	ret = __bio_associate_blkg_from_css(bio, css);
-
-	rcu_read_unlock();
-	return ret;
+	if (unlikely(bio->bi_css))
+		return -EBUSY;
+	css_get(blkcg_css);
+	bio->bi_css = blkcg_css;
+	return 0;
 }
+EXPORT_SYMBOL_GPL(bio_associate_blkcg);
 
 /**
- * bio_reassociate_blkg - reassociate a bio with a blkg from q
- * @q: request_queue where bio is going
+ * bio_associate_blkg - associate a bio with the specified blkg
  * @bio: target bio
+ * @blkg: the blkg to associate
  *
- * When submitting a bio, multiple recursive calls to make_request() may occur.
- * This causes the initial associate done in blkcg_bio_issue_check() to be
- * incorrect and reference the prior request_queue.  This performs reassociation
- * when this situation happens.
+ * Associate @bio with the blkg specified by @blkg.  This is the queue specific
+ * blkcg information associated with the @bio, a reference will be taken on the
+ * @blkg and will be freed when the bio is freed.
  */
-int bio_reassociate_blkg(struct request_queue *q, struct bio *bio)
+int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg)
 {
-	if (bio->bi_blkg) {
-		blkg_put(bio->bi_blkg);
-		bio->bi_blkg = NULL;
-	}
-
-	return bio_associate_create_blkg(q, bio);
+	if (unlikely(bio->bi_blkg))
+		return -EBUSY;
+	if (!blkg_try_get(blkg))
+		return -ENODEV;
+	bio->bi_blkg = blkg;
+	return 0;
 }
 
 /**
@@ -2113,6 +2029,10 @@ void bio_disassociate_task(struct bio *bio)
 		put_io_context(bio->bi_ioc);
 		bio->bi_ioc = NULL;
 	}
+	if (bio->bi_css) {
+		css_put(bio->bi_css);
+		bio->bi_css = NULL;
+	}
 	if (bio->bi_blkg) {
 		blkg_put(bio->bi_blkg);
 		bio->bi_blkg = NULL;
@@ -2120,16 +2040,16 @@ void bio_disassociate_task(struct bio *bio)
 }
 
 /**
- * bio_clone_blkg_association - clone blkg association from src to dst bio
+ * bio_clone_blkcg_association - clone blkcg association from src to dst bio
  * @dst: destination bio
  * @src: source bio
  */
-void bio_clone_blkg_association(struct bio *dst, struct bio *src)
+void bio_clone_blkcg_association(struct bio *dst, struct bio *src)
 {
-	if (src->bi_blkg)
-		bio_associate_blkg(dst, src->bi_blkg);
+	if (src->bi_css)
+		WARN_ON(bio_associate_blkcg(dst, src->bi_css));
 }
-EXPORT_SYMBOL_GPL(bio_clone_blkg_association);
+EXPORT_SYMBOL_GPL(bio_clone_blkcg_association);
 #endif /* CONFIG_BLK_CGROUP */
 
 static void __init biovec_init_slabs(void)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 992da5592c6ed..c630e02836a80 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -84,37 +84,6 @@ static void blkg_free(struct blkcg_gq *blkg)
 	kfree(blkg);
 }
 
-static void __blkg_release(struct rcu_head *rcu)
-{
-	struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head);
-
-	percpu_ref_exit(&blkg->refcnt);
-
-	/* release the blkcg and parent blkg refs this blkg has been holding */
-	css_put(&blkg->blkcg->css);
-	if (blkg->parent)
-		blkg_put(blkg->parent);
-
-	wb_congested_put(blkg->wb_congested);
-
-	blkg_free(blkg);
-}
-
-/*
- * A group is RCU protected, but having an rcu lock does not mean that one
- * can access all the fields of blkg and assume these are valid.  For
- * example, don't try to follow throtl_data and request queue links.
- *
- * Having a reference to blkg under an rcu allows accesses to only values
- * local to groups like group stats and group rate limits.
- */
-static void blkg_release(struct percpu_ref *ref)
-{
-	struct blkcg_gq *blkg = container_of(ref, struct blkcg_gq, refcnt);
-
-	call_rcu(&blkg->rcu_head, __blkg_release);
-}
-
 /**
  * blkg_alloc - allocate a blkg
  * @blkcg: block cgroup the new blkg is associated with
@@ -141,6 +110,7 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
 	blkg->q = q;
 	INIT_LIST_HEAD(&blkg->q_node);
 	blkg->blkcg = blkcg;
+	atomic_set(&blkg->refcnt, 1);
 
 	/* root blkg uses @q->root_rl, init rl only for !root blkgs */
 	if (blkcg != &blkcg_root) {
@@ -247,11 +217,6 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
 		blkg_get(blkg->parent);
 	}
 
-	ret = percpu_ref_init(&blkg->refcnt, blkg_release, 0,
-			      GFP_NOWAIT | __GFP_NOWARN);
-	if (ret)
-		goto err_cancel_ref;
-
 	/* invoke per-policy init */
 	for (i = 0; i < BLKCG_MAX_POLS; i++) {
 		struct blkcg_policy *pol = blkcg_policy[i];
@@ -284,8 +249,6 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
 	blkg_put(blkg);
 	return ERR_PTR(ret);
 
-err_cancel_ref:
-	percpu_ref_exit(&blkg->refcnt);
 err_put_congested:
 	wb_congested_put(wb_congested);
 err_put_css:
@@ -296,7 +259,7 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
 }
 
 /**
- * __blkg_lookup_create - lookup blkg, try to create one if not there
+ * blkg_lookup_create - lookup blkg, try to create one if not there
  * @blkcg: blkcg of interest
  * @q: request_queue of interest
  *
@@ -305,11 +268,12 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
  * that all non-root blkg's have access to the parent blkg.  This function
  * should be called under RCU read lock and @q->queue_lock.
  *
- * Returns the blkg or the closest blkg if blkg_create fails as it walks
- * down from root.
+ * Returns pointer to the looked up or created blkg on success, ERR_PTR()
+ * value on error.  If @q is dead, returns ERR_PTR(-EINVAL).  If @q is not
+ * dead and bypassing, returns ERR_PTR(-EBUSY).
  */
-struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
-				      struct request_queue *q)
+struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
+				    struct request_queue *q)
 {
 	struct blkcg_gq *blkg;
 
@@ -321,7 +285,7 @@ struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
 	 * we shouldn't allow anything to go through for a bypassing queue.
 	 */
 	if (unlikely(blk_queue_bypass(q)))
-		return q->root_blkg;
+		return ERR_PTR(blk_queue_dying(q) ? -ENODEV : -EBUSY);
 
 	blkg = __blkg_lookup(blkcg, q, true);
 	if (blkg)
@@ -329,58 +293,23 @@ struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
 
 	/*
 	 * Create blkgs walking down from blkcg_root to @blkcg, so that all
-	 * non-root blkgs have access to their parents.  Returns the closest
-	 * blkg to the intended blkg should blkg_create() fail.
+	 * non-root blkgs have access to their parents.
 	 */
 	while (true) {
 		struct blkcg *pos = blkcg;
 		struct blkcg *parent = blkcg_parent(blkcg);
-		struct blkcg_gq *ret_blkg = q->root_blkg;
-
-		while (parent) {
-			blkg = __blkg_lookup(parent, q, false);
-			if (blkg) {
-				/* remember closest blkg */
-				ret_blkg = blkg;
-				break;
-			}
+
+		while (parent && !__blkg_lookup(parent, q, false)) {
 			pos = parent;
 			parent = blkcg_parent(parent);
 		}
 
 		blkg = blkg_create(pos, q, NULL);
-		if (IS_ERR(blkg))
-			return ret_blkg;
-		if (pos == blkcg)
+		if (pos == blkcg || IS_ERR(blkg))
 			return blkg;
 	}
 }
 
-/**
- * blkg_lookup_create - find or create a blkg
- * @blkcg: target block cgroup
- * @q: target request_queue
- *
- * This looks up or creates the blkg representing the unique pair
- * of the blkcg and the request_queue.
- */
-struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
-				    struct request_queue *q)
-{
-	struct blkcg_gq *blkg = blkg_lookup(blkcg, q);
-	unsigned long flags;
-
-	if (unlikely(!blkg)) {
-		spin_lock_irqsave(q->queue_lock, flags);
-
-		blkg = __blkg_lookup_create(blkcg, q);
-
-		spin_unlock_irqrestore(q->queue_lock, flags);
-	}
-
-	return blkg;
-}
-
 static void blkg_destroy(struct blkcg_gq *blkg)
 {
 	struct blkcg *blkcg = blkg->blkcg;
@@ -424,7 +353,7 @@ static void blkg_destroy(struct blkcg_gq *blkg)
 	 * Put the reference taken at the time of creation so that when all
 	 * queues are gone, group can be destroyed.
 	 */
-	percpu_ref_kill(&blkg->refcnt);
+	blkg_put(blkg);
 }
 
 /**
@@ -451,6 +380,29 @@ static void blkg_destroy_all(struct request_queue *q)
 	q->root_rl.blkg = NULL;
 }
 
+/*
+ * A group is RCU protected, but having an rcu lock does not mean that one
+ * can access all the fields of blkg and assume these are valid.  For
+ * example, don't try to follow throtl_data and request queue links.
+ *
+ * Having a reference to blkg under an rcu allows accesses to only values
+ * local to groups like group stats and group rate limits.
+ */
+void __blkg_release_rcu(struct rcu_head *rcu_head)
+{
+	struct blkcg_gq *blkg = container_of(rcu_head, struct blkcg_gq, rcu_head);
+
+	/* release the blkcg and parent blkg refs this blkg has been holding */
+	css_put(&blkg->blkcg->css);
+	if (blkg->parent)
+		blkg_put(blkg->parent);
+
+	wb_congested_put(blkg->wb_congested);
+
+	blkg_free(blkg);
+}
+EXPORT_SYMBOL_GPL(__blkg_release_rcu);
+
 /*
  * The next function used by blk_queue_for_each_rl().  It's a bit tricky
  * because the root blkg uses @q->root_rl instead of its own rl.
@@ -1796,7 +1748,8 @@ void blkcg_maybe_throttle_current(void)
 	blkg = blkg_lookup(blkcg, q);
 	if (!blkg)
 		goto out;
-	if (!blkg_tryget(blkg))
+	blkg = blkg_try_get(blkg);
+	if (!blkg)
 		goto out;
 	rcu_read_unlock();
 
diff --git a/block/blk-core.c b/block/blk-core.c
index 26a5dac80ed98..ce12515f9b9b9 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2435,7 +2435,6 @@ blk_qc_t generic_make_request(struct bio *bio)
 			if (q)
 				blk_queue_exit(q);
 			q = bio->bi_disk->queue;
-			bio_reassociate_blkg(q, bio);
 			flags = 0;
 			if (bio->bi_opf & REQ_NOWAIT)
 				flags = BLK_MQ_REQ_NOWAIT;
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index 35c48d7b8f78e..bb240a0c1309f 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -480,12 +480,34 @@ static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio,
 				     spinlock_t *lock)
 {
 	struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);
-	struct blkcg_gq *blkg = bio->bi_blkg;
+	struct blkcg *blkcg;
+	struct blkcg_gq *blkg;
+	struct request_queue *q = rqos->q;
 	bool issue_as_root = bio_issue_as_root_blkg(bio);
 
 	if (!blk_iolatency_enabled(blkiolat))
 		return;
 
+	rcu_read_lock();
+	blkcg = bio_blkcg(bio);
+	bio_associate_blkcg(bio, &blkcg->css);
+	blkg = blkg_lookup(blkcg, q);
+	if (unlikely(!blkg)) {
+		if (!lock)
+			spin_lock_irq(q->queue_lock);
+		blkg = blkg_lookup_create(blkcg, q);
+		if (IS_ERR(blkg))
+			blkg = NULL;
+		if (!lock)
+			spin_unlock_irq(q->queue_lock);
+	}
+	if (!blkg)
+		goto out;
+
+	bio_issue_init(&bio->bi_issue, bio_sectors(bio));
+	bio_associate_blkg(bio, blkg);
+out:
+	rcu_read_unlock();
 	while (blkg && blkg->parent) {
 		struct iolatency_grp *iolat = blkg_to_lat(blkg);
 		if (!iolat) {
@@ -706,7 +728,7 @@ static void blkiolatency_timer_fn(struct timer_list *t)
 		 * We could be exiting, don't access the pd unless we have a
 		 * ref on the blkg.
 		 */
-		if (!blkg_tryget(blkg))
+		if (!blkg_try_get(blkg))
 			continue;
 
 		iolat = blkg_to_lat(blkg);
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 4bda70e8db48a..db1a3a2ae0061 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -2115,11 +2115,21 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td)
 }
 #endif
 
+static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio)
+{
+#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
+	/* fallback to root_blkg if we fail to get a blkg ref */
+	if (bio->bi_css && (bio_associate_blkg(bio, tg_to_blkg(tg)) == -ENODEV))
+		bio_associate_blkg(bio, bio->bi_disk->queue->root_blkg);
+	bio_issue_init(&bio->bi_issue, bio_sectors(bio));
+#endif
+}
+
 bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
 		    struct bio *bio)
 {
 	struct throtl_qnode *qn = NULL;
-	struct throtl_grp *tg = blkg_to_tg(blkg);
+	struct throtl_grp *tg = blkg_to_tg(blkg ?: q->root_blkg);
 	struct throtl_service_queue *sq;
 	bool rw = bio_data_dir(bio);
 	bool throttled = false;
@@ -2138,6 +2148,7 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
 	if (unlikely(blk_queue_bypass(q)))
 		goto out_unlock;
 
+	blk_throtl_assoc_bio(tg, bio);
 	blk_throtl_update_idletime(tg);
 
 	sq = &tg->service_queue;
diff --git a/block/bounce.c b/block/bounce.c
index ec0d99995f5f0..418677dcec60f 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -276,9 +276,7 @@ static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask,
 		}
 	}
 
-	bio_clone_blkg_association(bio, bio_src);
-
-	blkcg_bio_issue_init(bio);
+	bio_clone_blkcg_association(bio, bio_src);
 
 	return bio;
 }
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 6a3d87dd3c1ac..ed41aa978c4ab 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -3759,7 +3759,7 @@ static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
 	uint64_t serial_nr;
 
 	rcu_read_lock();
-	serial_nr = __bio_blkcg(bio)->css.serial_nr;
+	serial_nr = bio_blkcg(bio)->css.serial_nr;
 	rcu_read_unlock();
 
 	/*
@@ -3824,7 +3824,7 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic,
 	struct cfq_group *cfqg;
 
 	rcu_read_lock();
-	cfqg = cfq_lookup_cfqg(cfqd, __bio_blkcg(bio));
+	cfqg = cfq_lookup_cfqg(cfqd, bio_blkcg(bio));
 	if (!cfqg) {
 		cfqq = &cfqd->oom_cfqq;
 		goto out;
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index abad6d15f9563..ea9debf59b225 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -77,7 +77,6 @@
 #include <linux/falloc.h>
 #include <linux/uio.h>
 #include <linux/ioprio.h>
-#include <linux/blk-cgroup.h>
 
 #include "loop.h"
 
@@ -1761,8 +1760,8 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	/* always use the first bio's css */
 #ifdef CONFIG_BLK_CGROUP
-	if (cmd->use_aio && rq->bio && rq->bio->bi_blkg) {
-		cmd->css = &bio_blkcg(rq->bio)->css;
+	if (cmd->use_aio && rq->bio && rq->bio->bi_css) {
+		cmd->css = rq->bio->bi_css;
 		css_get(cmd->css);
 	} else
 #endif
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index f3fb5bb8c82a1..ac1cffd2a09b0 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -542,7 +542,7 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
 		    !discard_bio)
 			continue;
 		bio_chain(discard_bio, bio);
-		bio_clone_blkg_association(discard_bio, bio);
+		bio_clone_blkcg_association(discard_bio, bio);
 		if (mddev->gendisk)
 			trace_block_bio_remap(bdev_get_queue(rdev->bdev),
 				discard_bio, disk_devt(mddev->gendisk),
diff --git a/fs/buffer.c b/fs/buffer.c
index 109f551968662..6f1ae3ac97896 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3060,6 +3060,11 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
 	 */
 	bio = bio_alloc(GFP_NOIO, 1);
 
+	if (wbc) {
+		wbc_init_bio(wbc, bio);
+		wbc_account_io(wbc, bh->b_page, bh->b_size);
+	}
+
 	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
 	bio_set_dev(bio, bh->b_bdev);
 	bio->bi_write_hint = write_hint;
@@ -3079,11 +3084,6 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
 		op_flags |= REQ_PRIO;
 	bio_set_op_attrs(bio, op, op_flags);
 
-	if (wbc) {
-		wbc_init_bio(wbc, bio);
-		wbc_account_io(wbc, bh->b_page, bh->b_size);
-	}
-
 	submit_bio(bio);
 	return 0;
 }
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 2aa62d58d8dd8..db7590178dfcf 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -374,13 +374,13 @@ static int io_submit_init_bio(struct ext4_io_submit *io,
 	bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
 	if (!bio)
 		return -ENOMEM;
+	wbc_init_bio(io->io_wbc, bio);
 	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
 	bio_set_dev(bio, bh->b_bdev);
 	bio->bi_end_io = ext4_end_bio;
 	bio->bi_private = ext4_get_io_end(io->io_end);
 	io->io_bio = bio;
 	io->io_next_block = bh->b_blocknr;
-	wbc_init_bio(io->io_wbc, bio);
 	return 0;
 }
 
diff --git a/include/linux/bio.h b/include/linux/bio.h
index b47c7f716731f..056fb627edb3e 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -503,31 +503,23 @@ do {						\
 	disk_devt((bio)->bi_disk)
 
 #if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
-int bio_associate_blkg_from_page(struct bio *bio, struct page *page);
+int bio_associate_blkcg_from_page(struct bio *bio, struct page *page);
 #else
-static inline int bio_associate_blkg_from_page(struct bio *bio,
-					       struct page *page) { return 0; }
+static inline int bio_associate_blkcg_from_page(struct bio *bio,
+						struct page *page) {  return 0; }
 #endif
 
 #ifdef CONFIG_BLK_CGROUP
+int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css);
 int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg);
-int bio_associate_blkg_from_css(struct bio *bio,
-				struct cgroup_subsys_state *css);
-int bio_associate_create_blkg(struct request_queue *q, struct bio *bio);
-int bio_reassociate_blkg(struct request_queue *q, struct bio *bio);
 void bio_disassociate_task(struct bio *bio);
-void bio_clone_blkg_association(struct bio *dst, struct bio *src);
+void bio_clone_blkcg_association(struct bio *dst, struct bio *src);
 #else	/* CONFIG_BLK_CGROUP */
-static inline int bio_associate_blkg_from_css(struct bio *bio,
-					      struct cgroup_subsys_state *css)
-{ return 0; }
-static inline int bio_associate_create_blkg(struct request_queue *q,
-					    struct bio *bio) { return 0; }
-static inline int bio_reassociate_blkg(struct request_queue *q, struct bio *bio)
-{ return 0; }
+static inline int bio_associate_blkcg(struct bio *bio,
+			struct cgroup_subsys_state *blkcg_css) { return 0; }
 static inline void bio_disassociate_task(struct bio *bio) { }
-static inline void bio_clone_blkg_association(struct bio *dst,
-					      struct bio *src) { }
+static inline void bio_clone_blkcg_association(struct bio *dst,
+			struct bio *src) { }
 #endif	/* CONFIG_BLK_CGROUP */
 
 #ifdef CONFIG_HIGHMEM
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 1e76ceebeb5dc..6d766a19f2bbb 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -126,7 +126,7 @@ struct blkcg_gq {
 	struct request_list		rl;
 
 	/* reference count */
-	struct percpu_ref		refcnt;
+	atomic_t			refcnt;
 
 	/* is this blkg online? protected by both blkcg and q locks */
 	bool				online;
@@ -184,8 +184,6 @@ extern struct cgroup_subsys_state * const blkcg_root_css;
 
 struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
 				      struct request_queue *q, bool update_hint);
-struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
-				      struct request_queue *q);
 struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
 				    struct request_queue *q);
 int blkcg_init_queue(struct request_queue *q);
@@ -232,59 +230,22 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
 		   char *input, struct blkg_conf_ctx *ctx);
 void blkg_conf_finish(struct blkg_conf_ctx *ctx);
 
-/**
- * blkcg_css - find the current css
- *
- * Find the css associated with either the kthread or the current task.
- * This may return a dying css, so it is up to the caller to use tryget logic
- * to confirm it is alive and well.
- */
-static inline struct cgroup_subsys_state *blkcg_css(void)
-{
-	struct cgroup_subsys_state *css;
-
-	css = kthread_blkcg();
-	if (css)
-		return css;
-	return task_css(current, io_cgrp_id);
-}
 
 static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
 {
 	return css ? container_of(css, struct blkcg, css) : NULL;
 }
 
-/**
- * __bio_blkcg - internal version of bio_blkcg for bfq and cfq
- *
- * DO NOT USE.
- * There is a flaw using this version of the function.  In particular, this was
- * used in a broken paradigm where association was called on the given css.  It
- * is possible though that the returned css from task_css() is in the process
- * of dying due to migration of the current task.  So it is improper to assume
- * *_get() is going to succeed.  Both BFQ and CFQ rely on this logic and will
- * take additional work to handle more gracefully.
- */
-static inline struct blkcg *__bio_blkcg(struct bio *bio)
-{
-	if (bio && bio->bi_blkg)
-		return bio->bi_blkg->blkcg;
-	return css_to_blkcg(blkcg_css());
-}
-
-/**
- * bio_blkcg - grab the blkcg associated with a bio
- * @bio: target bio
- *
- * This returns the blkcg associated with a bio, NULL if not associated.
- * Callers are expected to either handle NULL or know association has been
- * done prior to calling this.
- */
 static inline struct blkcg *bio_blkcg(struct bio *bio)
 {
-	if (bio && bio->bi_blkg)
-		return bio->bi_blkg->blkcg;
-	return NULL;
+	struct cgroup_subsys_state *css;
+
+	if (bio && bio->bi_css)
+		return css_to_blkcg(bio->bi_css);
+	css = kthread_blkcg();
+	if (css)
+		return css_to_blkcg(css);
+	return css_to_blkcg(task_css(current, io_cgrp_id));
 }
 
 static inline bool blk_cgroup_congested(void)
@@ -490,35 +451,26 @@ static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
  */
 static inline void blkg_get(struct blkcg_gq *blkg)
 {
-	percpu_ref_get(&blkg->refcnt);
+	WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
+	atomic_inc(&blkg->refcnt);
 }
 
 /**
- * blkg_tryget - try and get a blkg reference
+ * blkg_try_get - try and get a blkg reference
  * @blkg: blkg to get
  *
  * This is for use when doing an RCU lookup of the blkg.  We may be in the midst
  * of freeing this blkg, so we can only use it if the refcnt is not zero.
  */
-static inline bool blkg_tryget(struct blkcg_gq *blkg)
+static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg)
 {
-	return percpu_ref_tryget(&blkg->refcnt);
+	if (atomic_inc_not_zero(&blkg->refcnt))
+		return blkg;
+	return NULL;
 }
 
-/**
- * blkg_tryget_closest - try and get a blkg ref on the closet blkg
- * @blkg: blkg to get
- *
- * This walks up the blkg tree to find the closest non-dying blkg and returns
- * the blkg that it did association with as it may not be the passed in blkg.
- */
-static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg)
-{
-	while (!percpu_ref_tryget(&blkg->refcnt))
-		blkg = blkg->parent;
 
-	return blkg;
-}
+void __blkg_release_rcu(struct rcu_head *rcu);
 
 /**
  * blkg_put - put a blkg reference
@@ -526,7 +478,9 @@ static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg)
  */
 static inline void blkg_put(struct blkcg_gq *blkg)
 {
-	percpu_ref_put(&blkg->refcnt);
+	WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
+	if (atomic_dec_and_test(&blkg->refcnt))
+		call_rcu(&blkg->rcu_head, __blkg_release_rcu);
 }
 
 /**
@@ -579,36 +533,25 @@ static inline struct request_list *blk_get_rl(struct request_queue *q,
 
 	rcu_read_lock();
 
-	if (bio && bio->bi_blkg) {
-		blkcg = bio->bi_blkg->blkcg;
-		if (blkcg == &blkcg_root)
-			goto rl_use_root;
-
-		blkg_get(bio->bi_blkg);
-		rcu_read_unlock();
-		return &bio->bi_blkg->rl;
-	}
+	blkcg = bio_blkcg(bio);
 
-	blkcg = css_to_blkcg(blkcg_css());
+	/* bypass blkg lookup and use @q->root_rl directly for root */
 	if (blkcg == &blkcg_root)
-		goto rl_use_root;
+		goto root_rl;
 
+	/*
+	 * Try to use blkg->rl.  blkg lookup may fail under memory pressure
+	 * or if either the blkcg or queue is going away.  Fall back to
+	 * root_rl in such cases.
+	 */
 	blkg = blkg_lookup(blkcg, q);
 	if (unlikely(!blkg))
-		blkg = __blkg_lookup_create(blkcg, q);
-
-	if (blkg->blkcg == &blkcg_root || !blkg_tryget(blkg))
-		goto rl_use_root;
+		goto root_rl;
 
+	blkg_get(blkg);
 	rcu_read_unlock();
 	return &blkg->rl;
-
-	/*
-	 * Each blkg has its own request_list, however, the root blkcg
-	 * uses the request_queue's root_rl.  This is to avoid most
-	 * overhead for the root blkcg.
-	 */
-rl_use_root:
+root_rl:
 	rcu_read_unlock();
 	return &q->root_rl;
 }
@@ -854,26 +797,32 @@ static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg
 				  struct bio *bio) { return false; }
 #endif
 
-
-static inline void blkcg_bio_issue_init(struct bio *bio)
-{
-	bio_issue_init(&bio->bi_issue, bio_sectors(bio));
-}
-
 static inline bool blkcg_bio_issue_check(struct request_queue *q,
 					 struct bio *bio)
 {
+	struct blkcg *blkcg;
 	struct blkcg_gq *blkg;
 	bool throtl = false;
 
 	rcu_read_lock();
+	blkcg = bio_blkcg(bio);
+
+	/* associate blkcg if bio hasn't attached one */
+	bio_associate_blkcg(bio, &blkcg->css);
 
-	bio_associate_create_blkg(q, bio);
-	blkg = bio->bi_blkg;
+	blkg = blkg_lookup(blkcg, q);
+	if (unlikely(!blkg)) {
+		spin_lock_irq(q->queue_lock);
+		blkg = blkg_lookup_create(blkcg, q);
+		if (IS_ERR(blkg))
+			blkg = NULL;
+		spin_unlock_irq(q->queue_lock);
+	}
 
 	throtl = blk_throtl_bio(q, blkg, bio);
 
 	if (!throtl) {
+		blkg = blkg ?: q->root_blkg;
 		/*
 		 * If the bio is flagged with BIO_QUEUE_ENTERED it means this
 		 * is a split bio and we would have already accounted for the
@@ -885,8 +834,6 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
 		blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1);
 	}
 
-	blkcg_bio_issue_init(bio);
-
 	rcu_read_unlock();
 	return !throtl;
 }
@@ -983,7 +930,6 @@ static inline int blkcg_activate_policy(struct request_queue *q,
 static inline void blkcg_deactivate_policy(struct request_queue *q,
 					   const struct blkcg_policy *pol) { }
 
-static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; }
 static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }
 
 static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
@@ -999,7 +945,6 @@ static inline void blk_put_rl(struct request_list *rl) { }
 static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { }
 static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; }
 
-static inline void blkcg_bio_issue_init(struct bio *bio) { }
 static inline bool blkcg_bio_issue_check(struct request_queue *q,
 					 struct bio *bio) { return true; }
 
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 093a818c5b684..1dcf652ba0aa3 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -178,6 +178,7 @@ struct bio {
 	 * release.  Read comment on top of bio_associate_current().
 	 */
 	struct io_context	*bi_ioc;
+	struct cgroup_subsys_state *bi_css;
 	struct blkcg_gq		*bi_blkg;
 	struct bio_issue	bi_issue;
 #endif
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index b8bcbdeb2eac4..32c553556bbdc 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -93,8 +93,6 @@ extern struct css_set init_css_set;
 
 bool css_has_online_children(struct cgroup_subsys_state *css);
 struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss);
-struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgroup,
-					 struct cgroup_subsys *ss);
 struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup,
 					     struct cgroup_subsys *ss);
 struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 738a0c24874f0..fdfd04e348f69 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -246,8 +246,7 @@ static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc,
  *
  * @bio is a part of the writeback in progress controlled by @wbc.  Perform
  * writeback specific initialization.  This is used to apply the cgroup
- * writeback context.  Must be called after the bio has been associated with
- * a device.
+ * writeback context.
  */
 static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio)
 {
@@ -258,7 +257,7 @@ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio)
 	 * regular writeback instead of writing things out itself.
 	 */
 	if (wbc->wb)
-		bio_associate_blkg_from_css(bio, wbc->wb->blkcg_css);
+		bio_associate_blkcg(bio, wbc->wb->blkcg_css);
 }
 
 #else	/* CONFIG_CGROUP_WRITEBACK */
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 4c1cf0969a80e..4a3dae2a82830 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -492,7 +492,7 @@ static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp,
 }
 
 /**
- * cgroup_e_css_by_mask - obtain a cgroup's effective css for the specified ss
+ * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem
  * @cgrp: the cgroup of interest
  * @ss: the subsystem of interest (%NULL returns @cgrp->self)
  *
@@ -501,8 +501,8 @@ static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp,
  * enabled.  If @ss is associated with the hierarchy @cgrp is on, this
  * function is guaranteed to return non-NULL css.
  */
-static struct cgroup_subsys_state *cgroup_e_css_by_mask(struct cgroup *cgrp,
-							struct cgroup_subsys *ss)
+static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
+						struct cgroup_subsys *ss)
 {
 	lockdep_assert_held(&cgroup_mutex);
 
@@ -522,35 +522,6 @@ static struct cgroup_subsys_state *cgroup_e_css_by_mask(struct cgroup *cgrp,
 	return cgroup_css(cgrp, ss);
 }
 
-/**
- * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem
- * @cgrp: the cgroup of interest
- * @ss: the subsystem of interest
- *
- * Find and get the effective css of @cgrp for @ss.  The effective css is
- * defined as the matching css of the nearest ancestor including self which
- * has @ss enabled.  If @ss is not mounted on the hierarchy @cgrp is on,
- * the root css is returned, so this function always returns a valid css.
- *
- * The returned css is not guaranteed to be online, and therefore it is the
- * callers responsiblity to tryget a reference for it.
- */
-struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
-					 struct cgroup_subsys *ss)
-{
-	struct cgroup_subsys_state *css;
-
-	do {
-		css = cgroup_css(cgrp, ss);
-
-		if (css)
-			return css;
-		cgrp = cgroup_parent(cgrp);
-	} while (cgrp);
-
-	return init_css_set.subsys[ss->id];
-}
-
 /**
  * cgroup_get_e_css - get a cgroup's effective css for the specified subsystem
  * @cgrp: the cgroup of interest
@@ -633,11 +604,10 @@ EXPORT_SYMBOL_GPL(of_css);
  *
  * Should be called under cgroup_[tree_]mutex.
  */
-#define for_each_e_css(css, ssid, cgrp)					    \
-	for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++)	    \
-		if (!((css) = cgroup_e_css_by_mask(cgrp,		    \
-						   cgroup_subsys[(ssid)]))) \
-			;						    \
+#define for_each_e_css(css, ssid, cgrp)					\
+	for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++)	\
+		if (!((css) = cgroup_e_css(cgrp, cgroup_subsys[(ssid)]))) \
+			;						\
 		else
 
 /**
@@ -1036,7 +1006,7 @@ static struct css_set *find_existing_css_set(struct css_set *old_cset,
 			 * @ss is in this hierarchy, so we want the
 			 * effective css from @cgrp.
 			 */
-			template[i] = cgroup_e_css_by_mask(cgrp, ss);
+			template[i] = cgroup_e_css(cgrp, ss);
 		} else {
 			/*
 			 * @ss is not in this hierarchy, so we don't want
@@ -3053,7 +3023,7 @@ static int cgroup_apply_control(struct cgroup *cgrp)
 		return ret;
 
 	/*
-	 * At this point, cgroup_e_css_by_mask() results reflect the new csses
+	 * At this point, cgroup_e_css() results reflect the new csses
 	 * making the following cgroup_update_dfl_csses() properly update
 	 * css associations of all tasks in the subtree.
 	 */
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index fac0ddf8a8e22..2868d85f1fb1d 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -764,9 +764,9 @@ blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
 	if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
 		return NULL;
 
-	if (!bio->bi_blkg)
+	if (!bio->bi_css)
 		return NULL;
-	return cgroup_get_kernfs_id(bio_blkcg(bio)->css.cgroup);
+	return cgroup_get_kernfs_id(bio->bi_css->cgroup);
 }
 #else
 static union kernfs_node_id *
diff --git a/mm/page_io.c b/mm/page_io.c
index 573d3663d8462..aafd19ec1db46 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -339,7 +339,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
 		goto out;
 	}
 	bio->bi_opf = REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc);
-	bio_associate_blkg_from_page(bio, page);
+	bio_associate_blkcg_from_page(bio, page);
 	count_swpout_vm_event(page);
 	set_page_writeback(page);
 	unlock_page(page);
-- 
GitLab


From 76af016e65e74023a11b2710e18a98b68b86e3cd Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Thu, 1 Nov 2018 22:28:17 -0700
Subject: [PATCH 0463/1890] apparmor: fix checkpatch error in Parse secmark
 policy

Fix missed spacing error reported by checkpatch for
9caafbe2b4cf ("Parse secmark policy")

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/policy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c
index 8d846a747b846..df9c5890a8789 100644
--- a/security/apparmor/policy.c
+++ b/security/apparmor/policy.c
@@ -231,7 +231,7 @@ void aa_free_profile(struct aa_profile *profile)
 	for (i = 0; i < profile->xattr_count; i++)
 		kzfree(profile->xattrs[i]);
 	kzfree(profile->xattrs);
-	for (i=0; i < profile->secmark_count; i++)
+	for (i = 0; i < profile->secmark_count; i++)
 		kzfree(profile->secmark[i].label);
 	kzfree(profile->secmark);
 	kzfree(profile->dirname);
-- 
GitLab


From 566f52ece7bd1099d20dfe2f6f0801896643cf8f Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 30 Oct 2018 14:11:31 +0000
Subject: [PATCH 0464/1890] apparmor: clean an indentation issue, remove
 extraneous space

Trivial fix to clean up an indentation issue, remove space

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/apparmorfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index e09fe4d7307cd..8963203319ea4 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -1742,7 +1742,7 @@ static int ns_rmdir_op(struct inode *dir, struct dentry *dentry)
 	if (error)
 		return error;
 
-	 parent = aa_get_ns(dir->i_private);
+	parent = aa_get_ns(dir->i_private);
 	/* rmdir calls the generic securityfs functions to remove files
 	 * from the apparmor dir. It is up to the apparmor ns locking
 	 * to avoid races.
-- 
GitLab


From a8874e7e8a8896f2b6c641f4b8e2473eafd35204 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Wed, 31 Oct 2018 12:11:48 +0100
Subject: [PATCH 0465/1890] mm: make the __PAGETABLE_PxD_FOLDED defines
 non-empty

Change the currently empty defines for __PAGETABLE_PMD_FOLDED,
__PAGETABLE_PUD_FOLDED and __PAGETABLE_P4D_FOLDED to return 1.
This makes it possible to use __is_defined() to test if the
preprocessor define exists.

Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/arm/include/asm/pgtable-2level.h    | 2 +-
 arch/m68k/include/asm/pgtable_mm.h       | 4 ++--
 arch/microblaze/include/asm/pgtable.h    | 2 +-
 arch/nds32/include/asm/pgtable.h         | 2 +-
 arch/parisc/include/asm/pgtable.h        | 2 +-
 include/asm-generic/4level-fixup.h       | 2 +-
 include/asm-generic/5level-fixup.h       | 2 +-
 include/asm-generic/pgtable-nop4d-hack.h | 2 +-
 include/asm-generic/pgtable-nop4d.h      | 2 +-
 include/asm-generic/pgtable-nopmd.h      | 2 +-
 include/asm-generic/pgtable-nopud.h      | 2 +-
 11 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h
index 92fd2c8a9af06..12659ce5c1f38 100644
--- a/arch/arm/include/asm/pgtable-2level.h
+++ b/arch/arm/include/asm/pgtable-2level.h
@@ -10,7 +10,7 @@
 #ifndef _ASM_PGTABLE_2LEVEL_H
 #define _ASM_PGTABLE_2LEVEL_H
 
-#define __PAGETABLE_PMD_FOLDED
+#define __PAGETABLE_PMD_FOLDED 1
 
 /*
  * Hardware-wise, we have a two level page table structure, where the first
diff --git a/arch/m68k/include/asm/pgtable_mm.h b/arch/m68k/include/asm/pgtable_mm.h
index 6181e4134483c..fe3ddd73a0ccb 100644
--- a/arch/m68k/include/asm/pgtable_mm.h
+++ b/arch/m68k/include/asm/pgtable_mm.h
@@ -55,12 +55,12 @@
  */
 #ifdef CONFIG_SUN3
 #define PTRS_PER_PTE   16
-#define __PAGETABLE_PMD_FOLDED
+#define __PAGETABLE_PMD_FOLDED 1
 #define PTRS_PER_PMD   1
 #define PTRS_PER_PGD   2048
 #elif defined(CONFIG_COLDFIRE)
 #define PTRS_PER_PTE	512
-#define __PAGETABLE_PMD_FOLDED
+#define __PAGETABLE_PMD_FOLDED 1
 #define PTRS_PER_PMD	1
 #define PTRS_PER_PGD	1024
 #else
diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h
index f64ebb9c9a413..e14b6621c933e 100644
--- a/arch/microblaze/include/asm/pgtable.h
+++ b/arch/microblaze/include/asm/pgtable.h
@@ -63,7 +63,7 @@ extern int mem_init_done;
 
 #include <asm-generic/4level-fixup.h>
 
-#define __PAGETABLE_PMD_FOLDED
+#define __PAGETABLE_PMD_FOLDED 1
 
 #ifdef __KERNEL__
 #ifndef __ASSEMBLY__
diff --git a/arch/nds32/include/asm/pgtable.h b/arch/nds32/include/asm/pgtable.h
index d3e19a55cf530..9f52db930c004 100644
--- a/arch/nds32/include/asm/pgtable.h
+++ b/arch/nds32/include/asm/pgtable.h
@@ -4,7 +4,7 @@
 #ifndef _ASMNDS32_PGTABLE_H
 #define _ASMNDS32_PGTABLE_H
 
-#define __PAGETABLE_PMD_FOLDED
+#define __PAGETABLE_PMD_FOLDED 1
 #include <asm-generic/4level-fixup.h>
 #include <asm-generic/sizes.h>
 
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index b941ac7d4e70b..c7bb74e224360 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -111,7 +111,7 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
 #if CONFIG_PGTABLE_LEVELS == 3
 #define BITS_PER_PMD	(PAGE_SHIFT + PMD_ORDER - BITS_PER_PMD_ENTRY)
 #else
-#define __PAGETABLE_PMD_FOLDED
+#define __PAGETABLE_PMD_FOLDED 1
 #define BITS_PER_PMD	0
 #endif
 #define PTRS_PER_PMD    (1UL << BITS_PER_PMD)
diff --git a/include/asm-generic/4level-fixup.h b/include/asm-generic/4level-fixup.h
index 89f3b03b14451..e3667c9a33a5d 100644
--- a/include/asm-generic/4level-fixup.h
+++ b/include/asm-generic/4level-fixup.h
@@ -3,7 +3,7 @@
 #define _4LEVEL_FIXUP_H
 
 #define __ARCH_HAS_4LEVEL_HACK
-#define __PAGETABLE_PUD_FOLDED
+#define __PAGETABLE_PUD_FOLDED 1
 
 #define PUD_SHIFT			PGDIR_SHIFT
 #define PUD_SIZE			PGDIR_SIZE
diff --git a/include/asm-generic/5level-fixup.h b/include/asm-generic/5level-fixup.h
index 9c2e0708eb82f..73474bb52344d 100644
--- a/include/asm-generic/5level-fixup.h
+++ b/include/asm-generic/5level-fixup.h
@@ -3,7 +3,7 @@
 #define _5LEVEL_FIXUP_H
 
 #define __ARCH_HAS_5LEVEL_HACK
-#define __PAGETABLE_P4D_FOLDED
+#define __PAGETABLE_P4D_FOLDED 1
 
 #define P4D_SHIFT			PGDIR_SHIFT
 #define P4D_SIZE			PGDIR_SIZE
diff --git a/include/asm-generic/pgtable-nop4d-hack.h b/include/asm-generic/pgtable-nop4d-hack.h
index 0c34215263b8a..1d6dd38c0e5ea 100644
--- a/include/asm-generic/pgtable-nop4d-hack.h
+++ b/include/asm-generic/pgtable-nop4d-hack.h
@@ -5,7 +5,7 @@
 #ifndef __ASSEMBLY__
 #include <asm-generic/5level-fixup.h>
 
-#define __PAGETABLE_PUD_FOLDED
+#define __PAGETABLE_PUD_FOLDED 1
 
 /*
  * Having the pud type consist of a pgd gets the size right, and allows
diff --git a/include/asm-generic/pgtable-nop4d.h b/include/asm-generic/pgtable-nop4d.h
index 1a29b2a0282bf..04cb913797bc0 100644
--- a/include/asm-generic/pgtable-nop4d.h
+++ b/include/asm-generic/pgtable-nop4d.h
@@ -4,7 +4,7 @@
 
 #ifndef __ASSEMBLY__
 
-#define __PAGETABLE_P4D_FOLDED
+#define __PAGETABLE_P4D_FOLDED 1
 
 typedef struct { pgd_t pgd; } p4d_t;
 
diff --git a/include/asm-generic/pgtable-nopmd.h b/include/asm-generic/pgtable-nopmd.h
index f35f6e8149e47..b85b8271a73de 100644
--- a/include/asm-generic/pgtable-nopmd.h
+++ b/include/asm-generic/pgtable-nopmd.h
@@ -8,7 +8,7 @@
 
 struct mm_struct;
 
-#define __PAGETABLE_PMD_FOLDED
+#define __PAGETABLE_PMD_FOLDED 1
 
 /*
  * Having the pmd type consist of a pud gets the size right, and allows
diff --git a/include/asm-generic/pgtable-nopud.h b/include/asm-generic/pgtable-nopud.h
index e950b9c50f34f..9bef475db6fef 100644
--- a/include/asm-generic/pgtable-nopud.h
+++ b/include/asm-generic/pgtable-nopud.h
@@ -9,7 +9,7 @@
 #else
 #include <asm-generic/pgtable-nop4d.h>
 
-#define __PAGETABLE_PUD_FOLDED
+#define __PAGETABLE_PUD_FOLDED 1
 
 /*
  * Having the pud type consist of a p4d gets the size right, and allows
-- 
GitLab


From 1071fc5779d9846fec56a4ff6089ab08cac1ab72 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Mon, 15 Oct 2018 10:25:57 +0200
Subject: [PATCH 0466/1890] mm: introduce mm_[p4d|pud|pmd]_folded

Add three architecture overrideable functions to test if the
p4d, pud, or pmd layer of a page table is folded or not.

Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 include/asm-generic/pgtable.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 88ebc6102c7cc..15fd0277ffa69 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -1127,4 +1127,20 @@ static inline bool arch_has_pfn_modify_check(void)
 #endif
 #endif
 
+/*
+ * On some architectures it depends on the mm if the p4d/pud or pmd
+ * layer of the page table hierarchy is folded or not.
+ */
+#ifndef mm_p4d_folded
+#define mm_p4d_folded(mm)	__is_defined(__PAGETABLE_P4D_FOLDED)
+#endif
+
+#ifndef mm_pud_folded
+#define mm_pud_folded(mm)	__is_defined(__PAGETABLE_PUD_FOLDED)
+#endif
+
+#ifndef mm_pmd_folded
+#define mm_pmd_folded(mm)	__is_defined(__PAGETABLE_PMD_FOLDED)
+#endif
+
 #endif /* _ASM_GENERIC_PGTABLE_H */
-- 
GitLab


From 6d212db11947ae5464e4717536ed9faf61c01e86 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Mon, 15 Oct 2018 10:30:23 +0200
Subject: [PATCH 0467/1890] mm: add mm_pxd_folded checks to pgtable_bytes
 accounting functions

The common mm code calls mm_dec_nr_pmds() and mm_dec_nr_puds()
in free_pgtables() if the address range spans a full pud or pmd.
If mm_dec_nr_puds/mm_dec_nr_pmds are non-empty due to configuration
settings they blindly subtract the size of the pmd or pud table from
pgtable_bytes even if the pud or pmd page table layer is folded.

Add explicit mm_[pmd|pud]_folded checks to the four pgtable_bytes
accounting functions mm_inc_nr_puds, mm_inc_nr_pmds, mm_dec_nr_puds
and mm_dec_nr_pmds. As the check for folded page tables can be
overwritten by the architecture, this allows to keep a correct
pgtable_bytes value for platforms that use a dynamic number of
page table levels.

Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 include/linux/mm.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index daa2b8f1e9a88..a3701e91bb576 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1742,11 +1742,15 @@ int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address);
 
 static inline void mm_inc_nr_puds(struct mm_struct *mm)
 {
+	if (mm_pud_folded(mm))
+		return;
 	atomic_long_add(PTRS_PER_PUD * sizeof(pud_t), &mm->pgtables_bytes);
 }
 
 static inline void mm_dec_nr_puds(struct mm_struct *mm)
 {
+	if (mm_pud_folded(mm))
+		return;
 	atomic_long_sub(PTRS_PER_PUD * sizeof(pud_t), &mm->pgtables_bytes);
 }
 #endif
@@ -1766,11 +1770,15 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address);
 
 static inline void mm_inc_nr_pmds(struct mm_struct *mm)
 {
+	if (mm_pmd_folded(mm))
+		return;
 	atomic_long_add(PTRS_PER_PMD * sizeof(pmd_t), &mm->pgtables_bytes);
 }
 
 static inline void mm_dec_nr_pmds(struct mm_struct *mm)
 {
+	if (mm_pmd_folded(mm))
+		return;
 	atomic_long_sub(PTRS_PER_PMD * sizeof(pmd_t), &mm->pgtables_bytes);
 }
 #endif
-- 
GitLab


From e12e4044aede97974f2222eb7f0ed726a5179a32 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Mon, 15 Oct 2018 11:09:16 +0200
Subject: [PATCH 0468/1890] s390/mm: fix mis-accounting of pgtable_bytes

In case a fork or a clone system fails in copy_process and the error
handling does the mmput() at the bad_fork_cleanup_mm label, the
following warning messages will appear on the console:

  BUG: non-zero pgtables_bytes on freeing mm: 16384

The reason for that is the tricks we play with mm_inc_nr_puds() and
mm_inc_nr_pmds() in init_new_context().

A normal 64-bit process has 3 levels of page table, the p4d level and
the pud level are folded. On process termination the free_pud_range()
function in mm/memory.c will subtract 16KB from pgtable_bytes with a
mm_dec_nr_puds() call, but there actually is not really a pud table.

One issue with this is the fact that pgtable_bytes is usually off
by a few kilobytes, but the more severe problem is that for a failed
fork or clone the free_pgtables() function is not called. In this case
there is no mm_dec_nr_puds() or mm_dec_nr_pmds() that go together with
the mm_inc_nr_puds() and mm_inc_nr_pmds in init_new_context().
The pgtable_bytes will be off by 16384 or 32768 bytes and we get the
BUG message. The message itself is purely cosmetic, but annoying.

To fix this override the mm_pmd_folded, mm_pud_folded and mm_p4d_folded
function to check for the true size of the address space.

Reported-by: Li Wang <liwang@redhat.com>
Tested-by: Li Wang <liwang@redhat.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/mmu_context.h |  5 -----
 arch/s390/include/asm/pgalloc.h     |  6 +++---
 arch/s390/include/asm/pgtable.h     | 18 ++++++++++++++++++
 arch/s390/include/asm/tlb.h         |  6 +++---
 arch/s390/mm/pgalloc.c              |  1 +
 5 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index dbd689d556ce5..ccbb53e220240 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -46,8 +46,6 @@ static inline int init_new_context(struct task_struct *tsk,
 		mm->context.asce_limit = STACK_TOP_MAX;
 		mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
 				   _ASCE_USER_BITS | _ASCE_TYPE_REGION3;
-		/* pgd_alloc() did not account this pud */
-		mm_inc_nr_puds(mm);
 		break;
 	case -PAGE_SIZE:
 		/* forked 5-level task, set new asce with new_mm->pgd */
@@ -63,9 +61,6 @@ static inline int init_new_context(struct task_struct *tsk,
 		/* forked 2-level compat task, set new asce with new mm->pgd */
 		mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
 				   _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
-		/* pgd_alloc() did not account this pmd */
-		mm_inc_nr_pmds(mm);
-		mm_inc_nr_puds(mm);
 	}
 	crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
 	return 0;
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index f0f9bcf94c037..5ee733720a571 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -36,11 +36,11 @@ static inline void crst_table_init(unsigned long *crst, unsigned long entry)
 
 static inline unsigned long pgd_entry_type(struct mm_struct *mm)
 {
-	if (mm->context.asce_limit <= _REGION3_SIZE)
+	if (mm_pmd_folded(mm))
 		return _SEGMENT_ENTRY_EMPTY;
-	if (mm->context.asce_limit <= _REGION2_SIZE)
+	if (mm_pud_folded(mm))
 		return _REGION3_ENTRY_EMPTY;
-	if (mm->context.asce_limit <= _REGION1_SIZE)
+	if (mm_p4d_folded(mm))
 		return _REGION2_ENTRY_EMPTY;
 	return _REGION1_ENTRY_EMPTY;
 }
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 411d435e7a7d2..063732414dfbb 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -493,6 +493,24 @@ static inline int is_module_addr(void *addr)
 				   _REGION_ENTRY_PROTECT | \
 				   _REGION_ENTRY_NOEXEC)
 
+static inline bool mm_p4d_folded(struct mm_struct *mm)
+{
+	return mm->context.asce_limit <= _REGION1_SIZE;
+}
+#define mm_p4d_folded(mm) mm_p4d_folded(mm)
+
+static inline bool mm_pud_folded(struct mm_struct *mm)
+{
+	return mm->context.asce_limit <= _REGION2_SIZE;
+}
+#define mm_pud_folded(mm) mm_pud_folded(mm)
+
+static inline bool mm_pmd_folded(struct mm_struct *mm)
+{
+	return mm->context.asce_limit <= _REGION3_SIZE;
+}
+#define mm_pmd_folded(mm) mm_pmd_folded(mm)
+
 static inline int mm_has_pgste(struct mm_struct *mm)
 {
 #ifdef CONFIG_PGSTE
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index 457b7ba0fbb66..b31c779cf5817 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -136,7 +136,7 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
 static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
 				unsigned long address)
 {
-	if (tlb->mm->context.asce_limit <= _REGION3_SIZE)
+	if (mm_pmd_folded(tlb->mm))
 		return;
 	pgtable_pmd_page_dtor(virt_to_page(pmd));
 	tlb_remove_table(tlb, pmd);
@@ -152,7 +152,7 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
 static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
 				unsigned long address)
 {
-	if (tlb->mm->context.asce_limit <= _REGION1_SIZE)
+	if (mm_p4d_folded(tlb->mm))
 		return;
 	tlb_remove_table(tlb, p4d);
 }
@@ -167,7 +167,7 @@ static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
 static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
 				unsigned long address)
 {
-	if (tlb->mm->context.asce_limit <= _REGION2_SIZE)
+	if (mm_pud_folded(tlb->mm))
 		return;
 	tlb_remove_table(tlb, pud);
 }
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 76d89ee8b4288..814f26520aa2c 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -101,6 +101,7 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
 			mm->context.asce_limit = _REGION1_SIZE;
 			mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
 				_ASCE_USER_BITS | _ASCE_TYPE_REGION2;
+			mm_inc_nr_puds(mm);
 		} else {
 			crst_table_init(table, _REGION1_ENTRY_EMPTY);
 			pgd_populate(mm, (pgd_t *) table, (p4d_t *) pgd);
-- 
GitLab


From c43e1c5a801fdde1aad0a2f9ed948753b5275d56 Mon Sep 17 00:00:00 2001
From: Thomas Richter <tmricht@linux.ibm.com>
Date: Thu, 25 Oct 2018 09:04:05 +0100
Subject: [PATCH 0469/1890] s390/cpum_sf: Rework attribute definition for
 diagnostic sampling

Previously, the attribute entry for diagnostic sampling was added
if authorized.  Otherwise, the array of struct attribute contains
two NULL values.

Change this logic and reserve space for the attribute for diagnostic
sampling. If diagnostic sampling is authorized, add an entry in the
respective position in the array of struct attribute.

Signed-off-by: Thomas Richter <tmricht@linux.ibm.com>
Reviewed-by: Hendrik Brueckner <brueckner@linux.ibm.com>
Suggested-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/perf_cpum_sf.c | 33 ++++++++++++++++++++++++++++-----
 1 file changed, 28 insertions(+), 5 deletions(-)

diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 7bf604ff50a1b..bfabeb1889cc0 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -1842,10 +1842,30 @@ static void cpumsf_pmu_del(struct perf_event *event, int flags)
 CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC, PERF_EVENT_CPUM_SF);
 CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC_DIAG, PERF_EVENT_CPUM_SF_DIAG);
 
-static struct attribute *cpumsf_pmu_events_attr[] = {
-	CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC),
-	NULL,
-	NULL,
+/* Attribute list for CPU_SF.
+ *
+ * The availablitiy depends on the CPU_MF sampling facility authorization
+ * for basic + diagnositic samples. This is determined at initialization
+ * time by the sampling facility device driver.
+ * If the authorization for basic samples is turned off, it should be
+ * also turned off for diagnostic sampling.
+ *
+ * During initialization of the device driver, check the authorization
+ * level for diagnostic sampling and installs the attribute
+ * file for diagnostic sampling if necessary.
+ *
+ * For now install a placeholder to reference all possible attributes:
+ * SF_CYCLES_BASIC and SF_CYCLES_BASIC_DIAG.
+ * Add another entry for the final NULL pointer.
+ */
+enum {
+	SF_CYCLES_BASIC_ATTR_IDX = 0,
+	SF_CYCLES_BASIC_DIAG_ATTR_IDX,
+	SF_CYCLES_ATTR_MAX
+};
+
+static struct attribute *cpumsf_pmu_events_attr[SF_CYCLES_ATTR_MAX + 1] = {
+	[SF_CYCLES_BASIC_ATTR_IDX] = CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC)
 };
 
 PMU_FORMAT_ATTR(event, "config:0-63");
@@ -2040,7 +2060,10 @@ static int __init init_cpum_sampling_pmu(void)
 
 	if (si.ad) {
 		sfb_set_limits(CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
-		cpumsf_pmu_events_attr[1] =
+		/* Sampling of diagnostic data authorized,
+		 * install event into attribute list of PMU device.
+		 */
+		cpumsf_pmu_events_attr[SF_CYCLES_BASIC_DIAG_ATTR_IDX] =
 			CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG);
 	}
 
-- 
GitLab


From 9fed920e6817218ad786c3f28e14b4c877cc2aed Mon Sep 17 00:00:00 2001
From: Vasily Gorbik <gor@linux.ibm.com>
Date: Fri, 26 Oct 2018 15:29:59 +0200
Subject: [PATCH 0470/1890] s390/kasan: increase instrumented stack size to 64k

Increase kasan instrumented kernel stack size from 32k to 64k. Other
architectures seems to get away with just doubling kernel stack size under
kasan, but on s390 this appears to be not enough due to bigger frame size.
The particular pain point is kasan inlined checks (CONFIG_KASAN_INLINE
vs CONFIG_KASAN_OUTLINE). With inlined checks one particular case hitting
stack overflow is fs sync on xfs filesystem:

 #0 [9a0681e8]  704 bytes  check_usage at 34b1fc
 #1 [9a0684a8]  432 bytes  check_usage at 34c710
 #2 [9a068658]  1048 bytes  validate_chain at 35044a
 #3 [9a068a70]  312 bytes  __lock_acquire at 3559fe
 #4 [9a068ba8]  440 bytes  lock_acquire at 3576ee
 #5 [9a068d60]  104 bytes  _raw_spin_lock at 21b44e0
 #6 [9a068dc8]  1992 bytes  enqueue_entity at 2dbf72
 #7 [9a069590]  1496 bytes  enqueue_task_fair at 2df5f0
 #8 [9a069b68]  64 bytes  ttwu_do_activate at 28f438
 #9 [9a069ba8]  552 bytes  try_to_wake_up at 298c4c
 #10 [9a069dd0]  168 bytes  wake_up_worker at 23f97c
 #11 [9a069e78]  200 bytes  insert_work at 23fc2e
 #12 [9a069f40]  648 bytes  __queue_work at 2487c0
 #13 [9a06a1c8]  200 bytes  __queue_delayed_work at 24db28
 #14 [9a06a290]  248 bytes  mod_delayed_work_on at 24de84
 #15 [9a06a388]  24 bytes  kblockd_mod_delayed_work_on at 153e2a0
 #16 [9a06a3a0]  288 bytes  __blk_mq_delay_run_hw_queue at 158168c
 #17 [9a06a4c0]  192 bytes  blk_mq_run_hw_queue at 1581a3c
 #18 [9a06a580]  184 bytes  blk_mq_sched_insert_requests at 15a2192
 #19 [9a06a638]  1024 bytes  blk_mq_flush_plug_list at 1590f3a
 #20 [9a06aa38]  704 bytes  blk_flush_plug_list at 1555028
 #21 [9a06acf8]  320 bytes  schedule at 219e476
 #22 [9a06ae38]  760 bytes  schedule_timeout at 21b0aac
 #23 [9a06b130]  408 bytes  wait_for_common at 21a1706
 #24 [9a06b2c8]  360 bytes  xfs_buf_iowait at fa1540
 #25 [9a06b430]  256 bytes  __xfs_buf_submit at fadae6
 #26 [9a06b530]  264 bytes  xfs_buf_read_map at fae3f6
 #27 [9a06b638]  656 bytes  xfs_trans_read_buf_map at 10ac9a8
 #28 [9a06b8c8]  304 bytes  xfs_btree_kill_root at e72426
 #29 [9a06b9f8]  288 bytes  xfs_btree_lookup_get_block at e7bc5e
 #30 [9a06bb18]  624 bytes  xfs_btree_lookup at e7e1a6
 #31 [9a06bd88]  2664 bytes  xfs_alloc_ag_vextent_near at dfa070
 #32 [9a06c7f0]  144 bytes  xfs_alloc_ag_vextent at dff3ca
 #33 [9a06c880]  1128 bytes  xfs_alloc_vextent at e05fce
 #34 [9a06cce8]  584 bytes  xfs_bmap_btalloc at e58342
 #35 [9a06cf30]  1336 bytes  xfs_bmapi_write at e618de
 #36 [9a06d468]  776 bytes  xfs_iomap_write_allocate at ff678e
 #37 [9a06d770]  720 bytes  xfs_map_blocks at f82af8
 #38 [9a06da40]  928 bytes  xfs_writepage_map at f83cd6
 #39 [9a06dde0]  320 bytes  xfs_do_writepage at f85872
 #40 [9a06df20]  1320 bytes  write_cache_pages at 73dfe8
 #41 [9a06e448]  208 bytes  xfs_vm_writepages at f7f892
 #42 [9a06e518]  88 bytes  do_writepages at 73fe6a
 #43 [9a06e570]  872 bytes  __writeback_single_inode at a20cb6
 #44 [9a06e8d8]  664 bytes  writeback_sb_inodes at a23be2
 #45 [9a06eb70]  296 bytes  __writeback_inodes_wb at a242e0
 #46 [9a06ec98]  928 bytes  wb_writeback at a2500e
 #47 [9a06f038]  848 bytes  wb_do_writeback at a260ae
 #48 [9a06f388]  536 bytes  wb_workfn at a28228
 #49 [9a06f5a0]  1088 bytes  process_one_work at 24a234
 #50 [9a06f9e0]  1120 bytes  worker_thread at 24ba26
 #51 [9a06fe40]  104 bytes  kthread at 26545a
 #52 [9a06fea8]             kernel_thread_starter at 21b6b62

To be able to increase the stack size to 64k reuse LLILL instruction
in __switch_to function to load 64k - STACK_FRAME_OVERHEAD - __PT_SIZE
(65192) value as unsigned.

Reported-by: Benjamin Block <bblock@linux.ibm.com>
Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/Makefile                  | 2 +-
 arch/s390/include/asm/thread_info.h | 2 +-
 arch/s390/kernel/entry.S            | 6 +++---
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 0b33577932c3b..e21053e5e0da2 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -27,7 +27,7 @@ KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-option,-ffreestanding)
 KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),-g)
 KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO_DWARF4), $(call cc-option, -gdwarf-4,))
 UTS_MACHINE	:= s390x
-STACK_SIZE	:= $(if $(CONFIG_KASAN),32768,16384)
+STACK_SIZE	:= $(if $(CONFIG_KASAN),65536,16384)
 CHECKFLAGS	+= -D__s390__ -D__s390x__
 
 export LD_BFD
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index 27248f42a03c4..ce4e17c9aad6f 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -14,7 +14,7 @@
  * General size of kernel stacks
  */
 #ifdef CONFIG_KASAN
-#define THREAD_SIZE_ORDER 3
+#define THREAD_SIZE_ORDER 4
 #else
 #define THREAD_SIZE_ORDER 2
 #endif
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 724fba4d09d2d..39191a0feed1c 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -236,10 +236,10 @@ ENTRY(__switch_to)
 	stmg	%r6,%r15,__SF_GPRS(%r15)	# store gprs of prev task
 	lghi	%r4,__TASK_stack
 	lghi	%r1,__TASK_thread
-	lg	%r5,0(%r4,%r3)			# start of kernel stack of next
+	llill	%r5,STACK_INIT
 	stg	%r15,__THREAD_ksp(%r1,%r2)	# store kernel stack of prev
-	lgr	%r15,%r5
-	aghi	%r15,STACK_INIT			# end of kernel stack of next
+	lg	%r15,0(%r4,%r3)			# start of kernel stack of next
+	agr	%r15,%r5			# end of kernel stack of next
 	stg	%r3,__LC_CURRENT		# store task struct of next
 	stg	%r15,__LC_KERNEL_STACK		# store end of kernel stack
 	lg	%r15,__THREAD_ksp(%r1,%r3)	# load kernel stack of next
-- 
GitLab


From a541f0ebcc08ed8bc0cc492eec9a86cb280a9f24 Mon Sep 17 00:00:00 2001
From: "Justin M. Forbes" <jforbes@fedoraproject.org>
Date: Wed, 31 Oct 2018 13:02:03 -0500
Subject: [PATCH 0471/1890] s390/mm: Fix ERROR: "__node_distance" undefined!

Fixes:
ERROR: "__node_distance" [drivers/nvme/host/nvme-core.ko] undefined!
make[1]: *** [scripts/Makefile.modpost:92: __modpost] Error 1
make: *** [Makefile:1275: modules] Error 2
+ exit 1

Signed-off-by: Justin M. Forbes <jforbes@fedoraproject.org>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/numa/numa.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/s390/numa/numa.c b/arch/s390/numa/numa.c
index 5bd374491f946..6c151b42e65db 100644
--- a/arch/s390/numa/numa.c
+++ b/arch/s390/numa/numa.c
@@ -54,6 +54,7 @@ int __node_distance(int a, int b)
 {
 	return mode->distance ? mode->distance(a, b) : 0;
 }
+EXPORT_SYMBOL(__node_distance);
 
 int numa_debug_enabled;
 
-- 
GitLab


From dafb7f9aef2fd44991ff1691721ff765a23be27b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Fri, 2 Nov 2018 06:36:32 -0400
Subject: [PATCH 0472/1890] v4l2-controls: add a missing include

As warned by "make headers_check", the definition for the linux-specific
integer types is missing:

	./usr/include/linux/v4l2-controls.h:1105: found __[us]{8,16,32,64} type without #include <linux/types.h>

Fixes: c27bb30e7b6d ("media: v4l: Add definitions for MPEG-2 slice format and metadata")
Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 include/uapi/linux/v4l2-controls.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index 51b095898f4b5..86a54916206fa 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -47,6 +47,8 @@
  *  videodev2.h.
  */
 
+#include <linux/types.h>
+
 #ifndef __LINUX_V4L2_CONTROLS_H
 #define __LINUX_V4L2_CONTROLS_H
 
-- 
GitLab


From 076f421da5d4594d0a3e60c032ccf02ba55e868a Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 30 Oct 2018 22:26:33 +0900
Subject: [PATCH 0473/1890] kbuild: replace cc-name test with
 CONFIG_CC_IS_CLANG

Evaluating cc-name invokes the compiler every time even when you are
not compiling anything, like 'make help'. This is not efficient.

The compiler type has been already detected in the Kconfig stage.
Use CONFIG_CC_IS_CLANG, instead.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Acked-by: Michael Ellerman <mpe@ellerman.id.au> (powerpc)
Acked-by: Paul Burton <paul.burton@mips.com> (MIPS)
Acked-by: Joel Stanley <joel@jms.id.au>
---
 Makefile                   | 2 +-
 arch/mips/Makefile         | 2 +-
 arch/mips/vdso/Makefile    | 2 +-
 arch/powerpc/Makefile      | 4 ++--
 scripts/Makefile.extrawarn | 2 +-
 5 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/Makefile b/Makefile
index 9aa352b388158..8d6529c371e18 100644
--- a/Makefile
+++ b/Makefile
@@ -702,7 +702,7 @@ stackp-flags-$(CONFIG_STACKPROTECTOR_STRONG)      := -fstack-protector-strong
 
 KBUILD_CFLAGS += $(stackp-flags-y)
 
-ifeq ($(cc-name),clang)
+ifdef CONFIG_CC_IS_CLANG
 KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,)
 KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier)
 KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index 15a84cfd07191..68410490e12fd 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -128,7 +128,7 @@ cflags-y += -ffreestanding
 # clang's output will be based upon the build machine. So for clang we simply
 # unconditionally specify -EB or -EL as appropriate.
 #
-ifeq ($(cc-name),clang)
+ifdef CONFIG_CC_IS_CLANG
 cflags-$(CONFIG_CPU_BIG_ENDIAN)		+= -EB
 cflags-$(CONFIG_CPU_LITTLE_ENDIAN)	+= -EL
 else
diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile
index 34605ca214984..58a0315ad743d 100644
--- a/arch/mips/vdso/Makefile
+++ b/arch/mips/vdso/Makefile
@@ -10,7 +10,7 @@ ccflags-vdso := \
 	$(filter -march=%,$(KBUILD_CFLAGS)) \
 	-D__VDSO__
 
-ifeq ($(cc-name),clang)
+ifdef CONFIG_CC_IS_CLANG
 ccflags-vdso += $(filter --target=%,$(KBUILD_CFLAGS))
 endif
 
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 17be664dafa2f..8a2ce14d68d07 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -96,7 +96,7 @@ aflags-$(CONFIG_CPU_BIG_ENDIAN)		+= $(call cc-option,-mabi=elfv1)
 aflags-$(CONFIG_CPU_LITTLE_ENDIAN)	+= -mabi=elfv2
 endif
 
-ifneq ($(cc-name),clang)
+ifndef CONFIG_CC_IS_CLANG
   cflags-$(CONFIG_CPU_LITTLE_ENDIAN)	+= -mno-strict-align
 endif
 
@@ -175,7 +175,7 @@ endif
 # Work around gcc code-gen bugs with -pg / -fno-omit-frame-pointer in gcc <= 4.8
 # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=44199
 # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52828
-ifneq ($(cc-name),clang)
+ifndef CONFIG_CC_IS_CLANG
 CC_FLAGS_FTRACE	+= $(call cc-ifversion, -lt, 0409, -mno-sched-epilog)
 endif
 endif
diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn
index 24b2fb1d12974..800a10f1b4f39 100644
--- a/scripts/Makefile.extrawarn
+++ b/scripts/Makefile.extrawarn
@@ -64,7 +64,7 @@ endif
 KBUILD_CFLAGS += $(warning)
 else
 
-ifeq ($(cc-name),clang)
+ifdef CONFIG_CC_IS_CLANG
 KBUILD_CFLAGS += $(call cc-disable-warning, initializer-overrides)
 KBUILD_CFLAGS += $(call cc-disable-warning, unused-value)
 KBUILD_CFLAGS += $(call cc-disable-warning, format)
-- 
GitLab


From 995167420797c000bff1f0787ab2390ffa1a9784 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 30 Oct 2018 22:26:34 +0900
Subject: [PATCH 0474/1890] kbuild: remove cc-name variable

There is one more user of $(cc-name) in the top Makefile. It is supposed
to detect Clang before invoking Kconfig, so it should still be there
in the $(shell ...) form. All the other users of $(cc-name) have been
replaced with $(CONFIG_CC_IS_CLANG). Hence, scripts/Kbuild.include does
not need to define cc-name any more.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 Makefile               | 2 +-
 scripts/Kbuild.include | 4 ----
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/Makefile b/Makefile
index 8d6529c371e18..bce41f4180fcb 100644
--- a/Makefile
+++ b/Makefile
@@ -485,7 +485,7 @@ ifneq ($(KBUILD_SRC),)
 	$(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkmakefile $(srctree)
 endif
 
-ifeq ($(cc-name),clang)
+ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep clang),)
 ifneq ($(CROSS_COMPILE),)
 CLANG_TARGET	:= --target=$(notdir $(CROSS_COMPILE:%-=%))
 GCC_TOOLCHAIN_DIR := $(dir $(shell which $(LD)))
diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
index 19a63db62fb9f..bb015551c2d9a 100644
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include
@@ -140,10 +140,6 @@ cc-option-yn = $(call try-run,\
 cc-disable-warning = $(call try-run,\
 	$(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1)))
 
-# cc-name
-# Expands to either gcc or clang
-cc-name = $(shell $(CC) -v 2>&1 | grep -q "clang version" && echo clang || echo gcc)
-
 # cc-version
 cc-version = $(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-version.sh $(CC))
 
-- 
GitLab


From d19b8bc82fc232d17ec45ca148388e4ba05ac4b9 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Sat, 27 Oct 2018 12:41:54 -0700
Subject: [PATCH 0475/1890] nvme-fc: fix request private initialization

The patch made to avoid Coverity reporting of out of bounds access
on aen_op moved the assignment of a pointer, leaving it null when it
was subsequently used to calculate a private pointer. Thus the private
pointer was bad.

Move/correct the private pointer initialization to be in sync with the
patch.

Fixes: 0d2bdf9f4134 ("nvme-fc: rework the request initialization code")
Signed-off-by: James Smart <jsmart2021@gmail.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/host/fc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index e52b9d3c0bd6c..0b70c8bab045a 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -1704,7 +1704,6 @@ __nvme_fc_init_request(struct nvme_fc_ctrl *ctrl,
 	op->fcp_req.rspaddr = &op->rsp_iu;
 	op->fcp_req.rsplen = sizeof(op->rsp_iu);
 	op->fcp_req.done = nvme_fc_fcpio_done;
-	op->fcp_req.private = &op->fcp_req.first_sgl[SG_CHUNK_SIZE];
 	op->ctrl = ctrl;
 	op->queue = queue;
 	op->rq = rq;
@@ -1752,6 +1751,7 @@ nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq,
 	if (res)
 		return res;
 	op->op.fcp_req.first_sgl = &op->sgl[0];
+	op->op.fcp_req.private = &op->priv[0];
 	return res;
 }
 
-- 
GitLab


From 9fe5c59ff6a1e5e26a39b75489a1420e7eaaf0b1 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Wed, 31 Oct 2018 13:15:29 -0600
Subject: [PATCH 0476/1890] nvme-pci: fix conflicting p2p resource adds

The nvme pci driver had been adding its CMB resource to the P2P DMA
subsystem everytime on on a controller reset. This results in the
following warning:

    ------------[ cut here ]------------
    nvme 0000:00:03.0: Conflicting mapping in same section
    WARNING: CPU: 7 PID: 81 at kernel/memremap.c:155 devm_memremap_pages+0xa6/0x380
    ...
    Call Trace:
     pci_p2pdma_add_resource+0x153/0x370
     nvme_reset_work+0x28c/0x17b1 [nvme]
     ? add_timer+0x107/0x1e0
     ? dequeue_entity+0x81/0x660
     ? dequeue_entity+0x3b0/0x660
     ? pick_next_task_fair+0xaf/0x610
     ? __switch_to+0xbc/0x410
     process_one_work+0x1cf/0x350
     worker_thread+0x215/0x3d0
     ? process_one_work+0x350/0x350
     kthread+0x107/0x120
     ? kthread_park+0x80/0x80
     ret_from_fork+0x1f/0x30
    ---[ end trace f7ea76ac6ee72727 ]---
    nvme nvme0: failed to register the CMB

This patch fixes this by registering the CMB with P2P only once.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/host/pci.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index f30031945ee4b..c33bb201b8846 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1663,6 +1663,9 @@ static void nvme_map_cmb(struct nvme_dev *dev)
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
 	int bar;
 
+	if (dev->cmb_size)
+		return;
+
 	dev->cmbsz = readl(dev->bar + NVME_REG_CMBSZ);
 	if (!dev->cmbsz)
 		return;
@@ -2147,7 +2150,6 @@ static void nvme_pci_disable(struct nvme_dev *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
 
-	nvme_release_cmb(dev);
 	pci_free_irq_vectors(pdev);
 
 	if (pci_is_enabled(pdev)) {
@@ -2595,6 +2597,7 @@ static void nvme_remove(struct pci_dev *pdev)
 	nvme_stop_ctrl(&dev->ctrl);
 	nvme_remove_namespaces(&dev->ctrl);
 	nvme_dev_disable(dev, true);
+	nvme_release_cmb(dev);
 	nvme_free_host_mem(dev);
 	nvme_dev_remove_admin(dev);
 	nvme_free_queues(dev, 0);
-- 
GitLab


From f9005571701920551bcf54a500973fb61f2e1eda Mon Sep 17 00:00:00 2001
From: Stefano Stabellini <stefanos@xilinx.com>
Date: Wed, 31 Oct 2018 16:11:49 -0700
Subject: [PATCH 0477/1890] CONFIG_XEN_PV breaks xen_create_contiguous_region
 on ARM

xen_create_contiguous_region has now only an implementation if
CONFIG_XEN_PV is defined. However, on ARM we never set CONFIG_XEN_PV but
we do have an implementation of xen_create_contiguous_region which is
required for swiotlb-xen to work correctly (although it just sets
*dma_handle).

Cc: <stable@vger.kernel.org> # 4.12
Fixes: 16624390816c ("xen: create xen_create/destroy_contiguous_region() stubs for PVHVM only builds")
Signed-off-by: Stefano Stabellini <stefanos@xilinx.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
CC: Jeff.Kubascik@dornerworks.com
CC: Jarvis.Roach@dornerworks.com
CC: Nathan.Studer@dornerworks.com
CC: vkuznets@redhat.com
CC: boris.ostrovsky@oracle.com
CC: jgross@suse.com
CC: julien.grall@arm.com
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 include/xen/xen-ops.h | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index 18803ff76e278..4969817124a8d 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -42,16 +42,12 @@ int xen_setup_shutdown_event(void);
 
 extern unsigned long *xen_contiguous_bitmap;
 
-#ifdef CONFIG_XEN_PV
+#if defined(CONFIG_XEN_PV) || defined(CONFIG_ARM) || defined(CONFIG_ARM64)
 int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
 				unsigned int address_bits,
 				dma_addr_t *dma_handle);
 
 void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order);
-
-int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr,
-		  xen_pfn_t *pfn, int nr, int *err_ptr, pgprot_t prot,
-		  unsigned int domid, bool no_translate, struct page **pages);
 #else
 static inline int xen_create_contiguous_region(phys_addr_t pstart,
 					       unsigned int order,
@@ -63,7 +59,13 @@ static inline int xen_create_contiguous_region(phys_addr_t pstart,
 
 static inline void xen_destroy_contiguous_region(phys_addr_t pstart,
 						 unsigned int order) { }
+#endif
 
+#if defined(CONFIG_XEN_PV)
+int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr,
+		  xen_pfn_t *pfn, int nr, int *err_ptr, pgprot_t prot,
+		  unsigned int domid, bool no_translate, struct page **pages);
+#else
 static inline int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr,
 				xen_pfn_t *pfn, int nr, int *err_ptr,
 				pgprot_t prot,  unsigned int domid,
-- 
GitLab


From 98356eb0ae499c63e78073ccedd9a5fc5c563288 Mon Sep 17 00:00:00 2001
From: Victor Kamensky <kamensky@cisco.com>
Date: Tue, 30 Oct 2018 16:37:10 -0700
Subject: [PATCH 0478/1890] arm64: makefile fix build of .i file in external
 module case

After 'a66649dab350 arm64: fix vdso-offsets.h dependency' if
one will try to build .i file in case of external kernel module,
build fails complaining that prepare0 target is missing. This
issue came up with SystemTap when it tries to build variety
of .i files for its own generated kernel modules trying to
figure given kernel features/capabilities.

The issue is that prepare0 is defined in top level Makefile
only if KBUILD_EXTMOD is not defined. .i file rule depends
on prepare and in case KBUILD_EXTMOD defined top level Makefile
contains empty rule for prepare. But after mentioned commit
arch/arm64/Makefile would introduce dependency on prepare0
through its own prepare target.

Fix it to put proper ifdef KBUILD_EXTMOD around code introduced
by mentioned commit. It matches what top level Makefile does.

Acked-by: Kevin Brodsky <kevin.brodsky@arm.com>
Signed-off-by: Victor Kamensky <kamensky@cisco.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/Makefile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 106039d25e2f7..3016ccb414686 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -147,6 +147,7 @@ archclean:
 	$(Q)$(MAKE) $(clean)=$(boot)
 	$(Q)$(MAKE) $(clean)=$(boot)/dts
 
+ifeq ($(KBUILD_EXTMOD),)
 # We need to generate vdso-offsets.h before compiling certain files in kernel/.
 # In order to do that, we should use the archprepare target, but we can't since
 # asm-offsets.h is included in some files used to generate vdso-offsets.h, and
@@ -156,6 +157,7 @@ archclean:
 prepare: vdso_prepare
 vdso_prepare: prepare0
 	$(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso include/generated/vdso-offsets.h
+endif
 
 define archhelp
   echo  '* Image.gz      - Compressed kernel image (arch/$(ARCH)/boot/Image.gz)'
-- 
GitLab


From 5900e02b5b8cac1792c790ac2c6adb695d530fda Mon Sep 17 00:00:00 2001
From: Yangtao Li <tiny.windzz@gmail.com>
Date: Fri, 2 Nov 2018 08:36:19 -0400
Subject: [PATCH 0479/1890] arm64: kdump: fix small typo

This brings the kernel doc in line with the function signature.

Signed-off-by: Yangtao Li <tiny.windzz@gmail.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/crash_dump.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/crash_dump.c b/arch/arm64/kernel/crash_dump.c
index f46d57c314430..6b5037ed15b28 100644
--- a/arch/arm64/kernel/crash_dump.c
+++ b/arch/arm64/kernel/crash_dump.c
@@ -58,7 +58,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
 /**
  * elfcorehdr_read - read from ELF core header
  * @buf: buffer where the data is placed
- * @csize: number of bytes to read
+ * @count: number of bytes to read
  * @ppos: address in the memory
  *
  * This function reads @count bytes from elf core header which exists
-- 
GitLab


From 966866892cf89d606544bca22d584ba2ef9ec208 Mon Sep 17 00:00:00 2001
From: Anders Roxell <anders.roxell@linaro.org>
Date: Tue, 30 Oct 2018 12:38:50 +0100
Subject: [PATCH 0480/1890] arm64: kprobe: make page to RO mode when allocate
 it

Commit 1404d6f13e47 ("arm64: dump: Add checking for writable and exectuable pages")
has successfully identified code that leaves a page with W+X
permissions.

[    3.245140] arm64/mm: Found insecure W+X mapping at address (____ptrval____)/0xffff000000d90000
[    3.245771] WARNING: CPU: 0 PID: 1 at ../arch/arm64/mm/dump.c:232 note_page+0x410/0x420
[    3.246141] Modules linked in:
[    3.246653] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.19.0-rc5-next-20180928-00001-ge70ae259b853-dirty #62
[    3.247008] Hardware name: linux,dummy-virt (DT)
[    3.247347] pstate: 80000005 (Nzcv daif -PAN -UAO)
[    3.247623] pc : note_page+0x410/0x420
[    3.247898] lr : note_page+0x410/0x420
[    3.248071] sp : ffff00000804bcd0
[    3.248254] x29: ffff00000804bcd0 x28: ffff000009274000
[    3.248578] x27: ffff00000921a000 x26: ffff80007dfff000
[    3.248845] x25: ffff0000093f5000 x24: ffff000009526f6a
[    3.249109] x23: 0000000000000004 x22: ffff000000d91000
[    3.249396] x21: ffff000000d90000 x20: 0000000000000000
[    3.249661] x19: ffff00000804bde8 x18: 0000000000000400
[    3.249924] x17: 0000000000000000 x16: 0000000000000000
[    3.250271] x15: ffffffffffffffff x14: 295f5f5f5f6c6176
[    3.250594] x13: 7274705f5f5f5f28 x12: 2073736572646461
[    3.250941] x11: 20746120676e6970 x10: 70616d20582b5720
[    3.251252] x9 : 6572756365736e69 x8 : 3039643030303030
[    3.251519] x7 : 306666666678302f x6 : ffff0000095467b2
[    3.251802] x5 : 0000000000000000 x4 : 0000000000000000
[    3.252060] x3 : 0000000000000000 x2 : ffffffffffffffff
[    3.252323] x1 : 4d151327adc50b00 x0 : 0000000000000000
[    3.252664] Call trace:
[    3.252953]  note_page+0x410/0x420
[    3.253186]  walk_pgd+0x12c/0x238
[    3.253417]  ptdump_check_wx+0x68/0xf8
[    3.253637]  mark_rodata_ro+0x68/0x98
[    3.253847]  kernel_init+0x38/0x160
[    3.254103]  ret_from_fork+0x10/0x18

kprobes allocates a writable executable page with module_alloc() in
order to store executable code.
Reworked to that when allocate a page it sets mode RO. Inspired by
commit 63fef14fc98a ("kprobes/x86: Make insn buffer always ROX and use text_poke()").

Suggested-by: Arnd Bergmann <arnd@arndb.de>
Suggested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Reviewed-by: Laura Abbott <labbott@redhat.com>
Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
[catalin.marinas@arm.com: removed unnecessary casts]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/probes/kprobes.c | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index 9b65132e789a5..2a5b338b25424 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -23,7 +23,9 @@
 #include <linux/slab.h>
 #include <linux/stop_machine.h>
 #include <linux/sched/debug.h>
+#include <linux/set_memory.h>
 #include <linux/stringify.h>
+#include <linux/vmalloc.h>
 #include <asm/traps.h>
 #include <asm/ptrace.h>
 #include <asm/cacheflush.h>
@@ -42,10 +44,21 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
 static void __kprobes
 post_kprobe_handler(struct kprobe_ctlblk *, struct pt_regs *);
 
+static int __kprobes patch_text(kprobe_opcode_t *addr, u32 opcode)
+{
+	void *addrs[1];
+	u32 insns[1];
+
+	addrs[0] = addr;
+	insns[0] = opcode;
+
+	return aarch64_insn_patch_text(addrs, insns, 1);
+}
+
 static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
 {
 	/* prepare insn slot */
-	p->ainsn.api.insn[0] = cpu_to_le32(p->opcode);
+	patch_text(p->ainsn.api.insn, p->opcode);
 
 	flush_icache_range((uintptr_t) (p->ainsn.api.insn),
 			   (uintptr_t) (p->ainsn.api.insn) +
@@ -118,15 +131,15 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
 	return 0;
 }
 
-static int __kprobes patch_text(kprobe_opcode_t *addr, u32 opcode)
+void *alloc_insn_page(void)
 {
-	void *addrs[1];
-	u32 insns[1];
+	void *page;
 
-	addrs[0] = (void *)addr;
-	insns[0] = (u32)opcode;
+	page = vmalloc_exec(PAGE_SIZE);
+	if (page)
+		set_memory_ro((unsigned long)page, 1);
 
-	return aarch64_insn_patch_text(addrs, insns, 1);
+	return page;
 }
 
 /* arm kprobe: install breakpoint in text */
-- 
GitLab


From 832ad0e3da4510fd17f98804abe512ea9a747035 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Wed, 31 Oct 2018 08:41:34 +0000
Subject: [PATCH 0481/1890] soc: ti: QMSS: Fix usage of irq_set_affinity_hint

The Keystone QMSS driver is pretty damaged, in the sense that it
does things like this:

	irq_set_affinity_hint(irq, to_cpumask(&cpu_map));

where cpu_map is a local variable. As we leave the function, this
will point to nowhere-land, and things will end-up badly.

Instead, let's use a proper cpumask that gets allocated, giving
the driver a chance to actually work with things like irqbalance
as well as have a hypothetical 64bit future.

Cc: stable@vger.kernel.org
Acked-by: Santosh Shilimkar <ssantosh@kernel.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Olof Johansson <olof@lixom.net>
---
 drivers/soc/ti/knav_qmss.h       |  4 ++--
 drivers/soc/ti/knav_qmss_acc.c   | 10 +++++-----
 drivers/soc/ti/knav_qmss_queue.c | 22 +++++++++++++++-------
 3 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/drivers/soc/ti/knav_qmss.h b/drivers/soc/ti/knav_qmss.h
index 7c128132799e0..4c28fa938ac76 100644
--- a/drivers/soc/ti/knav_qmss.h
+++ b/drivers/soc/ti/knav_qmss.h
@@ -329,8 +329,8 @@ struct knav_range_ops {
 };
 
 struct knav_irq_info {
-	int	irq;
-	u32	cpu_map;
+	int		irq;
+	struct cpumask	*cpu_mask;
 };
 
 struct knav_range_info {
diff --git a/drivers/soc/ti/knav_qmss_acc.c b/drivers/soc/ti/knav_qmss_acc.c
index 316e82e46f6cb..2f7fb2dcc1d66 100644
--- a/drivers/soc/ti/knav_qmss_acc.c
+++ b/drivers/soc/ti/knav_qmss_acc.c
@@ -205,18 +205,18 @@ static int knav_range_setup_acc_irq(struct knav_range_info *range,
 {
 	struct knav_device *kdev = range->kdev;
 	struct knav_acc_channel *acc;
-	unsigned long cpu_map;
+	struct cpumask *cpu_mask;
 	int ret = 0, irq;
 	u32 old, new;
 
 	if (range->flags & RANGE_MULTI_QUEUE) {
 		acc = range->acc;
 		irq = range->irqs[0].irq;
-		cpu_map = range->irqs[0].cpu_map;
+		cpu_mask = range->irqs[0].cpu_mask;
 	} else {
 		acc = range->acc + queue;
 		irq = range->irqs[queue].irq;
-		cpu_map = range->irqs[queue].cpu_map;
+		cpu_mask = range->irqs[queue].cpu_mask;
 	}
 
 	old = acc->open_mask;
@@ -239,8 +239,8 @@ static int knav_range_setup_acc_irq(struct knav_range_info *range,
 			acc->name, acc->name);
 		ret = request_irq(irq, knav_acc_int_handler, 0, acc->name,
 				  range);
-		if (!ret && cpu_map) {
-			ret = irq_set_affinity_hint(irq, to_cpumask(&cpu_map));
+		if (!ret && cpu_mask) {
+			ret = irq_set_affinity_hint(irq, cpu_mask);
 			if (ret) {
 				dev_warn(range->kdev->dev,
 					 "Failed to set IRQ affinity\n");
diff --git a/drivers/soc/ti/knav_qmss_queue.c b/drivers/soc/ti/knav_qmss_queue.c
index b5d5673c255ca..8b418379272da 100644
--- a/drivers/soc/ti/knav_qmss_queue.c
+++ b/drivers/soc/ti/knav_qmss_queue.c
@@ -118,19 +118,17 @@ static int knav_queue_setup_irq(struct knav_range_info *range,
 			  struct knav_queue_inst *inst)
 {
 	unsigned queue = inst->id - range->queue_base;
-	unsigned long cpu_map;
 	int ret = 0, irq;
 
 	if (range->flags & RANGE_HAS_IRQ) {
 		irq = range->irqs[queue].irq;
-		cpu_map = range->irqs[queue].cpu_map;
 		ret = request_irq(irq, knav_queue_int_handler, 0,
 					inst->irq_name, inst);
 		if (ret)
 			return ret;
 		disable_irq(irq);
-		if (cpu_map) {
-			ret = irq_set_affinity_hint(irq, to_cpumask(&cpu_map));
+		if (range->irqs[queue].cpu_mask) {
+			ret = irq_set_affinity_hint(irq, range->irqs[queue].cpu_mask);
 			if (ret) {
 				dev_warn(range->kdev->dev,
 					 "Failed to set IRQ affinity\n");
@@ -1262,9 +1260,19 @@ static int knav_setup_queue_range(struct knav_device *kdev,
 
 		range->num_irqs++;
 
-		if (IS_ENABLED(CONFIG_SMP) && oirq.args_count == 3)
-			range->irqs[i].cpu_map =
-				(oirq.args[2] & 0x0000ff00) >> 8;
+		if (IS_ENABLED(CONFIG_SMP) && oirq.args_count == 3) {
+			unsigned long mask;
+			int bit;
+
+			range->irqs[i].cpu_mask = devm_kzalloc(dev,
+							       cpumask_size(), GFP_KERNEL);
+			if (!range->irqs[i].cpu_mask)
+				return -ENOMEM;
+
+			mask = (oirq.args[2] & 0x0000ff00) >> 8;
+			for_each_set_bit(bit, &mask, BITS_PER_LONG)
+				cpumask_set_cpu(bit, range->irqs[i].cpu_mask);
+		}
 	}
 
 	range->num_irqs = min(range->num_irqs, range->num_queues);
-- 
GitLab


From f62717551b2b7d72fc2a3975539117d350bad84d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 30 Oct 2018 09:41:29 +0200
Subject: [PATCH 0482/1890] arm64: fix warnings without CONFIG_IOMMU_DMA

__swiotlb_get_sgtable_page and __swiotlb_mmap_pfn are not only misnamed
but also only used if CONFIG_IOMMU_DMA is set.  Just add a simple ifdef
for now, given that we plan to remove them entirely for the next merge
window.

Reported-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Tested-by: Will Deacon <will.deacon@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/mm/dma-mapping.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 3a703e5d4e323..a3ac262848451 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -160,6 +160,7 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
 	__dma_unmap_area(phys_to_virt(paddr), size, dir);
 }
 
+#ifdef CONFIG_IOMMU_DMA
 static int __swiotlb_get_sgtable_page(struct sg_table *sgt,
 				      struct page *page, size_t size)
 {
@@ -188,6 +189,7 @@ static int __swiotlb_mmap_pfn(struct vm_area_struct *vma,
 
 	return ret;
 }
+#endif /* CONFIG_IOMMU_DMA */
 
 static int __init atomic_pool_init(void)
 {
-- 
GitLab


From 7f3d08f5255b964827214e69967b4d5008d6ea18 Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Fri, 19 Oct 2018 00:17:07 +0800
Subject: [PATCH 0483/1890] arm64: defconfig: Enable some IPMI configs

The arm64 port now runs on servers which use IPMI. This patch enables
relevant core configs to save manually enabling them when testing
mainline.

Signed-off-by: John Garry <john.garry@huawei.com>
[olof: Switched to =m instead of =y]
Signed-off-by: Olof Johansson <olof@lixom.net>
---
 arch/arm64/configs/defconfig | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 3cb995606e605..c9a57d11330b8 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -308,6 +308,9 @@ CONFIG_SERIAL_XILINX_PS_UART_CONSOLE=y
 CONFIG_SERIAL_MVEBU_UART=y
 CONFIG_SERIAL_DEV_BUS=y
 CONFIG_VIRTIO_CONSOLE=y
+CONFIG_IPMI_HANDLER=m
+CONFIG_IPMI_DEVICE_INTERFACE=m
+CONFIG_IPMI_SI=m
 CONFIG_TCG_TPM=y
 CONFIG_TCG_TIS_I2C_INFINEON=y
 CONFIG_I2C_CHARDEV=y
-- 
GitLab


From 513eb98595522bc0cb83831a9daee1d5738e66f1 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Fri, 19 Oct 2018 11:24:26 +0200
Subject: [PATCH 0484/1890] ARM: defconfig: Update multi_v7 to use PREEMPT

Using CONFIG_PREEMPT as preemption model for ARMv7 systems
appear to be the most reasonable default.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Olof Johansson <olof@lixom.net>
---
 arch/arm/configs/multi_v7_defconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index 63af6234c1b69..1c7616815a86a 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -1,6 +1,7 @@
 CONFIG_SYSVIPC=y
 CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
+CONFIG_PREEMPT=y
 CONFIG_CGROUPS=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_EMBEDDED=y
-- 
GitLab


From a7ad38b0dd3c1ba8d6e5a55241e875e9db8331ab Mon Sep 17 00:00:00 2001
From: Guo Ren <ren_guo@c-sky.com>
Date: Sat, 3 Nov 2018 00:51:28 +0800
Subject: [PATCH 0485/1890] clocksource/drivers/c-sky: Add C-SKY SMP timer

The driver is for C-SKY SMP timer. It only supports oneshot event
and 32bit overflow for clocksource. Per cpu core has one timer and
all timers share one clock-counter-input from the same clocksource.

This use mfcr&mtcr instructions to access the regs.

Signed-off-by: Guo Ren <ren_guo@c-sky.com>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/clocksource/Kconfig         |  10 ++
 drivers/clocksource/Makefile        |   1 +
 drivers/clocksource/timer-mp-csky.c | 173 ++++++++++++++++++++++++++++
 include/linux/cpuhotplug.h          |   1 +
 4 files changed, 185 insertions(+)
 create mode 100644 drivers/clocksource/timer-mp-csky.c

diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index a11f4ba98b05c..591c9a8649a5e 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -620,4 +620,14 @@ config RISCV_TIMER
 	  is accessed via both the SBI and the rdcycle instruction.  This is
 	  required for all RISC-V systems.
 
+config CSKY_MP_TIMER
+	bool "SMP Timer for the C-SKY platform" if COMPILE_TEST
+	depends on CSKY
+	select TIMER_OF
+	help
+	  Say yes here to enable C-SKY SMP timer driver used for C-SKY SMP
+	  system.
+	  csky,mptimer is not only used in SMP system, it also could be used
+	  single core system. It's not a mmio reg and it use mtcr/mfcr instruction.
+
 endmenu
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index db51b2427e8a6..5ce82d39cda7d 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -79,3 +79,4 @@ obj-$(CONFIG_CLKSRC_ST_LPC)		+= clksrc_st_lpc.o
 obj-$(CONFIG_X86_NUMACHIP)		+= numachip.o
 obj-$(CONFIG_ATCPIT100_TIMER)		+= timer-atcpit100.o
 obj-$(CONFIG_RISCV_TIMER)		+= riscv_timer.o
+obj-$(CONFIG_CSKY_MP_TIMER)		+= timer-mp-csky.o
diff --git a/drivers/clocksource/timer-mp-csky.c b/drivers/clocksource/timer-mp-csky.c
new file mode 100644
index 0000000000000..a8acc431a774c
--- /dev/null
+++ b/drivers/clocksource/timer-mp-csky.c
@@ -0,0 +1,173 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
+
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/sched_clock.h>
+#include <linux/cpu.h>
+#include <linux/of_irq.h>
+#include <asm/reg_ops.h>
+
+#include "timer-of.h"
+
+#define PTIM_CCVR	"cr<3, 14>"
+#define PTIM_CTLR	"cr<0, 14>"
+#define PTIM_LVR	"cr<6, 14>"
+#define PTIM_TSR	"cr<1, 14>"
+
+static int csky_mptimer_irq;
+
+static int csky_mptimer_set_next_event(unsigned long delta,
+				       struct clock_event_device *ce)
+{
+	mtcr(PTIM_LVR, delta);
+
+	return 0;
+}
+
+static int csky_mptimer_shutdown(struct clock_event_device *ce)
+{
+	mtcr(PTIM_CTLR, 0);
+
+	return 0;
+}
+
+static int csky_mptimer_oneshot(struct clock_event_device *ce)
+{
+	mtcr(PTIM_CTLR, 1);
+
+	return 0;
+}
+
+static int csky_mptimer_oneshot_stopped(struct clock_event_device *ce)
+{
+	mtcr(PTIM_CTLR, 0);
+
+	return 0;
+}
+
+static DEFINE_PER_CPU(struct timer_of, csky_to) = {
+	.flags					= TIMER_OF_CLOCK,
+	.clkevt = {
+		.rating				= 300,
+		.features			= CLOCK_EVT_FEAT_PERCPU |
+						  CLOCK_EVT_FEAT_ONESHOT,
+		.set_state_shutdown		= csky_mptimer_shutdown,
+		.set_state_oneshot		= csky_mptimer_oneshot,
+		.set_state_oneshot_stopped	= csky_mptimer_oneshot_stopped,
+		.set_next_event			= csky_mptimer_set_next_event,
+	},
+};
+
+static irqreturn_t csky_timer_interrupt(int irq, void *dev)
+{
+	struct timer_of *to = this_cpu_ptr(&csky_to);
+
+	mtcr(PTIM_TSR, 0);
+
+	to->clkevt.event_handler(&to->clkevt);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * clock event for percpu
+ */
+static int csky_mptimer_starting_cpu(unsigned int cpu)
+{
+	struct timer_of *to = per_cpu_ptr(&csky_to, cpu);
+
+	to->clkevt.cpumask = cpumask_of(cpu);
+
+	clockevents_config_and_register(&to->clkevt, timer_of_rate(to),
+					2, ULONG_MAX);
+
+	enable_percpu_irq(csky_mptimer_irq, 0);
+
+	return 0;
+}
+
+static int csky_mptimer_dying_cpu(unsigned int cpu)
+{
+	disable_percpu_irq(csky_mptimer_irq);
+
+	return 0;
+}
+
+/*
+ * clock source
+ */
+static u64 sched_clock_read(void)
+{
+	return (u64)mfcr(PTIM_CCVR);
+}
+
+static u64 clksrc_read(struct clocksource *c)
+{
+	return (u64)mfcr(PTIM_CCVR);
+}
+
+struct clocksource csky_clocksource = {
+	.name	= "csky",
+	.rating	= 400,
+	.mask	= CLOCKSOURCE_MASK(32),
+	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
+	.read	= clksrc_read,
+};
+
+static int __init csky_mptimer_init(struct device_node *np)
+{
+	int ret, cpu, cpu_rollback;
+	struct timer_of *to = NULL;
+
+	/*
+	 * Csky_mptimer is designed for C-SKY SMP multi-processors and
+	 * every core has it's own private irq and regs for clkevt and
+	 * clksrc.
+	 *
+	 * The regs is accessed by cpu instruction: mfcr/mtcr instead of
+	 * mmio map style. So we needn't mmio-address in dts, but we still
+	 * need to give clk and irq number.
+	 *
+	 * We use private irq for the mptimer and irq number is the same
+	 * for every core. So we use request_percpu_irq() in timer_of_init.
+	 */
+	csky_mptimer_irq = irq_of_parse_and_map(np, 0);
+	if (csky_mptimer_irq <= 0)
+		return -EINVAL;
+
+	ret = request_percpu_irq(csky_mptimer_irq, csky_timer_interrupt,
+				 "csky_mp_timer", &csky_to);
+	if (ret)
+		return -EINVAL;
+
+	for_each_possible_cpu(cpu) {
+		to = per_cpu_ptr(&csky_to, cpu);
+		ret = timer_of_init(np, to);
+		if (ret)
+			goto rollback;
+	}
+
+	clocksource_register_hz(&csky_clocksource, timer_of_rate(to));
+	sched_clock_register(sched_clock_read, 32, timer_of_rate(to));
+
+	ret = cpuhp_setup_state(CPUHP_AP_CSKY_TIMER_STARTING,
+				"clockevents/csky/timer:starting",
+				csky_mptimer_starting_cpu,
+				csky_mptimer_dying_cpu);
+	if (ret)
+		return -EINVAL;
+
+	return 0;
+
+rollback:
+	for_each_possible_cpu(cpu_rollback) {
+		if (cpu_rollback == cpu)
+			break;
+
+		to = per_cpu_ptr(&csky_to, cpu_rollback);
+		timer_of_cleanup(to);
+	}
+	return -EINVAL;
+}
+TIMER_OF_DECLARE(csky_mptimer, "csky,mptimer", csky_mptimer_init);
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index caf40ad0bbc6e..e0cd2baa83809 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -126,6 +126,7 @@ enum cpuhp_state {
 	CPUHP_AP_MIPS_GIC_TIMER_STARTING,
 	CPUHP_AP_ARC_TIMER_STARTING,
 	CPUHP_AP_RISCV_TIMER_STARTING,
+	CPUHP_AP_CSKY_TIMER_STARTING,
 	CPUHP_AP_KVM_STARTING,
 	CPUHP_AP_KVM_ARM_VGIC_INIT_STARTING,
 	CPUHP_AP_KVM_ARM_VGIC_STARTING,
-- 
GitLab


From 732e5dca374d764bd9711c8c121b0e18ff92985e Mon Sep 17 00:00:00 2001
From: Guo Ren <ren_guo@c-sky.com>
Date: Sat, 3 Nov 2018 00:51:29 +0800
Subject: [PATCH 0486/1890] dt-bindings: timer: C-SKY Multi-processor timer

Dt-bingdings doc for C-SKY SMP system setting.

Signed-off-by: Guo Ren <ren_guo@c-sky.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 .../bindings/timer/csky,mptimer.txt           | 42 +++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/timer/csky,mptimer.txt

diff --git a/Documentation/devicetree/bindings/timer/csky,mptimer.txt b/Documentation/devicetree/bindings/timer/csky,mptimer.txt
new file mode 100644
index 0000000000000..15cfec08fbb8e
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/csky,mptimer.txt
@@ -0,0 +1,42 @@
+============================
+C-SKY Multi-processors Timer
+============================
+
+C-SKY multi-processors timer is designed for C-SKY SMP system and the
+regs is accessed by cpu co-processor 4 registers with mtcr/mfcr.
+
+ - PTIM_CTLR "cr<0, 14>" Control reg to start reset timer.
+ - PTIM_TSR  "cr<1, 14>" Interrupt cleanup status reg.
+ - PTIM_CCVR "cr<3, 14>" Current counter value reg.
+ - PTIM_LVR  "cr<6, 14>" Window value reg to triger next event.
+
+==============================
+timer node bindings definition
+==============================
+
+	Description: Describes SMP timer
+
+	PROPERTIES
+
+	- compatible
+		Usage: required
+		Value type: <string>
+		Definition: must be "csky,mptimer"
+	- clocks
+		Usage: required
+		Value type: <node>
+		Definition: must be input clk node
+	- interrupts
+		Usage: required
+		Value type: <u32>
+		Definition: must be timer irq num defined by soc
+
+Examples:
+---------
+
+	timer: timer {
+		compatible = "csky,mptimer";
+		clocks = <&dummy_apb_clk>;
+		interrupts = <16>;
+		interrupt-parent = <&intc>;
+	};
-- 
GitLab


From 33745c3cc588d9d5e18d6fd88709002158dddd5e Mon Sep 17 00:00:00 2001
From: Guo Ren <ren_guo@c-sky.com>
Date: Sat, 3 Nov 2018 00:51:30 +0800
Subject: [PATCH 0487/1890] clocksource/drivers/c-sky: Add gx6605s SOC system
 timer

The driver is for gx6605s SOC system timer and there are two
same timers in gx6605s. We use one for clkevt and another one for
clksrc.

The timer is mmio map to access, so we need give mmio address in dts.

The counter at 0x0  offset is clock event.
The counter at 0x40 offset is clock source.

Signed-off-by: Guo Ren <ren_guo@c-sky.com>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/clocksource/Kconfig         |   8 ++
 drivers/clocksource/Makefile        |   1 +
 drivers/clocksource/timer-gx6605s.c | 154 ++++++++++++++++++++++++++++
 3 files changed, 163 insertions(+)
 create mode 100644 drivers/clocksource/timer-gx6605s.c

diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index 591c9a8649a5e..55c77e44bb2db 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -630,4 +630,12 @@ config CSKY_MP_TIMER
 	  csky,mptimer is not only used in SMP system, it also could be used
 	  single core system. It's not a mmio reg and it use mtcr/mfcr instruction.
 
+config GX6605S_TIMER
+	bool "Gx6605s SOC system timer driver" if COMPILE_TEST
+	depends on CSKY
+	select CLKSRC_MMIO
+	select TIMER_OF
+	help
+	  This option enables support for gx6605s SOC's timer.
+
 endmenu
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index 5ce82d39cda7d..919633133271d 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -80,3 +80,4 @@ obj-$(CONFIG_X86_NUMACHIP)		+= numachip.o
 obj-$(CONFIG_ATCPIT100_TIMER)		+= timer-atcpit100.o
 obj-$(CONFIG_RISCV_TIMER)		+= riscv_timer.o
 obj-$(CONFIG_CSKY_MP_TIMER)		+= timer-mp-csky.o
+obj-$(CONFIG_GX6605S_TIMER)		+= timer-gx6605s.o
diff --git a/drivers/clocksource/timer-gx6605s.c b/drivers/clocksource/timer-gx6605s.c
new file mode 100644
index 0000000000000..80d0939d040b5
--- /dev/null
+++ b/drivers/clocksource/timer-gx6605s.c
@@ -0,0 +1,154 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
+
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/sched_clock.h>
+
+#include "timer-of.h"
+
+#define CLKSRC_OFFSET	0x40
+
+#define TIMER_STATUS	0x00
+#define TIMER_VALUE	0x04
+#define TIMER_CONTRL	0x10
+#define TIMER_CONFIG	0x20
+#define TIMER_DIV	0x24
+#define TIMER_INI	0x28
+
+#define GX6605S_STATUS_CLR	BIT(0)
+#define GX6605S_CONTRL_RST	BIT(0)
+#define GX6605S_CONTRL_START	BIT(1)
+#define GX6605S_CONFIG_EN	BIT(0)
+#define GX6605S_CONFIG_IRQ_EN	BIT(1)
+
+static irqreturn_t gx6605s_timer_interrupt(int irq, void *dev)
+{
+	struct clock_event_device *ce = dev;
+	void __iomem *base = timer_of_base(to_timer_of(ce));
+
+	writel_relaxed(GX6605S_STATUS_CLR, base + TIMER_STATUS);
+
+	ce->event_handler(ce);
+
+	return IRQ_HANDLED;
+}
+
+static int gx6605s_timer_set_oneshot(struct clock_event_device *ce)
+{
+	void __iomem *base = timer_of_base(to_timer_of(ce));
+
+	/* reset and stop counter */
+	writel_relaxed(GX6605S_CONTRL_RST, base + TIMER_CONTRL);
+
+	/* enable with irq and start */
+	writel_relaxed(GX6605S_CONFIG_EN | GX6605S_CONFIG_IRQ_EN,
+		       base + TIMER_CONFIG);
+
+	return 0;
+}
+
+static int gx6605s_timer_set_next_event(unsigned long delta,
+					struct clock_event_device *ce)
+{
+	void __iomem *base = timer_of_base(to_timer_of(ce));
+
+	/* use reset to pause timer */
+	writel_relaxed(GX6605S_CONTRL_RST, base + TIMER_CONTRL);
+
+	/* config next timeout value */
+	writel_relaxed(ULONG_MAX - delta, base + TIMER_INI);
+	writel_relaxed(GX6605S_CONTRL_START, base + TIMER_CONTRL);
+
+	return 0;
+}
+
+static int gx6605s_timer_shutdown(struct clock_event_device *ce)
+{
+	void __iomem *base = timer_of_base(to_timer_of(ce));
+
+	writel_relaxed(0, base + TIMER_CONTRL);
+	writel_relaxed(0, base + TIMER_CONFIG);
+
+	return 0;
+}
+
+static struct timer_of to = {
+	.flags = TIMER_OF_IRQ | TIMER_OF_BASE | TIMER_OF_CLOCK,
+	.clkevt = {
+		.rating			= 300,
+		.features		= CLOCK_EVT_FEAT_DYNIRQ |
+					  CLOCK_EVT_FEAT_ONESHOT,
+		.set_state_shutdown	= gx6605s_timer_shutdown,
+		.set_state_oneshot	= gx6605s_timer_set_oneshot,
+		.set_next_event		= gx6605s_timer_set_next_event,
+		.cpumask		= cpu_possible_mask,
+	},
+	.of_irq = {
+		.handler		= gx6605s_timer_interrupt,
+		.flags			= IRQF_TIMER | IRQF_IRQPOLL,
+	},
+};
+
+static u64 notrace gx6605s_sched_clock_read(void)
+{
+	void __iomem *base;
+
+	base = timer_of_base(&to) + CLKSRC_OFFSET;
+
+	return (u64)readl_relaxed(base + TIMER_VALUE);
+}
+
+static void gx6605s_clkevt_init(void __iomem *base)
+{
+	writel_relaxed(0, base + TIMER_DIV);
+	writel_relaxed(0, base + TIMER_CONFIG);
+
+	clockevents_config_and_register(&to.clkevt, timer_of_rate(&to), 2,
+					ULONG_MAX);
+}
+
+static int gx6605s_clksrc_init(void __iomem *base)
+{
+	writel_relaxed(0, base + TIMER_DIV);
+	writel_relaxed(0, base + TIMER_INI);
+
+	writel_relaxed(GX6605S_CONTRL_RST, base + TIMER_CONTRL);
+
+	writel_relaxed(GX6605S_CONFIG_EN, base + TIMER_CONFIG);
+
+	writel_relaxed(GX6605S_CONTRL_START, base + TIMER_CONTRL);
+
+	sched_clock_register(gx6605s_sched_clock_read, 32, timer_of_rate(&to));
+
+	return clocksource_mmio_init(base + TIMER_VALUE, "gx6605s",
+			timer_of_rate(&to), 200, 32, clocksource_mmio_readl_up);
+}
+
+static int __init gx6605s_timer_init(struct device_node *np)
+{
+	int ret;
+
+	/*
+	 * The timer driver is for nationalchip gx6605s SOC and there are two
+	 * same timer in gx6605s. We use one for clkevt and another for clksrc.
+	 *
+	 * The timer is mmio map to access, so we need give mmio address in dts.
+	 *
+	 * It provides a 32bit countup timer and interrupt will be caused by
+	 * count-overflow.
+	 * So we need set-next-event by ULONG_MAX - delta in TIMER_INI reg.
+	 *
+	 * The counter at 0x0  offset is clock event.
+	 * The counter at 0x40 offset is clock source.
+	 * They are the same in hardware, just different used by driver.
+	 */
+	ret = timer_of_init(np, &to);
+	if (ret)
+		return ret;
+
+	gx6605s_clkevt_init(timer_of_base(&to));
+
+	return gx6605s_clksrc_init(timer_of_base(&to) + CLKSRC_OFFSET);
+}
+TIMER_OF_DECLARE(csky_gx6605s_timer, "csky,gx6605s-timer", gx6605s_timer_init);
-- 
GitLab


From ab1e77c3f590536b2659c097980efe6e8f713921 Mon Sep 17 00:00:00 2001
From: Guo Ren <ren_guo@c-sky.com>
Date: Sat, 3 Nov 2018 00:51:31 +0800
Subject: [PATCH 0488/1890] dt-bindings: timer: gx6605s SOC timer

Dt-bindings doc for gx6605s SOC's system timer.

Signed-off-by: Guo Ren <ren_guo@c-sky.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 .../bindings/timer/csky,gx6605s-timer.txt     | 42 +++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/timer/csky,gx6605s-timer.txt

diff --git a/Documentation/devicetree/bindings/timer/csky,gx6605s-timer.txt b/Documentation/devicetree/bindings/timer/csky,gx6605s-timer.txt
new file mode 100644
index 0000000000000..6b04344f4beaa
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/csky,gx6605s-timer.txt
@@ -0,0 +1,42 @@
+=================
+gx6605s SOC Timer
+=================
+
+The timer is used in gx6605s soc as system timer and the driver
+contain clk event and clk source.
+
+==============================
+timer node bindings definition
+==============================
+
+	Description: Describes gx6605s SOC timer
+
+	PROPERTIES
+
+	- compatible
+		Usage: required
+		Value type: <string>
+		Definition: must be "csky,gx6605s-timer"
+	- reg
+		Usage: required
+		Value type: <u32 u32>
+		Definition: <phyaddr size> in soc from cpu view
+	- clocks
+		Usage: required
+		Value type: phandle + clock specifier cells
+		Definition: must be input clk node
+	- interrupt
+		Usage: required
+		Value type: <u32>
+		Definition: must be timer irq num defined by soc
+
+Examples:
+---------
+
+	timer0: timer@20a000 {
+		compatible = "csky,gx6605s-timer";
+		reg = <0x0020a000 0x400>;
+		clocks = <&dummy_apb_clk>;
+		interrupts = <10>;
+		interrupt-parent = <&intc>;
+	};
-- 
GitLab


From 0c5d6cb6643f48ad3775322f3ebab6c7eb67484e Mon Sep 17 00:00:00 2001
From: Ronnie Sahlberg <lsahlber@redhat.com>
Date: Thu, 25 Oct 2018 15:43:36 +1000
Subject: [PATCH 0489/1890] cifs: fix return value for cifs_listxattr

If the application buffer was too small to fit all the names
we would still count the number of bytes and return this for
listxattr. This would then trigger a BUG in usercopy.c

Fix the computation of the size so that we return -ERANGE
correctly when the buffer is too small.

This fixes the kernel BUG for xfstest generic/377

Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
---
 fs/cifs/smb2ops.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 23c0a21a66f82..8489b5f706c3e 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -747,6 +747,7 @@ move_smb2_ea_to_cifs(char *dst, size_t dst_size,
 	int rc = 0;
 	unsigned int ea_name_len = ea_name ? strlen(ea_name) : 0;
 	char *name, *value;
+	size_t buf_size = dst_size;
 	size_t name_len, value_len, user_name_len;
 
 	while (src_size > 0) {
@@ -782,9 +783,10 @@ move_smb2_ea_to_cifs(char *dst, size_t dst_size,
 			/* 'user.' plus a terminating null */
 			user_name_len = 5 + 1 + name_len;
 
-			rc += user_name_len;
-
-			if (dst_size >= user_name_len) {
+			if (buf_size == 0) {
+				/* skip copy - calc size only */
+				rc += user_name_len;
+			} else if (dst_size >= user_name_len) {
 				dst_size -= user_name_len;
 				memcpy(dst, "user.", 5);
 				dst += 5;
@@ -792,8 +794,7 @@ move_smb2_ea_to_cifs(char *dst, size_t dst_size,
 				dst += name_len;
 				*dst = 0;
 				++dst;
-			} else if (dst_size == 0) {
-				/* skip copy - calc size only */
+				rc += user_name_len;
 			} else {
 				/* stop before overrun buffer */
 				rc = -ERANGE;
-- 
GitLab


From 413d61008131c8ea22312164ba498075ce2fccc8 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 26 Oct 2018 19:07:21 +0100
Subject: [PATCH 0490/1890] cifs: fix spelling mistake, EACCESS -> EACCES

Trivial fix to a spelling mistake of the error access name EACCESS,
rename to EACCES

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/file.c  | 2 +-
 fs/cifs/inode.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index e262a05a98bfc..542d8828e1d01 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1005,7 +1005,7 @@ cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
  * Set the byte-range lock (mandatory style). Returns:
  * 1) 0, if we set the lock and don't need to request to the server;
  * 2) 1, if no locks prevent us but we need to request to the server;
- * 3) -EACCESS, if there is a lock that prevents us and wait is false.
+ * 3) -EACCES, if there is a lock that prevents us and wait is false.
  */
 static int
 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 1023d78673fb1..a81a9df997c1c 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1320,8 +1320,8 @@ cifs_drop_nlink(struct inode *inode)
 /*
  * If d_inode(dentry) is null (usually meaning the cached dentry
  * is a negative dentry) then we would attempt a standard SMB delete, but
- * if that fails we can not attempt the fall back mechanisms on EACCESS
- * but will return the EACCESS to the caller. Note that the VFS does not call
+ * if that fails we can not attempt the fall back mechanisms on EACCES
+ * but will return the EACCES to the caller. Note that the VFS does not call
  * unlink on negative dentries currently.
  */
 int cifs_unlink(struct inode *dir, struct dentry *dentry)
-- 
GitLab


From f8af49dd1702118e9520f946ce1cf591d553608f Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Sun, 28 Oct 2018 00:47:11 -0500
Subject: [PATCH 0491/1890] smb3: add trace point for tree connection

In debugging certain scenarios, especially reconnect cases,
it can be helpful to have a dynamic trace point for the
result of tree connect.  See sample output below
from a reconnect event. The new event is 'smb3_tcon'

            TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION
               | |       |   ||||       |         |
           cifsd-6071  [001] ....  2659.897923: smb3_reconnect: server=localhost current_mid=0xa
     kworker/1:1-71    [001] ....  2666.026342: smb3_cmd_done: 	sid=0x0 tid=0x0 cmd=0 mid=0
     kworker/1:1-71    [001] ....  2666.026576: smb3_cmd_err: 	sid=0xc49e1787 tid=0x0 cmd=1 mid=1 status=0xc0000016 rc=-5
     kworker/1:1-71    [001] ....  2666.031677: smb3_cmd_done: 	sid=0xc49e1787 tid=0x0 cmd=1 mid=2
     kworker/1:1-71    [001] ....  2666.031921: smb3_cmd_done: 	sid=0xc49e1787 tid=0x6e78f05f cmd=3 mid=3
     kworker/1:1-71    [001] ....  2666.031923: smb3_tcon: xid=0 sid=0xc49e1787 tid=0x0 unc_name=\\localhost\test rc=0
     kworker/1:1-71    [001] ....  2666.032097: smb3_cmd_done: 	sid=0xc49e1787 tid=0x6e78f05f cmd=11 mid=4
     kworker/1:1-71    [001] ....  2666.032265: smb3_cmd_done: 	sid=0xc49e1787 tid=0x7912332f cmd=3 mid=5
     kworker/1:1-71    [001] ....  2666.032266: smb3_tcon: xid=0 sid=0xc49e1787 tid=0x0 unc_name=\\localhost\IPC$ rc=0
     kworker/1:1-71    [001] ....  2666.032386: smb3_cmd_done: 	sid=0xc49e1787 tid=0x7912332f cmd=11 mid=6

Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
---
 fs/cifs/smb2pdu.c |  3 ++-
 fs/cifs/trace.h   | 42 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 7d7b016fe8bb0..a92d3840db9d5 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1512,7 +1512,7 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
 	rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags, &rsp_iov);
 	cifs_small_buf_release(req);
 	rsp = (struct smb2_tree_connect_rsp *)rsp_iov.iov_base;
-
+	trace_smb3_tcon(xid, tcon->tid, ses->Suid, tree, rc);
 	if (rc != 0) {
 		if (tcon) {
 			cifs_stats_fail_inc(tcon, SMB2_TREE_CONNECT_HE);
@@ -1559,6 +1559,7 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
 	if (tcon->ses->server->ops->validate_negotiate)
 		rc = tcon->ses->server->ops->validate_negotiate(xid, tcon);
 tcon_exit:
+
 	free_rsp_buf(resp_buftype, rsp);
 	kfree(unc_path);
 	return rc;
diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h
index cce8414fe7ec2..fb049809555fe 100644
--- a/fs/cifs/trace.h
+++ b/fs/cifs/trace.h
@@ -373,6 +373,48 @@ DEFINE_EVENT(smb3_enter_exit_class, smb3_##name,  \
 DEFINE_SMB3_ENTER_EXIT_EVENT(enter);
 DEFINE_SMB3_ENTER_EXIT_EVENT(exit_done);
 
+/*
+ * For SMB2/SMB3 tree connect
+ */
+
+DECLARE_EVENT_CLASS(smb3_tcon_class,
+	TP_PROTO(unsigned int xid,
+		__u32	tid,
+		__u64	sesid,
+		const char *unc_name,
+		int	rc),
+	TP_ARGS(xid, tid, sesid, unc_name, rc),
+	TP_STRUCT__entry(
+		__field(unsigned int, xid)
+		__field(__u32, tid)
+		__field(__u64, sesid)
+		__field(const char *,  unc_name)
+		__field(int, rc)
+	),
+	TP_fast_assign(
+		__entry->xid = xid;
+		__entry->tid = tid;
+		__entry->sesid = sesid;
+		__entry->unc_name = unc_name;
+		__entry->rc = rc;
+	),
+	TP_printk("xid=%u sid=0x%llx tid=0x%x unc_name=%s rc=%d",
+		__entry->xid, __entry->sesid, __entry->tid,
+		__entry->unc_name, __entry->rc)
+)
+
+#define DEFINE_SMB3_TCON_EVENT(name)          \
+DEFINE_EVENT(smb3_tcon_class, smb3_##name,    \
+	TP_PROTO(unsigned int xid,		\
+		__u32	tid,			\
+		__u64	sesid,			\
+		const char *unc_name,		\
+		int	rc),			\
+	TP_ARGS(xid, tid, sesid, unc_name, rc))
+
+DEFINE_SMB3_TCON_EVENT(tcon);
+
+
 /*
  * For smb2/smb3 open call
  */
-- 
GitLab


From 926674de6705f0f1dbf29a62fd758d0977f535d6 Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Sun, 28 Oct 2018 13:13:23 -0500
Subject: [PATCH 0492/1890] smb3: on kerberos mount if server doesn't specify
 auth type use krb5

Some servers (e.g. Azure) do not include a spnego blob in the SMB3
negotiate protocol response, so on kerberos mounts ("sec=krb5")
we can fail, as we expected the server to list its supported
auth types (OIDs in the spnego blob in the negprot response).
Change this so that on krb5 mounts we default to trying krb5 if the
server doesn't list its supported protocol mechanisms.

Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
CC: Stable <stable@vger.kernel.org>
---
 fs/cifs/cifs_spnego.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index b611fc2e8984e..7f01c6e607918 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -147,8 +147,10 @@ cifs_get_spnego_key(struct cifs_ses *sesInfo)
 		sprintf(dp, ";sec=krb5");
 	else if (server->sec_mskerberos)
 		sprintf(dp, ";sec=mskrb5");
-	else
-		goto out;
+	else {
+		cifs_dbg(VFS, "unknown or missing server auth type, use krb5\n");
+		sprintf(dp, ";sec=krb5");
+	}
 
 	dp = description + strlen(description);
 	sprintf(dp, ";uid=0x%x",
-- 
GitLab


From dfe33f9abc08997e56f9bdf14fe9ac7ac0e14075 Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Tue, 30 Oct 2018 19:50:31 -0500
Subject: [PATCH 0493/1890] smb3: allow more detailed protocol info on open
 files for debugging

In order to debug complex problems it is often helpful to
have detailed information on the client and server view
of the open file information.  Add the ability for root to
view the list of smb3 open files and dump the persistent
handle and other info so that it can be more easily
correlated with server logs.

Sample output from "cat /proc/fs/cifs/open_files"

 # Version:1
 # Format:
 # <tree id> <persistent fid> <flags> <count> <pid> <uid> <filename> <mid>
 0x5 0x800000378 0x8000 1 7704 0 some-file 0x14
 0xcb903c0c 0x84412e67 0x8000 1 7754 1001 rofile 0x1a6d
 0xcb903c0c 0x9526b767 0x8000 1 7720 1000 file 0x1a5b
 0xcb903c0c 0x9ce41a21 0x8000 1 7715 0 smallfile 0xd67

Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
---
 fs/cifs/cifs_debug.c | 56 ++++++++++++++++++++++++++++++++++++++++++++
 fs/cifs/cifsglob.h   |  3 +++
 fs/cifs/smb2ops.c    |  3 +++
 fs/cifs/smb2pdu.c    |  3 +++
 4 files changed, 65 insertions(+)

diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 3e812428ac8d9..ba178b09de0b4 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -145,6 +145,58 @@ cifs_dump_iface(struct seq_file *m, struct cifs_server_iface *iface)
 		seq_printf(m, "\t\tIPv6: %pI6\n", &ipv6->sin6_addr);
 }
 
+static int cifs_debug_files_proc_show(struct seq_file *m, void *v)
+{
+	struct list_head *stmp, *tmp, *tmp1, *tmp2;
+	struct TCP_Server_Info *server;
+	struct cifs_ses *ses;
+	struct cifs_tcon *tcon;
+	struct cifsFileInfo *cfile;
+
+	seq_puts(m, "# Version:1\n");
+	seq_puts(m, "# Format:\n");
+	seq_puts(m, "# <tree id> <persistent fid> <flags> <count> <pid> <uid>");
+#ifdef CONFIG_CIFS_DEBUG2
+	seq_printf(m, " <filename> <mid>\n");
+#else
+	seq_printf(m, " <filename>\n");
+#endif /* CIFS_DEBUG2 */
+	spin_lock(&cifs_tcp_ses_lock);
+	list_for_each(stmp, &cifs_tcp_ses_list) {
+		server = list_entry(stmp, struct TCP_Server_Info,
+				    tcp_ses_list);
+		list_for_each(tmp, &server->smb_ses_list) {
+			ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
+			list_for_each(tmp1, &ses->tcon_list) {
+				tcon = list_entry(tmp1, struct cifs_tcon, tcon_list);
+				spin_lock(&tcon->open_file_lock);
+				list_for_each(tmp2, &tcon->openFileList) {
+					cfile = list_entry(tmp2, struct cifsFileInfo,
+						     tlist);
+					seq_printf(m,
+						"0x%x 0x%llx 0x%x %d %d %d %s",
+						tcon->tid,
+						cfile->fid.persistent_fid,
+						cfile->f_flags,
+						cfile->count,
+						cfile->pid,
+						from_kuid(&init_user_ns, cfile->uid),
+						cfile->dentry->d_name.name);
+#ifdef CONFIG_CIFS_DEBUG2
+					seq_printf(m, " 0x%llx\n", cfile->fid.mid);
+#else
+					seq_printf(m, "\n");
+#endif /* CIFS_DEBUG2 */
+				}
+				spin_unlock(&tcon->open_file_lock);
+			}
+		}
+	}
+	spin_unlock(&cifs_tcp_ses_lock);
+	seq_putc(m, '\n');
+	return 0;
+}
+
 static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
 {
 	struct list_head *tmp1, *tmp2, *tmp3;
@@ -565,6 +617,9 @@ cifs_proc_init(void)
 	proc_create_single("DebugData", 0, proc_fs_cifs,
 			cifs_debug_data_proc_show);
 
+	proc_create_single("open_files", 0400, proc_fs_cifs,
+			cifs_debug_files_proc_show);
+
 	proc_create("Stats", 0644, proc_fs_cifs, &cifs_stats_proc_fops);
 	proc_create("cifsFYI", 0644, proc_fs_cifs, &cifsFYI_proc_fops);
 	proc_create("traceSMB", 0644, proc_fs_cifs, &traceSMB_proc_fops);
@@ -601,6 +656,7 @@ cifs_proc_clean(void)
 		return;
 
 	remove_proc_entry("DebugData", proc_fs_cifs);
+	remove_proc_entry("open_files", proc_fs_cifs);
 	remove_proc_entry("cifsFYI", proc_fs_cifs);
 	remove_proc_entry("traceSMB", proc_fs_cifs);
 	remove_proc_entry("Stats", proc_fs_cifs);
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index ed1e0fcb69e3f..d7c0443d47a40 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1125,6 +1125,9 @@ struct cifs_fid {
 	__u8 create_guid[16];
 	struct cifs_pending_open *pending_open;
 	unsigned int epoch;
+#ifdef CONFIG_CIFS_DEBUG2
+	__u64 mid;
+#endif /* CIFS_DEBUG2 */
 	bool purge_cache;
 };
 
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 8489b5f706c3e..225fec1cfa673 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -1079,6 +1079,9 @@ smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock)
 
 	cfile->fid.persistent_fid = fid->persistent_fid;
 	cfile->fid.volatile_fid = fid->volatile_fid;
+#ifdef CONFIG_CIFS_DEBUG2
+	cfile->fid.mid = fid->mid;
+#endif /* CIFS_DEBUG2 */
 	server->ops->set_oplock_level(cinode, oplock, fid->epoch,
 				      &fid->purge_cache);
 	cinode->can_cache_brlcks = CIFS_CACHE_WRITE(cinode);
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index a92d3840db9d5..27f86537a5d11 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -2309,6 +2309,9 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
 	atomic_inc(&tcon->num_remote_opens);
 	oparms->fid->persistent_fid = rsp->PersistentFileId;
 	oparms->fid->volatile_fid = rsp->VolatileFileId;
+#ifdef CONFIG_CIFS_DEBUG2
+	oparms->fid->mid = le64_to_cpu(rsp->sync_hdr.MessageId);
+#endif /* CIFS_DEBUG2 */
 
 	if (buf) {
 		memcpy(buf, &rsp->CreationTime, 32);
-- 
GitLab


From 0df444a00f32a3ab4d37c3c101bb960ee38a9617 Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Wed, 31 Oct 2018 11:24:33 -0500
Subject: [PATCH 0494/1890] smb3: missing defines and structs for reparse point
 handling

We were missing some structs from MS-FSCC relating to
reparse point handling.  Add them to protocol defines
in smb2pdu.h

Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
---
 fs/cifs/cifspdu.h |  3 +++
 fs/cifs/smb2pdu.h | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+)

diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 1ce733f3582f6..79d842e7240c7 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -1539,6 +1539,9 @@ struct reparse_symlink_data {
 	char	PathBuffer[0];
 } __attribute__((packed));
 
+/* Flag above */
+#define SYMLINK_FLAG_RELATIVE 0x00000001
+
 /* For IO_REPARSE_TAG_NFS */
 #define NFS_SPECFILE_LNK	0x00000000014B4E4C
 #define NFS_SPECFILE_CHR	0x0000000000524843
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index f753f424d7f11..5671d5ee7f58f 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -842,6 +842,41 @@ struct fsctl_get_integrity_information_rsp {
 /* Integrity flags for above */
 #define FSCTL_INTEGRITY_FLAG_CHECKSUM_ENFORCEMENT_OFF	0x00000001
 
+/* Reparse structures - see MS-FSCC 2.1.2 */
+
+/* struct fsctl_reparse_info_req is empty, only response structs (see below) */
+
+struct reparse_data_buffer {
+	__le32	ReparseTag;
+	__le16	ReparseDataLength;
+	__u16	Reserved;
+	__u8	DataBuffer[0]; /* Variable Length */
+} __packed;
+
+struct reparse_guid_data_buffer {
+	__le32	ReparseTag;
+	__le16	ReparseDataLength;
+	__u16	Reserved;
+	__u8	ReparseGuid[16];
+	__u8	DataBuffer[0]; /* Variable Length */
+} __packed;
+
+struct reparse_mount_point_data_buffer {
+	__le32	ReparseTag;
+	__le16	ReparseDataLength;
+	__u16	Reserved;
+	__le16	SubstituteNameOffset;
+	__le16	SubstituteNameLength;
+	__le16	PrintNameOffset;
+	__le16	PrintNameLength;
+	__u8	PathBuffer[0]; /* Variable Length */
+} __packed;
+
+/* See MS-FSCC 2.1.2.4 and cifspdu.h for struct reparse_symlink_data */
+
+/* See MS-FSCC 2.1.2.6 and cifspdu.h for struct reparse_posix_data */
+
+
 /* See MS-DFSC 2.2.2 */
 struct fsctl_get_dfs_referral_req {
 	__le16 MaxReferralLevel;
-- 
GitLab


From 6e6e2b86c29c6fcfa16ad9fdc7ea32027bea5d73 Mon Sep 17 00:00:00 2001
From: Long Li <longli@microsoft.com>
Date: Wed, 31 Oct 2018 22:13:09 +0000
Subject: [PATCH 0495/1890] CIFS: Add support for direct I/O read

With direct I/O read, we transfer the data directly from transport layer to
the user data buffer.

Change in v3: add support for kernel AIO

Change in v4:
Refactor common read code to __cifs_readv for direct and non-direct I/O.
Retry on direct I/O failure.

Signed-off-by: Long Li <longli@microsoft.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/cifsfs.h   |   1 +
 fs/cifs/cifsglob.h |   5 +
 fs/cifs/file.c     | 225 +++++++++++++++++++++++++++++++++++++--------
 3 files changed, 192 insertions(+), 39 deletions(-)

diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 24e265a518746..3abea1a3f20c7 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -101,6 +101,7 @@ extern int cifs_open(struct inode *inode, struct file *file);
 extern int cifs_close(struct inode *inode, struct file *file);
 extern int cifs_closedir(struct inode *inode, struct file *file);
 extern ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to);
+extern ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to);
 extern ssize_t cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to);
 extern ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from);
 extern ssize_t cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from);
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index d7c0443d47a40..38ab0fca49e1d 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1186,6 +1186,11 @@ struct cifs_aio_ctx {
 	unsigned int		len;
 	unsigned int		total_len;
 	bool			should_dirty;
+	/*
+	 * Indicates if this aio_ctx is for direct_io,
+	 * If yes, iter is a copy of the user passed iov_iter
+	 */
+	bool			direct_io;
 };
 
 struct cifs_readdata;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 542d8828e1d01..1be36076e9604 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2979,7 +2979,6 @@ cifs_uncached_readdata_release(struct kref *refcount)
 	kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
 	for (i = 0; i < rdata->nr_pages; i++) {
 		put_page(rdata->pages[i]);
-		rdata->pages[i] = NULL;
 	}
 	cifs_readdata_release(refcount);
 }
@@ -3106,6 +3105,67 @@ cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
 	return uncached_fill_pages(server, rdata, iter, iter->count);
 }
 
+static int cifs_resend_rdata(struct cifs_readdata *rdata,
+			struct list_head *rdata_list,
+			struct cifs_aio_ctx *ctx)
+{
+	int wait_retry = 0;
+	unsigned int rsize, credits;
+	int rc;
+	struct TCP_Server_Info *server =
+		tlink_tcon(rdata->cfile->tlink)->ses->server;
+
+	/*
+	 * Try to resend this rdata, waiting for credits up to 3 seconds.
+	 * Note: we are attempting to resend the whole rdata not in segments
+	 */
+	do {
+		rc = server->ops->wait_mtu_credits(server, rdata->bytes,
+						&rsize, &credits);
+
+		if (rc)
+			break;
+
+		if (rsize < rdata->bytes) {
+			add_credits_and_wake_if(server, credits, 0);
+			msleep(1000);
+			wait_retry++;
+		}
+	} while (rsize < rdata->bytes && wait_retry < 3);
+
+	/*
+	 * If we can't find enough credits to send this rdata
+	 * release the rdata and return failure, this will pass
+	 * whatever I/O amount we have finished to VFS.
+	 */
+	if (rsize < rdata->bytes) {
+		rc = -EBUSY;
+		goto out;
+	}
+
+	rc = -EAGAIN;
+	while (rc == -EAGAIN) {
+		rc = 0;
+		if (rdata->cfile->invalidHandle)
+			rc = cifs_reopen_file(rdata->cfile, true);
+		if (!rc)
+			rc = server->ops->async_readv(rdata);
+	}
+
+	if (!rc) {
+		/* Add to aio pending list */
+		list_add_tail(&rdata->list, rdata_list);
+		return 0;
+	}
+
+	add_credits_and_wake_if(server, rdata->credits, 0);
+out:
+	kref_put(&rdata->refcount,
+		cifs_uncached_readdata_release);
+
+	return rc;
+}
+
 static int
 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
 		     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
@@ -3117,6 +3177,9 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
 	int rc;
 	pid_t pid;
 	struct TCP_Server_Info *server;
+	struct page **pagevec;
+	size_t start;
+	struct iov_iter direct_iov = ctx->iter;
 
 	server = tlink_tcon(open_file->tlink)->ses->server;
 
@@ -3125,6 +3188,9 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
 	else
 		pid = current->tgid;
 
+	if (ctx->direct_io)
+		iov_iter_advance(&direct_iov, offset - ctx->pos);
+
 	do {
 		rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
 						   &rsize, &credits);
@@ -3132,20 +3198,57 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
 			break;
 
 		cur_len = min_t(const size_t, len, rsize);
-		npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
 
-		/* allocate a readdata struct */
-		rdata = cifs_readdata_alloc(npages,
+		if (ctx->direct_io) {
+
+			cur_len = iov_iter_get_pages_alloc(
+					&direct_iov, &pagevec,
+					cur_len, &start);
+			if (cur_len < 0) {
+				cifs_dbg(VFS,
+					"couldn't get user pages (cur_len=%zd)"
+					" iter type %d"
+					" iov_offset %zd count %zd\n",
+					cur_len, direct_iov.type,
+					direct_iov.iov_offset,
+					direct_iov.count);
+				dump_stack();
+				break;
+			}
+			iov_iter_advance(&direct_iov, cur_len);
+
+			rdata = cifs_readdata_direct_alloc(
+					pagevec, cifs_uncached_readv_complete);
+			if (!rdata) {
+				add_credits_and_wake_if(server, credits, 0);
+				rc = -ENOMEM;
+				break;
+			}
+
+			npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
+			rdata->page_offset = start;
+			rdata->tailsz = npages > 1 ?
+				cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
+				cur_len;
+
+		} else {
+
+			npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
+			/* allocate a readdata struct */
+			rdata = cifs_readdata_alloc(npages,
 					    cifs_uncached_readv_complete);
-		if (!rdata) {
-			add_credits_and_wake_if(server, credits, 0);
-			rc = -ENOMEM;
-			break;
-		}
+			if (!rdata) {
+				add_credits_and_wake_if(server, credits, 0);
+				rc = -ENOMEM;
+				break;
+			}
 
-		rc = cifs_read_allocate_pages(rdata, npages);
-		if (rc)
-			goto error;
+			rc = cifs_read_allocate_pages(rdata, npages);
+			if (rc)
+				goto error;
+
+			rdata->tailsz = PAGE_SIZE;
+		}
 
 		rdata->cfile = cifsFileInfo_get(open_file);
 		rdata->nr_pages = npages;
@@ -3153,7 +3256,6 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
 		rdata->bytes = cur_len;
 		rdata->pid = pid;
 		rdata->pagesz = PAGE_SIZE;
-		rdata->tailsz = PAGE_SIZE;
 		rdata->read_into_pages = cifs_uncached_read_into_pages;
 		rdata->copy_into_pages = cifs_uncached_copy_into_pages;
 		rdata->credits = credits;
@@ -3167,9 +3269,11 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
 		if (rc) {
 			add_credits_and_wake_if(server, rdata->credits, 0);
 			kref_put(&rdata->refcount,
-				 cifs_uncached_readdata_release);
-			if (rc == -EAGAIN)
+				cifs_uncached_readdata_release);
+			if (rc == -EAGAIN) {
+				iov_iter_revert(&direct_iov, cur_len);
 				continue;
+			}
 			break;
 		}
 
@@ -3225,45 +3329,62 @@ collect_uncached_read_data(struct cifs_aio_ctx *ctx)
 				 * reading.
 				 */
 				if (got_bytes && got_bytes < rdata->bytes) {
-					rc = cifs_readdata_to_iov(rdata, to);
+					rc = 0;
+					if (!ctx->direct_io)
+						rc = cifs_readdata_to_iov(rdata, to);
 					if (rc) {
 						kref_put(&rdata->refcount,
-						cifs_uncached_readdata_release);
+							cifs_uncached_readdata_release);
 						continue;
 					}
 				}
 
-				rc = cifs_send_async_read(
+				if (ctx->direct_io) {
+					/*
+					 * Re-use rdata as this is a
+					 * direct I/O
+					 */
+					rc = cifs_resend_rdata(
+						rdata,
+						&tmp_list, ctx);
+				} else {
+					rc = cifs_send_async_read(
 						rdata->offset + got_bytes,
 						rdata->bytes - got_bytes,
 						rdata->cfile, cifs_sb,
 						&tmp_list, ctx);
 
+					kref_put(&rdata->refcount,
+						cifs_uncached_readdata_release);
+				}
+
 				list_splice(&tmp_list, &ctx->list);
 
-				kref_put(&rdata->refcount,
-					 cifs_uncached_readdata_release);
 				goto again;
 			} else if (rdata->result)
 				rc = rdata->result;
-			else
+			else if (!ctx->direct_io)
 				rc = cifs_readdata_to_iov(rdata, to);
 
 			/* if there was a short read -- discard anything left */
 			if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
 				rc = -ENODATA;
+
+			ctx->total_len += rdata->got_bytes;
 		}
 		list_del_init(&rdata->list);
 		kref_put(&rdata->refcount, cifs_uncached_readdata_release);
 	}
 
-	for (i = 0; i < ctx->npages; i++) {
-		if (ctx->should_dirty)
-			set_page_dirty(ctx->bv[i].bv_page);
-		put_page(ctx->bv[i].bv_page);
-	}
+	if (!ctx->direct_io) {
+		for (i = 0; i < ctx->npages; i++) {
+			if (ctx->should_dirty)
+				set_page_dirty(ctx->bv[i].bv_page);
+			put_page(ctx->bv[i].bv_page);
+		}
 
-	ctx->total_len = ctx->len - iov_iter_count(to);
+		ctx->total_len = ctx->len - iov_iter_count(to);
+	}
 
 	cifs_stats_bytes_read(tcon, ctx->total_len);
 
@@ -3281,18 +3402,28 @@ collect_uncached_read_data(struct cifs_aio_ctx *ctx)
 		complete(&ctx->done);
 }
 
-ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
+static ssize_t __cifs_readv(
+	struct kiocb *iocb, struct iov_iter *to, bool direct)
 {
-	struct file *file = iocb->ki_filp;
-	ssize_t rc;
 	size_t len;
-	ssize_t total_read = 0;
-	loff_t offset = iocb->ki_pos;
+	struct file *file = iocb->ki_filp;
 	struct cifs_sb_info *cifs_sb;
-	struct cifs_tcon *tcon;
 	struct cifsFileInfo *cfile;
+	struct cifs_tcon *tcon;
+	ssize_t rc, total_read = 0;
+	loff_t offset = iocb->ki_pos;
 	struct cifs_aio_ctx *ctx;
 
+	/*
+	 * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
+	 * fall back to data copy read path
+	 * this could be improved by getting pages directly in ITER_KVEC
+	 */
+	if (direct && to->type & ITER_KVEC) {
+		cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
+		direct = false;
+	}
+
 	len = iov_iter_count(to);
 	if (!len)
 		return 0;
@@ -3319,14 +3450,20 @@ ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
 	if (iter_is_iovec(to))
 		ctx->should_dirty = true;
 
-	rc = setup_aio_ctx_iter(ctx, to, READ);
-	if (rc) {
-		kref_put(&ctx->refcount, cifs_aio_ctx_release);
-		return rc;
+	if (direct) {
+		ctx->pos = offset;
+		ctx->direct_io = true;
+		ctx->iter = *to;
+		ctx->len = len;
+	} else {
+		rc = setup_aio_ctx_iter(ctx, to, READ);
+		if (rc) {
+			kref_put(&ctx->refcount, cifs_aio_ctx_release);
+			return rc;
+		}
+		len = ctx->len;
 	}
 
-	len = ctx->len;
-
 	/* grab a lock here due to read response handlers can access ctx */
 	mutex_lock(&ctx->aio_mutex);
 
@@ -3368,6 +3505,16 @@ ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
 	return rc;
 }
 
+ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
+{
+	return __cifs_readv(iocb, to, true);
+}
+
+ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
+{
+	return __cifs_readv(iocb, to, false);
+}
+
 ssize_t
 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
 {
-- 
GitLab


From 8c5f9c1ab7cb45f6584ce312b14fb310a8fa28f3 Mon Sep 17 00:00:00 2001
From: Long Li <longli@microsoft.com>
Date: Wed, 31 Oct 2018 22:13:10 +0000
Subject: [PATCH 0496/1890] CIFS: Add support for direct I/O write

With direct I/O write, user supplied buffers are pinned to the memory and data
are transferred directly from user buffers to the transport layer.

Change in v3: add support for kernel AIO

Change in v4:
Refactor common write code to __cifs_writev for direct and non-direct I/O.
Retry on direct I/O failure.

Signed-off-by: Long Li <longli@microsoft.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/cifsfs.h |   1 +
 fs/cifs/file.c   | 204 +++++++++++++++++++++++++++++++++++++----------
 2 files changed, 164 insertions(+), 41 deletions(-)

diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 3abea1a3f20c7..4c3b5cfccc49a 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -104,6 +104,7 @@ extern ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to);
 extern ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to);
 extern ssize_t cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to);
 extern ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from);
+extern ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from);
 extern ssize_t cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from);
 extern int cifs_lock(struct file *, int, struct file_lock *);
 extern int cifs_fsync(struct file *, loff_t, loff_t, int);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 1be36076e9604..937ffa79066b7 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2537,6 +2537,61 @@ wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
 	return 0;
 }
 
+static int
+cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
+	struct cifs_aio_ctx *ctx)
+{
+	int wait_retry = 0;
+	unsigned int wsize, credits;
+	int rc;
+	struct TCP_Server_Info *server =
+		tlink_tcon(wdata->cfile->tlink)->ses->server;
+
+	/*
+	 * Try to resend this wdata, waiting for credits up to 3 seconds.
+	 * Note: we are attempting to resend the whole wdata not in segments
+	 */
+	do {
+		rc = server->ops->wait_mtu_credits(
+			server, wdata->bytes, &wsize, &credits);
+
+		if (rc)
+			break;
+
+		if (wsize < wdata->bytes) {
+			add_credits_and_wake_if(server, credits, 0);
+			msleep(1000);
+			wait_retry++;
+		}
+	} while (wsize < wdata->bytes && wait_retry < 3);
+
+	if (wsize < wdata->bytes) {
+		rc = -EBUSY;
+		goto out;
+	}
+
+	rc = -EAGAIN;
+	while (rc == -EAGAIN) {
+		rc = 0;
+		if (wdata->cfile->invalidHandle)
+			rc = cifs_reopen_file(wdata->cfile, false);
+		if (!rc)
+			rc = server->ops->async_writev(wdata,
+					cifs_uncached_writedata_release);
+	}
+
+	if (!rc) {
+		list_add_tail(&wdata->list, wdata_list);
+		return 0;
+	}
+
+	add_credits_and_wake_if(server, wdata->credits, 0);
+out:
+	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
+
+	return rc;
+}
+
 static int
 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
 		     struct cifsFileInfo *open_file,
@@ -2551,6 +2606,8 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
 	loff_t saved_offset = offset;
 	pid_t pid;
 	struct TCP_Server_Info *server;
+	struct page **pagevec;
+	size_t start;
 
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
 		pid = open_file->pid;
@@ -2567,38 +2624,76 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
 		if (rc)
 			break;
 
-		nr_pages = get_numpages(wsize, len, &cur_len);
-		wdata = cifs_writedata_alloc(nr_pages,
+		if (ctx->direct_io) {
+			cur_len = iov_iter_get_pages_alloc(
+				from, &pagevec, wsize, &start);
+			if (cur_len < 0) {
+				cifs_dbg(VFS,
+					"direct_writev couldn't get user pages "
+					"(rc=%zd) iter type %d iov_offset %zd "
+					"count %zd\n",
+					cur_len, from->type,
+					from->iov_offset, from->count);
+				dump_stack();
+				break;
+			}
+			iov_iter_advance(from, cur_len);
+
+			nr_pages =
+				(cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
+
+			wdata = cifs_writedata_direct_alloc(pagevec,
 					     cifs_uncached_writev_complete);
-		if (!wdata) {
-			rc = -ENOMEM;
-			add_credits_and_wake_if(server, credits, 0);
-			break;
-		}
+			if (!wdata) {
+				rc = -ENOMEM;
+				add_credits_and_wake_if(server, credits, 0);
+				break;
+			}
 
-		rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
-		if (rc) {
-			kfree(wdata);
-			add_credits_and_wake_if(server, credits, 0);
-			break;
-		}
 
-		num_pages = nr_pages;
-		rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages);
-		if (rc) {
-			for (i = 0; i < nr_pages; i++)
-				put_page(wdata->pages[i]);
-			kfree(wdata);
-			add_credits_and_wake_if(server, credits, 0);
-			break;
-		}
+			wdata->page_offset = start;
+			wdata->tailsz =
+				nr_pages > 1 ?
+					cur_len - (PAGE_SIZE - start) -
+					(nr_pages - 2) * PAGE_SIZE :
+					cur_len;
+		} else {
+			nr_pages = get_numpages(wsize, len, &cur_len);
+			wdata = cifs_writedata_alloc(nr_pages,
+					     cifs_uncached_writev_complete);
+			if (!wdata) {
+				rc = -ENOMEM;
+				add_credits_and_wake_if(server, credits, 0);
+				break;
+			}
 
-		/*
-		 * Bring nr_pages down to the number of pages we actually used,
-		 * and free any pages that we didn't use.
-		 */
-		for ( ; nr_pages > num_pages; nr_pages--)
-			put_page(wdata->pages[nr_pages - 1]);
+			rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
+			if (rc) {
+				kfree(wdata);
+				add_credits_and_wake_if(server, credits, 0);
+				break;
+			}
+
+			num_pages = nr_pages;
+			rc = wdata_fill_from_iovec(
+				wdata, from, &cur_len, &num_pages);
+			if (rc) {
+				for (i = 0; i < nr_pages; i++)
+					put_page(wdata->pages[i]);
+				kfree(wdata);
+				add_credits_and_wake_if(server, credits, 0);
+				break;
+			}
+
+			/*
+			 * Bring nr_pages down to the number of pages we
+			 * actually used, and free any pages that we didn't use.
+			 */
+			for ( ; nr_pages > num_pages; nr_pages--)
+				put_page(wdata->pages[nr_pages - 1]);
+
+			wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
+		}
 
 		wdata->sync_mode = WB_SYNC_ALL;
 		wdata->nr_pages = nr_pages;
@@ -2607,7 +2702,6 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
 		wdata->pid = pid;
 		wdata->bytes = cur_len;
 		wdata->pagesz = PAGE_SIZE;
-		wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
 		wdata->credits = credits;
 		wdata->ctx = ctx;
 		kref_get(&ctx->refcount);
@@ -2682,13 +2776,18 @@ static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
 				INIT_LIST_HEAD(&tmp_list);
 				list_del_init(&wdata->list);
 
-				iov_iter_advance(&tmp_from,
+				if (ctx->direct_io)
+					rc = cifs_resend_wdata(
+						wdata, &tmp_list, ctx);
+				else {
+					iov_iter_advance(&tmp_from,
 						 wdata->offset - ctx->pos);
 
-				rc = cifs_write_from_iter(wdata->offset,
+					rc = cifs_write_from_iter(wdata->offset,
 						wdata->bytes, &tmp_from,
 						ctx->cfile, cifs_sb, &tmp_list,
 						ctx);
+				}
 
 				list_splice(&tmp_list, &ctx->list);
 
@@ -2701,8 +2800,9 @@ static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
 		kref_put(&wdata->refcount, cifs_uncached_writedata_release);
 	}
 
-	for (i = 0; i < ctx->npages; i++)
-		put_page(ctx->bv[i].bv_page);
+	if (!ctx->direct_io)
+		for (i = 0; i < ctx->npages; i++)
+			put_page(ctx->bv[i].bv_page);
 
 	cifs_stats_bytes_written(tcon, ctx->total_len);
 	set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
@@ -2717,7 +2817,8 @@ static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
 		complete(&ctx->done);
 }
 
-ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
+static ssize_t __cifs_writev(
+	struct kiocb *iocb, struct iov_iter *from, bool direct)
 {
 	struct file *file = iocb->ki_filp;
 	ssize_t total_written = 0;
@@ -2726,13 +2827,18 @@ ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
 	struct cifs_sb_info *cifs_sb;
 	struct cifs_aio_ctx *ctx;
 	struct iov_iter saved_from = *from;
+	size_t len = iov_iter_count(from);
 	int rc;
 
 	/*
-	 * BB - optimize the way when signing is disabled. We can drop this
-	 * extra memory-to-memory copying and use iovec buffers for constructing
-	 * write request.
+	 * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
+	 * In this case, fall back to non-direct write function.
+	 * this could be improved by getting pages directly in ITER_KVEC
 	 */
+	if (direct && from->type & ITER_KVEC) {
+		cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
+		direct = false;
+	}
 
 	rc = generic_write_checks(iocb, from);
 	if (rc <= 0)
@@ -2756,10 +2862,16 @@ ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
 
 	ctx->pos = iocb->ki_pos;
 
-	rc = setup_aio_ctx_iter(ctx, from, WRITE);
-	if (rc) {
-		kref_put(&ctx->refcount, cifs_aio_ctx_release);
-		return rc;
+	if (direct) {
+		ctx->direct_io = true;
+		ctx->iter = *from;
+		ctx->len = len;
+	} else {
+		rc = setup_aio_ctx_iter(ctx, from, WRITE);
+		if (rc) {
+			kref_put(&ctx->refcount, cifs_aio_ctx_release);
+			return rc;
+		}
 	}
 
 	/* grab a lock here due to read response handlers can access ctx */
@@ -2809,6 +2921,16 @@ ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
 	return total_written;
 }
 
+ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
+{
+	return __cifs_writev(iocb, from, true);
+}
+
+ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
+{
+	return __cifs_writev(iocb, from, false);
+}
+
 static ssize_t
 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
 {
-- 
GitLab


From be4eb68846dc4450a9a4998d7c16d9f4955f9988 Mon Sep 17 00:00:00 2001
From: Long Li <longli@microsoft.com>
Date: Wed, 31 Oct 2018 22:13:11 +0000
Subject: [PATCH 0497/1890] CIFS: Add direct I/O functions to file_operations

With direct read/write functions implemented, add them to file_operations.

Dircet I/O is used under two conditions:
1. When mounting with "cache=none", CIFS uses direct I/O for all user file
data transfer.
2. When opening a file with O_DIRECT, CIFS uses direct I/O for all data
transfer on this file.

Signed-off-by: Long Li <longli@microsoft.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
---
 fs/cifs/cifsfs.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index b7ac09e38159c..1b21262ec8747 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -1180,9 +1180,8 @@ const struct file_operations cifs_file_strict_ops = {
 };
 
 const struct file_operations cifs_file_direct_ops = {
-	/* BB reevaluate whether they can be done with directio, no cache */
-	.read_iter = cifs_user_readv,
-	.write_iter = cifs_user_writev,
+	.read_iter = cifs_direct_readv,
+	.write_iter = cifs_direct_writev,
 	.open = cifs_open,
 	.release = cifs_close,
 	.lock = cifs_lock,
@@ -1236,9 +1235,8 @@ const struct file_operations cifs_file_strict_nobrl_ops = {
 };
 
 const struct file_operations cifs_file_direct_nobrl_ops = {
-	/* BB reevaluate whether they can be done with directio, no cache */
-	.read_iter = cifs_user_readv,
-	.write_iter = cifs_user_writev,
+	.read_iter = cifs_direct_readv,
+	.write_iter = cifs_direct_writev,
 	.open = cifs_open,
 	.release = cifs_close,
 	.fsync = cifs_fsync,
-- 
GitLab


From 8c6c9bed8773375b1d54ccca2911ec892c59db5d Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 1 Nov 2018 13:14:30 +0000
Subject: [PATCH 0498/1890] cifs: don't dereference smb_file_target before null
 check

There is a null check on dst_file->private data which suggests
it can be potentially null. However, before this check, pointer
smb_file_target is derived from dst_file->private and dereferenced
in the call to tlink_tcon, hence there is a potential null pointer
deference.

Fix this by assigning smb_file_target and target_tcon after the
null pointer sanity checks.

Detected by CoverityScan, CID#1475302 ("Dereference before null check")

Fixes: 04b38d601239 ("vfs: pull btrfs clone API to vfs layer")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/cifsfs.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 1b21262ec8747..865706edb307d 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -999,8 +999,8 @@ static loff_t cifs_remap_file_range(struct file *src_file, loff_t off,
 	struct inode *src_inode = file_inode(src_file);
 	struct inode *target_inode = file_inode(dst_file);
 	struct cifsFileInfo *smb_file_src = src_file->private_data;
-	struct cifsFileInfo *smb_file_target = dst_file->private_data;
-	struct cifs_tcon *target_tcon = tlink_tcon(smb_file_target->tlink);
+	struct cifsFileInfo *smb_file_target;
+	struct cifs_tcon *target_tcon;
 	unsigned int xid;
 	int rc;
 
@@ -1017,6 +1017,9 @@ static loff_t cifs_remap_file_range(struct file *src_file, loff_t off,
 		goto out;
 	}
 
+	smb_file_target = dst_file->private_data;
+	target_tcon = tlink_tcon(smb_file_target->tlink);
+
 	/*
 	 * Note: cifs case is easier than btrfs since server responsible for
 	 * checks for proper open modes and file type and if it wants
-- 
GitLab


From b98e26df07549d4649ac5b8f24c49f5c722bbc7e Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Thu, 1 Nov 2018 10:54:32 -0500
Subject: [PATCH 0499/1890] cifs: fix signed/unsigned mismatch on aio_read
 patch

The patch "CIFS: Add support for direct I/O read" had
a signed/unsigned mismatch (ssize_t vs. size_t) in the
return from one function.  Similar trivial change
in aio_write

Signed-off-by: Long Li <longli@microsoft.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
Reported-by: Julia Lawall <julia.lawall@lip6.fr>
---
 fs/cifs/file.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 937ffa79066b7..74c33d5fafc83 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2625,18 +2625,21 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
 			break;
 
 		if (ctx->direct_io) {
-			cur_len = iov_iter_get_pages_alloc(
+			ssize_t result;
+
+			result = iov_iter_get_pages_alloc(
 				from, &pagevec, wsize, &start);
-			if (cur_len < 0) {
+			if (result < 0) {
 				cifs_dbg(VFS,
 					"direct_writev couldn't get user pages "
 					"(rc=%zd) iter type %d iov_offset %zd "
 					"count %zd\n",
-					cur_len, from->type,
+					result, from->type,
 					from->iov_offset, from->count);
 				dump_stack();
 				break;
 			}
+			cur_len = (size_t)result;
 			iov_iter_advance(from, cur_len);
 
 			nr_pages =
@@ -3322,21 +3325,23 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
 		cur_len = min_t(const size_t, len, rsize);
 
 		if (ctx->direct_io) {
+			ssize_t result;
 
-			cur_len = iov_iter_get_pages_alloc(
+			result = iov_iter_get_pages_alloc(
 					&direct_iov, &pagevec,
 					cur_len, &start);
-			if (cur_len < 0) {
+			if (result < 0) {
 				cifs_dbg(VFS,
 					"couldn't get user pages (cur_len=%zd)"
 					" iter type %d"
 					" iov_offset %zd count %zd\n",
-					cur_len, direct_iov.type,
+					result, direct_iov.type,
 					direct_iov.iov_offset,
 					direct_iov.count);
 				dump_stack();
 				break;
 			}
+			cur_len = (size_t)result;
 			iov_iter_advance(&direct_iov, cur_len);
 
 			rdata = cifs_readdata_direct_alloc(
-- 
GitLab


From df0734702a7cbba49d6765bd5ba069340bf9c5db Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Fri, 2 Nov 2018 10:16:15 -0700
Subject: [PATCH 0500/1890] bpf: show real jited prog address in /proc/kallsyms

Currently, /proc/kallsyms shows page address of jited bpf program. The
main reason here is to not expose randomized start address. However,
This is not ideal for detailed profiling (find hot instructions from
stack traces). This patch replaces the page address with real prog start
address.

This change is OK because these addresses are still protected by sysctl
kptr_restrict (see kallsyms_show_value()), and only programs loaded by
root are added to kallsyms (see bpf_prog_kallsyms_add()).

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/core.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 6377225b20820..1a796e0799ec4 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -553,7 +553,6 @@ bool is_bpf_text_address(unsigned long addr)
 int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
 		    char *sym)
 {
-	unsigned long symbol_start, symbol_end;
 	struct bpf_prog_aux *aux;
 	unsigned int it = 0;
 	int ret = -ERANGE;
@@ -566,10 +565,9 @@ int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
 		if (it++ != symnum)
 			continue;
 
-		bpf_get_prog_addr_region(aux->prog, &symbol_start, &symbol_end);
 		bpf_get_prog_name(aux->prog, sym);
 
-		*value = symbol_start;
+		*value = (unsigned long)aux->prog->bpf_func;
 		*type  = BPF_SYM_ELF_TYPE;
 
 		ret = 0;
-- 
GitLab


From de57e99ceb65d0d7775cc14a8ba5931d7de1d708 Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Fri, 2 Nov 2018 10:16:16 -0700
Subject: [PATCH 0501/1890] bpf: show real jited address in
 bpf_prog_info->jited_ksyms

Currently, jited_ksyms in bpf_prog_info shows page addresses of jited
bpf program. The main reason here is to not expose randomized start
address. However, this is not ideal for detailed profiling (find hot
instructions from stack traces). This patch replaces the page address
with real prog start address.

This change is OK because bpf_prog_get_info_by_fd() is only available
to root.

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/syscall.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index ccb93277aae2c..34a9eef5992c2 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2172,7 +2172,6 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
 			user_ksyms = u64_to_user_ptr(info.jited_ksyms);
 			for (i = 0; i < ulen; i++) {
 				ksym_addr = (ulong) prog->aux->func[i]->bpf_func;
-				ksym_addr &= PAGE_MASK;
 				if (put_user((u64) ksym_addr, &user_ksyms[i]))
 					return -EFAULT;
 			}
-- 
GitLab


From ff1889fc531f582f902175c0acc80321af540b24 Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Fri, 2 Nov 2018 10:16:17 -0700
Subject: [PATCH 0502/1890] bpf: show main program address and length in
 bpf_prog_info

Currently, when there is no subprog (prog->aux->func_cnt == 0),
bpf_prog_info does not return any jited_ksyms or jited_func_lens. This
patch adds main program address (prog->bpf_func) and main program
length (prog->jited_len) to bpf_prog_info.

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/syscall.c | 33 ++++++++++++++++++++++++---------
 1 file changed, 24 insertions(+), 9 deletions(-)

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 34a9eef5992c2..9418174c276c7 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2158,11 +2158,11 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
 	}
 
 	ulen = info.nr_jited_ksyms;
-	info.nr_jited_ksyms = prog->aux->func_cnt;
+	info.nr_jited_ksyms = prog->aux->func_cnt ? : 1;
 	if (info.nr_jited_ksyms && ulen) {
 		if (bpf_dump_raw_ok()) {
+			unsigned long ksym_addr;
 			u64 __user *user_ksyms;
-			ulong ksym_addr;
 			u32 i;
 
 			/* copy the address of the kernel symbol
@@ -2170,9 +2170,17 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
 			 */
 			ulen = min_t(u32, info.nr_jited_ksyms, ulen);
 			user_ksyms = u64_to_user_ptr(info.jited_ksyms);
-			for (i = 0; i < ulen; i++) {
-				ksym_addr = (ulong) prog->aux->func[i]->bpf_func;
-				if (put_user((u64) ksym_addr, &user_ksyms[i]))
+			if (prog->aux->func_cnt) {
+				for (i = 0; i < ulen; i++) {
+					ksym_addr = (unsigned long)
+						prog->aux->func[i]->bpf_func;
+					if (put_user((u64) ksym_addr,
+						     &user_ksyms[i]))
+						return -EFAULT;
+				}
+			} else {
+				ksym_addr = (unsigned long) prog->bpf_func;
+				if (put_user((u64) ksym_addr, &user_ksyms[0]))
 					return -EFAULT;
 			}
 		} else {
@@ -2181,7 +2189,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
 	}
 
 	ulen = info.nr_jited_func_lens;
-	info.nr_jited_func_lens = prog->aux->func_cnt;
+	info.nr_jited_func_lens = prog->aux->func_cnt ? : 1;
 	if (info.nr_jited_func_lens && ulen) {
 		if (bpf_dump_raw_ok()) {
 			u32 __user *user_lens;
@@ -2190,9 +2198,16 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
 			/* copy the JITed image lengths for each function */
 			ulen = min_t(u32, info.nr_jited_func_lens, ulen);
 			user_lens = u64_to_user_ptr(info.jited_func_lens);
-			for (i = 0; i < ulen; i++) {
-				func_len = prog->aux->func[i]->jited_len;
-				if (put_user(func_len, &user_lens[i]))
+			if (prog->aux->func_cnt) {
+				for (i = 0; i < ulen; i++) {
+					func_len =
+						prog->aux->func[i]->jited_len;
+					if (put_user(func_len, &user_lens[i]))
+						return -EFAULT;
+				}
+			} else {
+				func_len = prog->jited_len;
+				if (put_user(func_len, &user_lens[0]))
 					return -EFAULT;
 			}
 		} else {
-- 
GitLab


From 28c2fae726bf5003cd209b0d5910a642af98316f Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 2 Nov 2018 11:35:46 +0100
Subject: [PATCH 0503/1890] bpf: fix bpf_prog_get_info_by_fd to return 0
 func_lens for unpriv

While dbecd7388476 ("bpf: get kernel symbol addresses via syscall")
zeroed info.nr_jited_ksyms in bpf_prog_get_info_by_fd() for queries
from unprivileged users, commit 815581c11cc2 ("bpf: get JITed image
lengths of functions via syscall") forgot about doing so and therefore
returns the #elems of the user set up buffer which is incorrect. It
also needs to indicate a info.nr_jited_func_lens of zero.

Fixes: 815581c11cc2 ("bpf: get JITed image lengths of functions via syscall")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Sandipan Das <sandipan@linux.vnet.ibm.com>
Cc: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/syscall.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 9418174c276c7..cf5040fd54344 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2078,6 +2078,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
 		info.jited_prog_len = 0;
 		info.xlated_prog_len = 0;
 		info.nr_jited_ksyms = 0;
+		info.nr_jited_func_lens = 0;
 		goto done;
 	}
 
-- 
GitLab


From 2cbfdf4df58330f6cb493500387427dae1c5551d Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 2 Nov 2018 17:16:51 +0000
Subject: [PATCH 0504/1890] iov_iter: Fix 9p virtio breakage

When switching to the new iovec accessors, a negation got subtly
dropped, leading to 9p being remarkably broken (here with kvmtool):

[    7.430941] VFS: Mounted root (9p filesystem) on device 0:15.
[    7.432080] devtmpfs: mounted
[    7.432717] Freeing unused kernel memory: 1344K
[    7.433658] Run /virt/init as init process
  Warning: unable to translate guest address 0x7e00902ff000 to host
  Warning: unable to translate guest address 0x7e00902fefc0 to host
  Warning: unable to translate guest address 0x7e00902ff000 to host
  Warning: unable to translate guest address 0x7e008febef80 to host
  Warning: unable to translate guest address 0x7e008febf000 to host
  Warning: unable to translate guest address 0x7e008febef00 to host
  Warning: unable to translate guest address 0x7e008febf000 to host
[    7.436376] Kernel panic - not syncing: Requested init /virt/init failed (error -8).
[    7.437554] CPU: 29 PID: 1 Comm: swapper/0 Not tainted 4.19.0-rc8-02267-g00e23707442a #291
[    7.439006] Hardware name: linux,dummy-virt (DT)
[    7.439902] Call trace:
[    7.440387]  dump_backtrace+0x0/0x148
[    7.441104]  show_stack+0x14/0x20
[    7.441768]  dump_stack+0x90/0xb4
[    7.442425]  panic+0x120/0x27c
[    7.443036]  kernel_init+0xa4/0x100
[    7.443725]  ret_from_fork+0x10/0x18
[    7.444444] SMP: stopping secondary CPUs
[    7.445391] Kernel Offset: disabled
[    7.446169] CPU features: 0x0,23000438
[    7.446974] Memory Limit: none
[    7.447645] ---[ end Kernel panic - not syncing: Requested init /virt/init failed (error -8). ]---

Restoring the missing "!" brings the guest back to life.

Fixes: 00e23707442a ("iov_iter: Use accessor function")
Reported-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 net/9p/trans_virtio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 4d7d2070e9c85..ce94e5668ac2c 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -322,7 +322,7 @@ static int p9_get_mapped_pages(struct virtio_chan *chan,
 	if (!iov_iter_count(data))
 		return 0;
 
-	if (iov_iter_is_kvec(data)) {
+	if (!iov_iter_is_kvec(data)) {
 		int n;
 		/*
 		 * We allow only p9_max_pages pinned. We wait for the
-- 
GitLab


From d4f79cb872bd4faa44eb133c1aee5f3411342c54 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 5 Oct 2018 18:15:49 +0200
Subject: [PATCH 0505/1890] ARM: orion: avoid VLA in orion_mpp_conf

Testing randconfig builds found an instance of a VLA that was
missed when determining that we have removed them all:

arch/arm/plat-orion/mpp.c: In function 'orion_mpp_conf':
arch/arm/plat-orion/mpp.c:31:2: error: ISO C90 forbids variable length array 'mpp_ctrl' [-Werror=vla]

This one is fairly straightforward: we know what all three
callers are, and the maximum length is not very long.

Fixes: 68664695ae57 ("Makefile: Globally enable VLA warning")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Olof Johansson <olof@lixom.net>
---
 arch/arm/plat-orion/mpp.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/arm/plat-orion/mpp.c b/arch/arm/plat-orion/mpp.c
index 5b4ff9373c894..8a6880d528b6f 100644
--- a/arch/arm/plat-orion/mpp.c
+++ b/arch/arm/plat-orion/mpp.c
@@ -28,10 +28,15 @@ void __init orion_mpp_conf(unsigned int *mpp_list, unsigned int variant_mask,
 			   unsigned int mpp_max, void __iomem *dev_bus)
 {
 	unsigned int mpp_nr_regs = (1 + mpp_max/8);
-	u32 mpp_ctrl[mpp_nr_regs];
+	u32 mpp_ctrl[8];
 	int i;
 
 	printk(KERN_DEBUG "initial MPP regs:");
+	if (mpp_nr_regs > ARRAY_SIZE(mpp_ctrl)) {
+		printk(KERN_ERR "orion_mpp_conf: invalid mpp_max\n");
+		return;
+	}
+
 	for (i = 0; i < mpp_nr_regs; i++) {
 		mpp_ctrl[i] = readl(mpp_ctrl_addr(i, dev_bus));
 		printk(" %08x", mpp_ctrl[i]);
-- 
GitLab


From 00a6a845c0b66e7ffb9adb6b3a9ce2ae97a0c172 Mon Sep 17 00:00:00 2001
From: Alexandre Torgue <alexandre.torgue@st.com>
Date: Thu, 20 Sep 2018 18:34:17 +0200
Subject: [PATCH 0506/1890] ARM: dts: stm32: update HASH1 dmas property on
 stm32mp157c

Remove unused parameter from HASH1 dmas property on stm32mp157c SoC.

Fixes: 1e726a40e067 ("ARM: dts: stm32: Add HASH support on stm32mp157c")
Signed-off-by: Alexandre Torgue <alexandre.torgue@st.com>
[Olof: Bug doesn't cause any harm, so shouldn't need stable backport]
Signed-off-by: Olof Johansson <olof@lixom.net>
---
 arch/arm/boot/dts/stm32mp157c.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/stm32mp157c.dtsi b/arch/arm/boot/dts/stm32mp157c.dtsi
index c50c36baba758..8bf1c17f8cefb 100644
--- a/arch/arm/boot/dts/stm32mp157c.dtsi
+++ b/arch/arm/boot/dts/stm32mp157c.dtsi
@@ -923,7 +923,7 @@ hash1: hash@54002000 {
 			interrupts = <GIC_SPI 80 IRQ_TYPE_LEVEL_HIGH>;
 			clocks = <&rcc HASH1>;
 			resets = <&rcc HASH1_R>;
-			dmas = <&mdma1 31 0x10 0x1000A02 0x0 0x0 0x0>;
+			dmas = <&mdma1 31 0x10 0x1000A02 0x0 0x0>;
 			dma-names = "in";
 			dma-maxburst = <2>;
 			status = "disabled";
-- 
GitLab


From 49682bfa1e0e448a711471a5db83be0df1fb39a2 Mon Sep 17 00:00:00 2001
From: Mathieu Malaterre <malat@debian.org>
Date: Wed, 31 Oct 2018 13:16:58 +0100
Subject: [PATCH 0507/1890] net: document skb parameter in function
 'skb_gso_size_check'

Remove kernel-doc warning:

  net/core/skbuff.c:4953: warning: Function parameter or member 'skb' not described in 'skb_gso_size_check'

Signed-off-by: Mathieu Malaterre <malat@debian.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 946de0e24c876..b4ee5c8b928f0 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4944,6 +4944,8 @@ static unsigned int skb_gso_mac_seglen(const struct sk_buff *skb)
  *
  * This is a helper to do that correctly considering GSO_BY_FRAGS.
  *
+ * @skb: GSO skb
+ *
  * @seg_len: The segmented length (from skb_gso_*_seglen). In the
  *           GSO_BY_FRAGS case this will be [header sizes + GSO_BY_FRAGS].
  *
-- 
GitLab


From 7b900ead6cc66b2ee873cb042dfba169aa68b56c Mon Sep 17 00:00:00 2001
From: Frieder Schrempf <frieder.schrempf@kontron.de>
Date: Wed, 31 Oct 2018 22:52:19 +0100
Subject: [PATCH 0508/1890] usbnet: smsc95xx: disable carrier check while
 suspending

We need to make sure, that the carrier check polling is disabled
while suspending. Otherwise we can end up with usbnet_read_cmd()
being issued when only usbnet_read_cmd_nopm() is allowed. If this
happens, read operations lock up.

Fixes: d69d169493 ("usbnet: smsc95xx: fix link detection for disabled autonegotiation")
Signed-off-by: Frieder Schrempf <frieder.schrempf@kontron.de>
Reviewed-by: Raghuram Chary J <RaghuramChary.Jallipalli@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/smsc95xx.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
index 262e7a3c23cb6..2d17f3b9bb165 100644
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c
@@ -1598,6 +1598,8 @@ static int smsc95xx_suspend(struct usb_interface *intf, pm_message_t message)
 		return ret;
 	}
 
+	cancel_delayed_work_sync(&pdata->carrier_check);
+
 	if (pdata->suspend_flags) {
 		netdev_warn(dev->net, "error during last resume\n");
 		pdata->suspend_flags = 0;
@@ -1840,6 +1842,11 @@ static int smsc95xx_suspend(struct usb_interface *intf, pm_message_t message)
 	 */
 	if (ret && PMSG_IS_AUTO(message))
 		usbnet_resume(intf);
+
+	if (ret)
+		schedule_delayed_work(&pdata->carrier_check,
+				      CARRIER_CHECK_DELAY);
+
 	return ret;
 }
 
-- 
GitLab


From 2384d02520ff2a916169b2fd85ea50e923ed56c2 Mon Sep 17 00:00:00 2001
From: Jeff Barnhill <0xeffeff@gmail.com>
Date: Fri, 2 Nov 2018 20:23:57 +0000
Subject: [PATCH 0509/1890] net/ipv6: Add anycast addresses to a global
 hashtable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

icmp6_send() function is expensive on systems with a large number of
interfaces. Every time itâ€™s called, it has to verify that the source
address does not correspond to an existing anycast address by looping
through every device and every anycast address on the device.  This can
result in significant delays for a CPU when there are a large number of
neighbors and ND timers are frequently timing out and calling
neigh_invalidate().

Add anycast addresses to a global hashtable to allow quick searching for
matching anycast addresses.  This is based on inet6_addr_lst in addrconf.c.

Signed-off-by: Jeff Barnhill <0xeffeff@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/addrconf.h |  2 ++
 include/net/if_inet6.h |  2 ++
 net/ipv6/af_inet6.c    |  5 +++
 net/ipv6/anycast.c     | 80 +++++++++++++++++++++++++++++++++++++++---
 4 files changed, 85 insertions(+), 4 deletions(-)

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 14b789a123e7d..1656c59784987 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -317,6 +317,8 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
 			 const struct in6_addr *addr);
 bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev,
 			     const struct in6_addr *addr);
+int ipv6_anycast_init(void);
+void ipv6_anycast_cleanup(void);
 
 /* Device notifier */
 int register_inet6addr_notifier(struct notifier_block *nb);
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index d7578cf49c3af..c9c78c15bce04 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -146,10 +146,12 @@ struct ifacaddr6 {
 	struct in6_addr		aca_addr;
 	struct fib6_info	*aca_rt;
 	struct ifacaddr6	*aca_next;
+	struct hlist_node	aca_addr_lst;
 	int			aca_users;
 	refcount_t		aca_refcnt;
 	unsigned long		aca_cstamp;
 	unsigned long		aca_tstamp;
+	struct rcu_head		rcu;
 };
 
 #define	IFA_HOST	IPV6_ADDR_LOOPBACK
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 3f4d61017a694..f0cd291034f0f 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -1001,6 +1001,9 @@ static int __init inet6_init(void)
 	err = ip6_flowlabel_init();
 	if (err)
 		goto ip6_flowlabel_fail;
+	err = ipv6_anycast_init();
+	if (err)
+		goto ipv6_anycast_fail;
 	err = addrconf_init();
 	if (err)
 		goto addrconf_fail;
@@ -1091,6 +1094,8 @@ static int __init inet6_init(void)
 ipv6_exthdrs_fail:
 	addrconf_cleanup();
 addrconf_fail:
+	ipv6_anycast_cleanup();
+ipv6_anycast_fail:
 	ip6_flowlabel_cleanup();
 ip6_flowlabel_fail:
 	ndisc_late_cleanup();
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 4e0ff7031edd5..7698637cf8276 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -44,8 +44,22 @@
 
 #include <net/checksum.h>
 
+#define IN6_ADDR_HSIZE_SHIFT	8
+#define IN6_ADDR_HSIZE		BIT(IN6_ADDR_HSIZE_SHIFT)
+/*	anycast address hash table
+ */
+static struct hlist_head inet6_acaddr_lst[IN6_ADDR_HSIZE];
+static DEFINE_SPINLOCK(acaddr_hash_lock);
+
 static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr);
 
+static u32 inet6_acaddr_hash(struct net *net, const struct in6_addr *addr)
+{
+	u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net);
+
+	return hash_32(val, IN6_ADDR_HSIZE_SHIFT);
+}
+
 /*
  *	socket join an anycast group
  */
@@ -204,16 +218,39 @@ void ipv6_sock_ac_close(struct sock *sk)
 	rtnl_unlock();
 }
 
+static void ipv6_add_acaddr_hash(struct net *net, struct ifacaddr6 *aca)
+{
+	unsigned int hash = inet6_acaddr_hash(net, &aca->aca_addr);
+
+	spin_lock(&acaddr_hash_lock);
+	hlist_add_head_rcu(&aca->aca_addr_lst, &inet6_acaddr_lst[hash]);
+	spin_unlock(&acaddr_hash_lock);
+}
+
+static void ipv6_del_acaddr_hash(struct ifacaddr6 *aca)
+{
+	spin_lock(&acaddr_hash_lock);
+	hlist_del_init_rcu(&aca->aca_addr_lst);
+	spin_unlock(&acaddr_hash_lock);
+}
+
 static void aca_get(struct ifacaddr6 *aca)
 {
 	refcount_inc(&aca->aca_refcnt);
 }
 
+static void aca_free_rcu(struct rcu_head *h)
+{
+	struct ifacaddr6 *aca = container_of(h, struct ifacaddr6, rcu);
+
+	fib6_info_release(aca->aca_rt);
+	kfree(aca);
+}
+
 static void aca_put(struct ifacaddr6 *ac)
 {
 	if (refcount_dec_and_test(&ac->aca_refcnt)) {
-		fib6_info_release(ac->aca_rt);
-		kfree(ac);
+		call_rcu(&ac->rcu, aca_free_rcu);
 	}
 }
 
@@ -229,6 +266,7 @@ static struct ifacaddr6 *aca_alloc(struct fib6_info *f6i,
 	aca->aca_addr = *addr;
 	fib6_info_hold(f6i);
 	aca->aca_rt = f6i;
+	INIT_HLIST_NODE(&aca->aca_addr_lst);
 	aca->aca_users = 1;
 	/* aca_tstamp should be updated upon changes */
 	aca->aca_cstamp = aca->aca_tstamp = jiffies;
@@ -285,6 +323,8 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
 	aca_get(aca);
 	write_unlock_bh(&idev->lock);
 
+	ipv6_add_acaddr_hash(net, aca);
+
 	ip6_ins_rt(net, f6i);
 
 	addrconf_join_solict(idev->dev, &aca->aca_addr);
@@ -325,6 +365,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
 	else
 		idev->ac_list = aca->aca_next;
 	write_unlock_bh(&idev->lock);
+	ipv6_del_acaddr_hash(aca);
 	addrconf_leave_solict(idev, &aca->aca_addr);
 
 	ip6_del_rt(dev_net(idev->dev), aca->aca_rt);
@@ -352,6 +393,8 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev)
 		idev->ac_list = aca->aca_next;
 		write_unlock_bh(&idev->lock);
 
+		ipv6_del_acaddr_hash(aca);
+
 		addrconf_leave_solict(idev, &aca->aca_addr);
 
 		ip6_del_rt(dev_net(idev->dev), aca->aca_rt);
@@ -390,17 +433,25 @@ static bool ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *ad
 bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
 			 const struct in6_addr *addr)
 {
+	unsigned int hash = inet6_acaddr_hash(net, addr);
+	struct net_device *nh_dev;
+	struct ifacaddr6 *aca;
 	bool found = false;
 
 	rcu_read_lock();
 	if (dev)
 		found = ipv6_chk_acast_dev(dev, addr);
 	else
-		for_each_netdev_rcu(net, dev)
-			if (ipv6_chk_acast_dev(dev, addr)) {
+		hlist_for_each_entry_rcu(aca, &inet6_acaddr_lst[hash],
+					 aca_addr_lst) {
+			nh_dev = fib6_info_nh_dev(aca->aca_rt);
+			if (!nh_dev || !net_eq(dev_net(nh_dev), net))
+				continue;
+			if (ipv6_addr_equal(&aca->aca_addr, addr)) {
 				found = true;
 				break;
 			}
+		}
 	rcu_read_unlock();
 	return found;
 }
@@ -539,4 +590,25 @@ void ac6_proc_exit(struct net *net)
 {
 	remove_proc_entry("anycast6", net->proc_net);
 }
+
+/*	Init / cleanup code
+ */
+int __init ipv6_anycast_init(void)
+{
+	int i;
+
+	for (i = 0; i < IN6_ADDR_HSIZE; i++)
+		INIT_HLIST_HEAD(&inet6_acaddr_lst[i]);
+	return 0;
+}
+
+void ipv6_anycast_cleanup(void)
+{
+	int i;
+
+	spin_lock(&acaddr_hash_lock);
+	for (i = 0; i < IN6_ADDR_HSIZE; i++)
+		WARN_ON(!hlist_empty(&inet6_acaddr_lst[i]));
+	spin_unlock(&acaddr_hash_lock);
+}
 #endif
-- 
GitLab


From 284fb78ed7572117846f8e1d1d8e3dbfd16880c2 Mon Sep 17 00:00:00 2001
From: Tristram Ha <Tristram.Ha@microchip.com>
Date: Fri, 2 Nov 2018 19:23:41 -0700
Subject: [PATCH 0510/1890] net: dsa: microchip: initialize mutex before use

Initialize mutex before use.  Avoid kernel complaint when
CONFIG_DEBUG_LOCK_ALLOC is enabled.

Fixes: b987e98e50ab90e5 ("dsa: add DSA switch driver for Microchip KSZ9477")
Signed-off-by: Tristram Ha <Tristram.Ha@microchip.com>
Reviewed-by: Pavel Machek <pavel@ucw.cz>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/microchip/ksz_common.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index 54e0ca6ed7308..86b6464b4525c 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -1117,11 +1117,6 @@ static int ksz_switch_init(struct ksz_device *dev)
 {
 	int i;
 
-	mutex_init(&dev->reg_mutex);
-	mutex_init(&dev->stats_mutex);
-	mutex_init(&dev->alu_mutex);
-	mutex_init(&dev->vlan_mutex);
-
 	dev->ds->ops = &ksz_switch_ops;
 
 	for (i = 0; i < ARRAY_SIZE(ksz_switch_chips); i++) {
@@ -1206,6 +1201,11 @@ int ksz_switch_register(struct ksz_device *dev)
 	if (dev->pdata)
 		dev->chip_id = dev->pdata->chip_id;
 
+	mutex_init(&dev->reg_mutex);
+	mutex_init(&dev->stats_mutex);
+	mutex_init(&dev->alu_mutex);
+	mutex_init(&dev->vlan_mutex);
+
 	if (ksz_switch_detect(dev))
 		return -EINVAL;
 
-- 
GitLab


From c7e86acfcee30794dc99a0759924bf7b9d43f1ca Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 1 Nov 2018 13:39:53 +0000
Subject: [PATCH 0511/1890] rxrpc: Fix lockup due to no error backoff after ack
 transmit error

If the network becomes (partially) unavailable, say by disabling IPv6, the
background ACK transmission routine can get itself into a tizzy by
proposing immediate ACK retransmission.  Since we're in the call event
processor, that happens immediately without returning to the workqueue
manager.

The condition should clear after a while when either the network comes back
or the call times out.

Fix this by:

 (1) When re-proposing an ACK on failed Tx, don't schedule it immediately.
     This will allow a certain amount of time to elapse before we try
     again.

 (2) Enforce a return to the workqueue manager after a certain number of
     iterations of the call processing loop.

 (3) Add a backoff delay that increases the delay on deferred ACKs by a
     jiffy per failed transmission to a limit of HZ.  The backoff delay is
     cleared on a successful return from kernel_sendmsg().

 (4) Cancel calls immediately if the opening sendmsg fails.  The layer
     above can arrange retransmission or rotate to another server.

Fixes: 248f219cb8bc ("rxrpc: Rewrite the data and ack handling code")
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rxrpc/ar-internal.h |  1 +
 net/rxrpc/call_event.c  | 18 ++++++++++++++----
 net/rxrpc/output.c      | 35 +++++++++++++++++++++++++++++++----
 3 files changed, 46 insertions(+), 8 deletions(-)

diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 382196e57a26c..bc628acf4f4ff 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -611,6 +611,7 @@ struct rxrpc_call {
 						 * not hard-ACK'd packet follows this.
 						 */
 	rxrpc_seq_t		tx_top;		/* Highest Tx slot allocated. */
+	u16			tx_backoff;	/* Delay to insert due to Tx failure */
 
 	/* TCP-style slow-start congestion control [RFC5681].  Since the SMSS
 	 * is fixed, we keep these numbers in terms of segments (ie. DATA
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 8e7434e92097e..468efc3660c03 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -123,6 +123,7 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
 		else
 			ack_at = expiry;
 
+		ack_at += READ_ONCE(call->tx_backoff);
 		ack_at += now;
 		if (time_before(ack_at, call->ack_at)) {
 			WRITE_ONCE(call->ack_at, ack_at);
@@ -311,6 +312,7 @@ void rxrpc_process_call(struct work_struct *work)
 		container_of(work, struct rxrpc_call, processor);
 	rxrpc_serial_t *send_ack;
 	unsigned long now, next, t;
+	unsigned int iterations = 0;
 
 	rxrpc_see_call(call);
 
@@ -319,6 +321,11 @@ void rxrpc_process_call(struct work_struct *work)
 	       call->debug_id, rxrpc_call_states[call->state], call->events);
 
 recheck_state:
+	/* Limit the number of times we do this before returning to the manager */
+	iterations++;
+	if (iterations > 5)
+		goto requeue;
+
 	if (test_and_clear_bit(RXRPC_CALL_EV_ABORT, &call->events)) {
 		rxrpc_send_abort_packet(call);
 		goto recheck_state;
@@ -447,13 +454,16 @@ void rxrpc_process_call(struct work_struct *work)
 	rxrpc_reduce_call_timer(call, next, now, rxrpc_timer_restart);
 
 	/* other events may have been raised since we started checking */
-	if (call->events && call->state < RXRPC_CALL_COMPLETE) {
-		__rxrpc_queue_call(call);
-		goto out;
-	}
+	if (call->events && call->state < RXRPC_CALL_COMPLETE)
+		goto requeue;
 
 out_put:
 	rxrpc_put_call(call, rxrpc_call_put);
 out:
 	_leave("");
+	return;
+
+requeue:
+	__rxrpc_queue_call(call);
+	goto out;
 }
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 1894188888391..736aa92811004 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -34,6 +34,21 @@ struct rxrpc_abort_buffer {
 
 static const char rxrpc_keepalive_string[] = "";
 
+/*
+ * Increase Tx backoff on transmission failure and clear it on success.
+ */
+static void rxrpc_tx_backoff(struct rxrpc_call *call, int ret)
+{
+	if (ret < 0) {
+		u16 tx_backoff = READ_ONCE(call->tx_backoff);
+
+		if (tx_backoff < HZ)
+			WRITE_ONCE(call->tx_backoff, tx_backoff + 1);
+	} else {
+		WRITE_ONCE(call->tx_backoff, 0);
+	}
+}
+
 /*
  * Arrange for a keepalive ping a certain time after we last transmitted.  This
  * lets the far side know we're still interested in this call and helps keep
@@ -210,6 +225,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
 	else
 		trace_rxrpc_tx_packet(call->debug_id, &pkt->whdr,
 				      rxrpc_tx_point_call_ack);
+	rxrpc_tx_backoff(call, ret);
 
 	if (call->state < RXRPC_CALL_COMPLETE) {
 		if (ret < 0) {
@@ -218,7 +234,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
 			rxrpc_propose_ACK(call, pkt->ack.reason,
 					  ntohs(pkt->ack.maxSkew),
 					  ntohl(pkt->ack.serial),
-					  true, true,
+					  false, true,
 					  rxrpc_propose_ack_retry_tx);
 		} else {
 			spin_lock_bh(&call->lock);
@@ -300,7 +316,7 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call)
 	else
 		trace_rxrpc_tx_packet(call->debug_id, &pkt.whdr,
 				      rxrpc_tx_point_call_abort);
-
+	rxrpc_tx_backoff(call, ret);
 
 	rxrpc_put_connection(conn);
 	return ret;
@@ -413,6 +429,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
 	else
 		trace_rxrpc_tx_packet(call->debug_id, &whdr,
 				      rxrpc_tx_point_call_data_nofrag);
+	rxrpc_tx_backoff(call, ret);
 	if (ret == -EMSGSIZE)
 		goto send_fragmentable;
 
@@ -445,9 +462,18 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
 			rxrpc_reduce_call_timer(call, expect_rx_by, nowj,
 						rxrpc_timer_set_for_normal);
 		}
-	}
 
-	rxrpc_set_keepalive(call);
+		rxrpc_set_keepalive(call);
+	} else {
+		/* Cancel the call if the initial transmission fails,
+		 * particularly if that's due to network routing issues that
+		 * aren't going away anytime soon.  The layer above can arrange
+		 * the retransmission.
+		 */
+		if (!test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags))
+			rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
+						  RX_USER_ABORT, ret);
+	}
 
 	_leave(" = %d [%u]", ret, call->peer->maxdata);
 	return ret;
@@ -506,6 +532,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
 	else
 		trace_rxrpc_tx_packet(call->debug_id, &whdr,
 				      rxrpc_tx_point_call_data_frag);
+	rxrpc_tx_backoff(call, ret);
 
 	up_write(&conn->params.local->defrag_sem);
 	goto done;
-- 
GitLab


From 09e805d2570a3a94f13dd9c9ad2bcab23da76e09 Mon Sep 17 00:00:00 2001
From: Doug Berger <opendmb@gmail.com>
Date: Thu, 1 Nov 2018 15:55:37 -0700
Subject: [PATCH 0512/1890] net: bcmgenet: protect stop from timeout

A timing hazard exists when the network interface is stopped that
allows a watchdog timeout to be processed by a separate core in
parallel. This creates the potential for the timeout handler to
wake the queues while the driver is shutting down, or access
registers after their clocks have been removed.

The more common case is that the watchdog timeout will produce a
warning message which doesn't lead to a crash. The chances of this
are greatly increased by the fact that bcmgenet_netif_stop stops
the transmit queues which can easily precipitate a watchdog time-
out because of stale trans_start data in the queues.

This commit corrects the behavior by ensuring that the watchdog
timeout is disabled before enterring bcmgenet_netif_stop. There
are currently only two users of the bcmgenet_netif_stop function:
close and suspend.

The close case already handles the issue by exiting the RUNNING
state before invoking the driver close service.

The suspend case now performs the netif_device_detach to exit the
PRESENT state before the call to bcmgenet_netif_stop rather than
after it.

These behaviors prevent any future scheduling of the driver timeout
service during the window. The netif_tx_stop_all_queues function
in bcmgenet_netif_stop is replaced with netif_tx_disable to ensure
synchronization with any transmit or timeout threads that may
already be executing on other cores.

For symmetry, the netif_device_attach call upon resume is moved to
after the call to bcmgenet_netif_start. Since it wakes the transmit
queues it is not necessary to invoke netif_tx_start_all_queues from
bcmgenet_netif_start so it is moved into the driver open service.

Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file")
Signed-off-by: Doug Berger <opendmb@gmail.com>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/genet/bcmgenet.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 20c1681bb1afe..2d6f090bf6440 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -2855,7 +2855,6 @@ static void bcmgenet_netif_start(struct net_device *dev)
 
 	umac_enable_set(priv, CMD_TX_EN | CMD_RX_EN, true);
 
-	netif_tx_start_all_queues(dev);
 	bcmgenet_enable_tx_napi(priv);
 
 	/* Monitor link interrupts now */
@@ -2937,6 +2936,8 @@ static int bcmgenet_open(struct net_device *dev)
 
 	bcmgenet_netif_start(dev);
 
+	netif_tx_start_all_queues(dev);
+
 	return 0;
 
 err_irq1:
@@ -2958,7 +2959,7 @@ static void bcmgenet_netif_stop(struct net_device *dev)
 	struct bcmgenet_priv *priv = netdev_priv(dev);
 
 	bcmgenet_disable_tx_napi(priv);
-	netif_tx_stop_all_queues(dev);
+	netif_tx_disable(dev);
 
 	/* Disable MAC receive */
 	umac_enable_set(priv, CMD_RX_EN, false);
@@ -3620,13 +3621,13 @@ static int bcmgenet_suspend(struct device *d)
 	if (!netif_running(dev))
 		return 0;
 
+	netif_device_detach(dev);
+
 	bcmgenet_netif_stop(dev);
 
 	if (!device_may_wakeup(d))
 		phy_suspend(dev->phydev);
 
-	netif_device_detach(dev);
-
 	/* Prepare the device for Wake-on-LAN and switch to the slow clock */
 	if (device_may_wakeup(d) && priv->wolopts) {
 		ret = bcmgenet_power_down(priv, GENET_POWER_WOL_MAGIC);
@@ -3700,8 +3701,6 @@ static int bcmgenet_resume(struct device *d)
 	/* Always enable ring 16 - descriptor ring */
 	bcmgenet_enable_dma(priv, dma_ctrl);
 
-	netif_device_attach(dev);
-
 	if (!device_may_wakeup(d))
 		phy_resume(dev->phydev);
 
@@ -3710,6 +3709,8 @@ static int bcmgenet_resume(struct device *d)
 
 	bcmgenet_netif_start(dev);
 
+	netif_device_attach(dev);
+
 	return 0;
 
 out_clk_disable:
-- 
GitLab


From 7cb6a2a2c72c1ed8f42fb01f1a661281b568dead Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 1 Nov 2018 15:55:38 -0700
Subject: [PATCH 0513/1890] net: systemport: Protect stop from timeout

A timing hazard exists when the network interface is stopped that
allows a watchdog timeout to be processed by a separate core in
parallel. This creates the potential for the timeout handler to
wake the queues while the driver is shutting down, or access
registers after their clocks have been removed.

The more common case is that the watchdog timeout will produce a
warning message which doesn't lead to a crash. The chances of this
are greatly increased by the fact that bcm_sysport_netif_stop stops
the transmit queues which can easily precipitate a watchdog time-
out because of stale trans_start data in the queues.

This commit corrects the behavior by ensuring that the watchdog
timeout is disabled before enterring bcm_sysport_netif_stop. There
are currently only two users of the bcm_sysport_netif_stop function:
close and suspend.

The close case already handles the issue by exiting the RUNNING
state before invoking the driver close service.

The suspend case now performs the netif_device_detach to exit the
PRESENT state before the call to bcm_sysport_netif_stop rather than
after it.

These behaviors prevent any future scheduling of the driver timeout
service during the window. The netif_tx_stop_all_queues function
in bcm_sysport_netif_stop is replaced with netif_tx_disable to ensure
synchronization with any transmit or timeout threads that may
already be executing on other cores.

For symmetry, the netif_device_attach call upon resume is moved to
after the call to bcm_sysport_netif_start. Since it wakes the transmit
queues it is not necessary to invoke netif_tx_start_all_queues from
bcm_sysport_netif_start so it is moved into the driver open service.

Fixes: 40755a0fce17 ("net: systemport: add suspend and resume support")
Fixes: 80105befdb4b ("net: systemport: add Broadcom SYSTEMPORT Ethernet MAC driver")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bcmsysport.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index 4122553e224b2..0e2d99c737e35 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -1902,9 +1902,6 @@ static void bcm_sysport_netif_start(struct net_device *dev)
 		intrl2_1_mask_clear(priv, 0xffffffff);
 	else
 		intrl2_0_mask_clear(priv, INTRL2_0_TDMA_MBDONE_MASK);
-
-	/* Last call before we start the real business */
-	netif_tx_start_all_queues(dev);
 }
 
 static void rbuf_init(struct bcm_sysport_priv *priv)
@@ -2048,6 +2045,8 @@ static int bcm_sysport_open(struct net_device *dev)
 
 	bcm_sysport_netif_start(dev);
 
+	netif_tx_start_all_queues(dev);
+
 	return 0;
 
 out_clear_rx_int:
@@ -2071,7 +2070,7 @@ static void bcm_sysport_netif_stop(struct net_device *dev)
 	struct bcm_sysport_priv *priv = netdev_priv(dev);
 
 	/* stop all software from updating hardware */
-	netif_tx_stop_all_queues(dev);
+	netif_tx_disable(dev);
 	napi_disable(&priv->napi);
 	cancel_work_sync(&priv->dim.dim.work);
 	phy_stop(dev->phydev);
@@ -2658,12 +2657,12 @@ static int __maybe_unused bcm_sysport_suspend(struct device *d)
 	if (!netif_running(dev))
 		return 0;
 
+	netif_device_detach(dev);
+
 	bcm_sysport_netif_stop(dev);
 
 	phy_suspend(dev->phydev);
 
-	netif_device_detach(dev);
-
 	/* Disable UniMAC RX */
 	umac_enable_set(priv, CMD_RX_EN, 0);
 
@@ -2746,8 +2745,6 @@ static int __maybe_unused bcm_sysport_resume(struct device *d)
 		goto out_free_rx_ring;
 	}
 
-	netif_device_attach(dev);
-
 	/* RX pipe enable */
 	topctrl_writel(priv, 0, RX_FLUSH_CNTL);
 
@@ -2788,6 +2785,8 @@ static int __maybe_unused bcm_sysport_resume(struct device *d)
 
 	bcm_sysport_netif_start(dev);
 
+	netif_device_attach(dev);
+
 	return 0;
 
 out_free_rx_ring:
-- 
GitLab


From cd26ea6d50a207ee37e0364ecc2d196d6c9671e8 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Sat, 3 Nov 2018 08:19:56 -0300
Subject: [PATCH 0514/1890] perf trace: Fix setting of augmented payload when
 using eBPF + raw_syscalls

For now with BPF raw_augmented we hook into raw_syscalls:sys_enter and
there we get all 6 syscall args plus the tracepoint common fields
(sizeof(long)) and the syscall_nr (another long). So we check if that is
the case and if so don't look after the sc->args_size, but always after
the full raw_syscalls:sys_enter payload, which is fixed.

We'll revisit this later to pass s->args_size to the BPF augmenter (now
tools/perf/examples/bpf/augmented_raw_syscalls.c, so that it copies only
what we need for each syscall, like what happens when we use
syscalls:sys_enter_NAME, so that we reduce the kernel/userspace traffic
to just what is needed for each syscall.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-nlslrg8apxdsobt4pwl3n7ur@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-trace.c | 37 ++++++++++++++++++++++++++++++-------
 1 file changed, 30 insertions(+), 7 deletions(-)

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index f582ca575883e..835619476370c 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -108,6 +108,7 @@ struct trace {
 	} stats;
 	unsigned int		max_stack;
 	unsigned int		min_stack;
+	bool			raw_augmented_syscalls;
 	bool			not_ev_qualifier;
 	bool			live;
 	bool			full_time;
@@ -1724,13 +1725,28 @@ static int trace__fprintf_sample(struct trace *trace, struct perf_evsel *evsel,
 	return printed;
 }
 
-static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sample, int *augmented_args_size)
+static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sample, int *augmented_args_size, bool raw_augmented)
 {
 	void *augmented_args = NULL;
+	/*
+	 * For now with BPF raw_augmented we hook into raw_syscalls:sys_enter
+	 * and there we get all 6 syscall args plus the tracepoint common
+	 * fields (sizeof(long)) and the syscall_nr (another long). So we check
+	 * if that is the case and if so don't look after the sc->args_size,
+	 * but always after the full raw_syscalls:sys_enter payload, which is
+	 * fixed.
+	 *
+	 * We'll revisit this later to pass s->args_size to the BPF augmenter
+	 * (now tools/perf/examples/bpf/augmented_raw_syscalls.c, so that it
+	 * copies only what we need for each syscall, like what happens when we
+	 * use syscalls:sys_enter_NAME, so that we reduce the kernel/userspace
+	 * traffic to just what is needed for each syscall.
+	 */
+	int args_size = raw_augmented ? (8 * (int)sizeof(long)) : sc->args_size;
 
-	*augmented_args_size = sample->raw_size - sc->args_size;
+	*augmented_args_size = sample->raw_size - args_size;
 	if (*augmented_args_size > 0)
-		augmented_args = sample->raw_data + sc->args_size;
+		augmented_args = sample->raw_data + args_size;
 
 	return augmented_args;
 }
@@ -1780,7 +1796,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
 	 * here and avoid using augmented syscalls when the evsel is the raw_syscalls one.
 	 */
 	if (evsel != trace->syscalls.events.sys_enter)
-		augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size);
+		augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls);
 	ttrace->entry_time = sample->time;
 	msg = ttrace->entry_str;
 	printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
@@ -1833,7 +1849,7 @@ static int trace__fprintf_sys_enter(struct trace *trace, struct perf_evsel *evse
 		goto out_put;
 
 	args = perf_evsel__sc_tp_ptr(evsel, args, sample);
-	augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size);
+	augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls);
 	syscall__scnprintf_args(sc, msg, sizeof(msg), args, augmented_args, augmented_args_size, trace, thread);
 	fprintf(trace->output, "%s", msg);
 	err = 0;
@@ -3501,8 +3517,15 @@ int cmd_trace(int argc, const char **argv)
 		evsel->handler = trace__sys_enter;
 
 		evlist__for_each_entry(trace.evlist, evsel) {
-			if (strstarts(perf_evsel__name(evsel), "syscalls:sys_exit_") ||
-			    strcmp(perf_evsel__name(evsel), "raw_syscalls:sys_exit") == 0) {
+			bool raw_syscalls_sys_exit = strcmp(perf_evsel__name(evsel), "raw_syscalls:sys_exit") == 0;
+
+			if (raw_syscalls_sys_exit) {
+				trace.raw_augmented_syscalls = true;
+				goto init_augmented_syscall_tp;
+			}
+
+			if (strstarts(perf_evsel__name(evsel), "syscalls:sys_exit_")) {
+init_augmented_syscall_tp:
 				perf_evsel__init_augmented_syscall_tp(evsel);
 				perf_evsel__init_augmented_syscall_tp_ret(evsel);
 				evsel->handler = trace__sys_exit;
-- 
GitLab


From fe5192ac81ad0d4dfe1395d11f393f0513c15f7f Mon Sep 17 00:00:00 2001
From: Martin Kelly <martin@martingkelly.com>
Date: Sun, 28 Oct 2018 20:18:53 -0700
Subject: [PATCH 0515/1890] iio:st_magn: Fix enable device after trigger

Currently, we enable the device before we enable the device trigger. At
high frequencies, this can cause interrupts that don't yet have a poll
function associated with them and are thus treated as spurious. At high
frequencies with level interrupts, this can even cause an interrupt storm
of repeated spurious interrupts (~100,000 on my Beagleboard with the
LSM9DS1 magnetometer). If these repeat too much, the interrupt will get
disabled and the device will stop functioning.

To prevent these problems, enable the device prior to enabling the device
trigger, and disable the divec prior to disabling the trigger. This means
there's no window of time during which the device creates interrupts but we
have no trigger to answer them.

Fixes: 90efe055629 ("iio: st_sensors: harden interrupt handling")
Signed-off-by: Martin Kelly <martin@martingkelly.com>
Tested-by: Denis Ciocca <denis.ciocca@st.com>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/magnetometer/st_magn_buffer.c | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/drivers/iio/magnetometer/st_magn_buffer.c b/drivers/iio/magnetometer/st_magn_buffer.c
index 0a9e8fadfa9de..37ab305664649 100644
--- a/drivers/iio/magnetometer/st_magn_buffer.c
+++ b/drivers/iio/magnetometer/st_magn_buffer.c
@@ -30,11 +30,6 @@ int st_magn_trig_set_state(struct iio_trigger *trig, bool state)
 	return st_sensors_set_dataready_irq(indio_dev, state);
 }
 
-static int st_magn_buffer_preenable(struct iio_dev *indio_dev)
-{
-	return st_sensors_set_enable(indio_dev, true);
-}
-
 static int st_magn_buffer_postenable(struct iio_dev *indio_dev)
 {
 	int err;
@@ -50,7 +45,7 @@ static int st_magn_buffer_postenable(struct iio_dev *indio_dev)
 	if (err < 0)
 		goto st_magn_buffer_postenable_error;
 
-	return err;
+	return st_sensors_set_enable(indio_dev, true);
 
 st_magn_buffer_postenable_error:
 	kfree(mdata->buffer_data);
@@ -63,11 +58,11 @@ static int st_magn_buffer_predisable(struct iio_dev *indio_dev)
 	int err;
 	struct st_sensor_data *mdata = iio_priv(indio_dev);
 
-	err = iio_triggered_buffer_predisable(indio_dev);
+	err = st_sensors_set_enable(indio_dev, false);
 	if (err < 0)
 		goto st_magn_buffer_predisable_error;
 
-	err = st_sensors_set_enable(indio_dev, false);
+	err = iio_triggered_buffer_predisable(indio_dev);
 
 st_magn_buffer_predisable_error:
 	kfree(mdata->buffer_data);
@@ -75,7 +70,6 @@ static int st_magn_buffer_predisable(struct iio_dev *indio_dev)
 }
 
 static const struct iio_buffer_setup_ops st_magn_buffer_setup_ops = {
-	.preenable = &st_magn_buffer_preenable,
 	.postenable = &st_magn_buffer_postenable,
 	.predisable = &st_magn_buffer_predisable,
 };
-- 
GitLab


From 17b8b74c0f8dbf9b9e3301f9ca5b65dd1c079951 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Fri, 19 Oct 2018 19:35:19 +0200
Subject: [PATCH 0516/1890] netfilter: ipset: Correct rcu_dereference() call in
 ip_set_put_comment()

The function is called when rcu_read_lock() is held and not
when rcu_read_lock_bh() is held.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/ipset/ip_set_comment.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/netfilter/ipset/ip_set_comment.h b/include/linux/netfilter/ipset/ip_set_comment.h
index 8e2bab1e8e909..70877f8de7e91 100644
--- a/include/linux/netfilter/ipset/ip_set_comment.h
+++ b/include/linux/netfilter/ipset/ip_set_comment.h
@@ -43,11 +43,11 @@ ip_set_init_comment(struct ip_set *set, struct ip_set_comment *comment,
 	rcu_assign_pointer(comment->c, c);
 }
 
-/* Used only when dumping a set, protected by rcu_read_lock_bh() */
+/* Used only when dumping a set, protected by rcu_read_lock() */
 static inline int
 ip_set_put_comment(struct sk_buff *skb, const struct ip_set_comment *comment)
 {
-	struct ip_set_comment_rcu *c = rcu_dereference_bh(comment->c);
+	struct ip_set_comment_rcu *c = rcu_dereference(comment->c);
 
 	if (!c)
 		return 0;
-- 
GitLab


From 54451f60c8fa061af9051a53be9786393947367c Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Sun, 21 Oct 2018 00:00:08 +0900
Subject: [PATCH 0517/1890] netfilter: xt_IDLETIMER: add sysfs filename
 checking routine

When IDLETIMER rule is added, sysfs file is created under
/sys/class/xt_idletimer/timers/
But some label name shouldn't be used.
".", "..", "power", "uevent", "subsystem", etc...
So that sysfs filename checking routine is needed.

test commands:
   %iptables -I INPUT -j IDLETIMER --timeout 1 --label "power"

splat looks like:
[95765.423132] sysfs: cannot create duplicate filename '/devices/virtual/xt_idletimer/timers/power'
[95765.433418] CPU: 0 PID: 8446 Comm: iptables Not tainted 4.19.0-rc6+ #20
[95765.449755] Call Trace:
[95765.449755]  dump_stack+0xc9/0x16b
[95765.449755]  ? show_regs_print_info+0x5/0x5
[95765.449755]  sysfs_warn_dup+0x74/0x90
[95765.449755]  sysfs_add_file_mode_ns+0x352/0x500
[95765.449755]  sysfs_create_file_ns+0x179/0x270
[95765.449755]  ? sysfs_add_file_mode_ns+0x500/0x500
[95765.449755]  ? idletimer_tg_checkentry+0x3e5/0xb1b [xt_IDLETIMER]
[95765.449755]  ? rcu_read_lock_sched_held+0x114/0x130
[95765.449755]  ? __kmalloc_track_caller+0x211/0x2b0
[95765.449755]  ? memcpy+0x34/0x50
[95765.449755]  idletimer_tg_checkentry+0x4e2/0xb1b [xt_IDLETIMER]
[ ... ]

Fixes: 0902b469bd25 ("netfilter: xtables: idletimer target implementation")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/xt_IDLETIMER.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
index c6acfc2d9c841..eb4cbd244c3d3 100644
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -114,6 +114,22 @@ static void idletimer_tg_expired(struct timer_list *t)
 	schedule_work(&timer->work);
 }
 
+static int idletimer_check_sysfs_name(const char *name, unsigned int size)
+{
+	int ret;
+
+	ret = xt_check_proc_name(name, size);
+	if (ret < 0)
+		return ret;
+
+	if (!strcmp(name, "power") ||
+	    !strcmp(name, "subsystem") ||
+	    !strcmp(name, "uevent"))
+		return -EINVAL;
+
+	return 0;
+}
+
 static int idletimer_tg_create(struct idletimer_tg_info *info)
 {
 	int ret;
@@ -124,6 +140,10 @@ static int idletimer_tg_create(struct idletimer_tg_info *info)
 		goto out;
 	}
 
+	ret = idletimer_check_sysfs_name(info->label, sizeof(info->label));
+	if (ret < 0)
+		goto out_free_timer;
+
 	sysfs_attr_init(&info->timer->attr.attr);
 	info->timer->attr.attr.name = kstrdup(info->label, GFP_KERNEL);
 	if (!info->timer->attr.attr.name) {
-- 
GitLab


From 8a02bdd50b2ecb6d62121d2958d3ea186cc88ce7 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Tue, 30 Oct 2018 22:43:42 +0100
Subject: [PATCH 0518/1890] netfilter: ipset: Fix calling ip_set() macro at
 dumping

The ip_set() macro is called when either ip_set_ref_lock held only
or no lock/nfnl mutex is held at dumping. Take this into account
properly. Also, use Pablo's suggestion to use rcu_dereference_raw(),
the ref_netlink protects the set.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/ipset/ip_set_core.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 68db946df1511..1577f2f76060d 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -55,11 +55,15 @@ MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
 MODULE_DESCRIPTION("core IP set support");
 MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
 
-/* When the nfnl mutex is held: */
+/* When the nfnl mutex or ip_set_ref_lock is held: */
 #define ip_set_dereference(p)		\
-	rcu_dereference_protected(p, lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET))
+	rcu_dereference_protected(p,	\
+		lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET) || \
+		lockdep_is_held(&ip_set_ref_lock))
 #define ip_set(inst, id)		\
 	ip_set_dereference((inst)->ip_set_list)[id]
+#define ip_set_ref_netlink(inst,id)	\
+	rcu_dereference_raw((inst)->ip_set_list)[id]
 
 /* The set types are implemented in modules and registered set types
  * can be found in ip_set_type_list. Adding/deleting types is
@@ -1251,7 +1255,7 @@ ip_set_dump_done(struct netlink_callback *cb)
 		struct ip_set_net *inst =
 			(struct ip_set_net *)cb->args[IPSET_CB_NET];
 		ip_set_id_t index = (ip_set_id_t)cb->args[IPSET_CB_INDEX];
-		struct ip_set *set = ip_set(inst, index);
+		struct ip_set *set = ip_set_ref_netlink(inst, index);
 
 		if (set->variant->uref)
 			set->variant->uref(set, cb, false);
@@ -1440,7 +1444,7 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
 release_refcount:
 	/* If there was an error or set is done, release set */
 	if (ret || !cb->args[IPSET_CB_ARG0]) {
-		set = ip_set(inst, index);
+		set = ip_set_ref_netlink(inst, index);
 		if (set->variant->uref)
 			set->variant->uref(set, cb, false);
 		pr_debug("release set %s\n", set->name);
-- 
GitLab


From a95a7774d51e13f9cf4b7285666829b68852f07a Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 2 Nov 2018 00:11:34 +0100
Subject: [PATCH 0519/1890] netfilter: conntrack: add
 nf_{tcp,udp,sctp,icmp,dccp,icmpv6,generic}_pernet()

Expose these functions to access conntrack protocol tracker netns area,
nfnetlink_cttimeout needs this.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_l4proto.h | 39 ++++++++++++++++++++
 net/netfilter/nf_conntrack_proto_dccp.c      | 13 ++-----
 net/netfilter/nf_conntrack_proto_generic.c   | 11 ++----
 net/netfilter/nf_conntrack_proto_icmp.c      | 11 ++----
 net/netfilter/nf_conntrack_proto_icmpv6.c    | 11 ++----
 net/netfilter/nf_conntrack_proto_sctp.c      | 11 ++----
 net/netfilter/nf_conntrack_proto_tcp.c       | 15 +++-----
 net/netfilter/nf_conntrack_proto_udp.c       | 11 ++----
 8 files changed, 63 insertions(+), 59 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index eed04af9b75e5..ae7b86f587f2c 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -153,4 +153,43 @@ void nf_ct_l4proto_log_invalid(const struct sk_buff *skb,
 			       const char *fmt, ...) { }
 #endif /* CONFIG_SYSCTL */
 
+static inline struct nf_generic_net *nf_generic_pernet(struct net *net)
+{
+       return &net->ct.nf_ct_proto.generic;
+}
+
+static inline struct nf_tcp_net *nf_tcp_pernet(struct net *net)
+{
+       return &net->ct.nf_ct_proto.tcp;
+}
+
+static inline struct nf_udp_net *nf_udp_pernet(struct net *net)
+{
+       return &net->ct.nf_ct_proto.udp;
+}
+
+static inline struct nf_icmp_net *nf_icmp_pernet(struct net *net)
+{
+       return &net->ct.nf_ct_proto.icmp;
+}
+
+static inline struct nf_icmp_net *nf_icmpv6_pernet(struct net *net)
+{
+       return &net->ct.nf_ct_proto.icmpv6;
+}
+
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+static inline struct nf_dccp_net *nf_dccp_pernet(struct net *net)
+{
+       return &net->ct.nf_ct_proto.dccp;
+}
+#endif
+
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+static inline struct nf_sctp_net *nf_sctp_pernet(struct net *net)
+{
+       return &net->ct.nf_ct_proto.sctp;
+}
+#endif
+
 #endif /*_NF_CONNTRACK_PROTOCOL_H*/
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 171e9e122e5f1..023c1445bc396 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -384,11 +384,6 @@ dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] =
 	},
 };
 
-static inline struct nf_dccp_net *dccp_pernet(struct net *net)
-{
-	return &net->ct.nf_ct_proto.dccp;
-}
-
 static noinline bool
 dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
 	 const struct dccp_hdr *dh)
@@ -401,7 +396,7 @@ dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
 	state = dccp_state_table[CT_DCCP_ROLE_CLIENT][dh->dccph_type][CT_DCCP_NONE];
 	switch (state) {
 	default:
-		dn = dccp_pernet(net);
+		dn = nf_dccp_pernet(net);
 		if (dn->dccp_loose == 0) {
 			msg = "not picking up existing connection ";
 			goto out_invalid;
@@ -568,7 +563,7 @@ static int dccp_packet(struct nf_conn *ct, struct sk_buff *skb,
 
 	timeouts = nf_ct_timeout_lookup(ct);
 	if (!timeouts)
-		timeouts = dccp_pernet(nf_ct_net(ct))->dccp_timeout;
+		timeouts = nf_dccp_pernet(nf_ct_net(ct))->dccp_timeout;
 	nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]);
 
 	return NF_ACCEPT;
@@ -681,7 +676,7 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct)
 static int dccp_timeout_nlattr_to_obj(struct nlattr *tb[],
 				      struct net *net, void *data)
 {
-	struct nf_dccp_net *dn = dccp_pernet(net);
+	struct nf_dccp_net *dn = nf_dccp_pernet(net);
 	unsigned int *timeouts = data;
 	int i;
 
@@ -814,7 +809,7 @@ static int dccp_kmemdup_sysctl_table(struct net *net, struct nf_proto_net *pn,
 
 static int dccp_init_net(struct net *net)
 {
-	struct nf_dccp_net *dn = dccp_pernet(net);
+	struct nf_dccp_net *dn = nf_dccp_pernet(net);
 	struct nf_proto_net *pn = &dn->pn;
 
 	if (!pn->users) {
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index e10e867e0b55f..5da19d5fbc767 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -27,11 +27,6 @@ static bool nf_generic_should_process(u8 proto)
 	}
 }
 
-static inline struct nf_generic_net *generic_pernet(struct net *net)
-{
-	return &net->ct.nf_ct_proto.generic;
-}
-
 static bool generic_pkt_to_tuple(const struct sk_buff *skb,
 				 unsigned int dataoff,
 				 struct net *net, struct nf_conntrack_tuple *tuple)
@@ -58,7 +53,7 @@ static int generic_packet(struct nf_conn *ct,
 	}
 
 	if (!timeout)
-		timeout = &generic_pernet(nf_ct_net(ct))->timeout;
+		timeout = &nf_generic_pernet(nf_ct_net(ct))->timeout;
 
 	nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
 	return NF_ACCEPT;
@@ -72,7 +67,7 @@ static int generic_packet(struct nf_conn *ct,
 static int generic_timeout_nlattr_to_obj(struct nlattr *tb[],
 					 struct net *net, void *data)
 {
-	struct nf_generic_net *gn = generic_pernet(net);
+	struct nf_generic_net *gn = nf_generic_pernet(net);
 	unsigned int *timeout = data;
 
 	if (!timeout)
@@ -138,7 +133,7 @@ static int generic_kmemdup_sysctl_table(struct nf_proto_net *pn,
 
 static int generic_init_net(struct net *net)
 {
-	struct nf_generic_net *gn = generic_pernet(net);
+	struct nf_generic_net *gn = nf_generic_pernet(net);
 	struct nf_proto_net *pn = &gn->pn;
 
 	gn->timeout = nf_ct_generic_timeout;
diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c
index 3598520bd19b7..de64d8a5fdfd1 100644
--- a/net/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/netfilter/nf_conntrack_proto_icmp.c
@@ -25,11 +25,6 @@
 
 static const unsigned int nf_ct_icmp_timeout = 30*HZ;
 
-static inline struct nf_icmp_net *icmp_pernet(struct net *net)
-{
-	return &net->ct.nf_ct_proto.icmp;
-}
-
 static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
 			      struct net *net, struct nf_conntrack_tuple *tuple)
 {
@@ -103,7 +98,7 @@ static int icmp_packet(struct nf_conn *ct,
 	}
 
 	if (!timeout)
-		timeout = &icmp_pernet(nf_ct_net(ct))->timeout;
+		timeout = &nf_icmp_pernet(nf_ct_net(ct))->timeout;
 
 	nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
 	return NF_ACCEPT;
@@ -275,7 +270,7 @@ static int icmp_timeout_nlattr_to_obj(struct nlattr *tb[],
 				      struct net *net, void *data)
 {
 	unsigned int *timeout = data;
-	struct nf_icmp_net *in = icmp_pernet(net);
+	struct nf_icmp_net *in = nf_icmp_pernet(net);
 
 	if (tb[CTA_TIMEOUT_ICMP_TIMEOUT]) {
 		if (!timeout)
@@ -337,7 +332,7 @@ static int icmp_kmemdup_sysctl_table(struct nf_proto_net *pn,
 
 static int icmp_init_net(struct net *net)
 {
-	struct nf_icmp_net *in = icmp_pernet(net);
+	struct nf_icmp_net *in = nf_icmp_pernet(net);
 	struct nf_proto_net *pn = &in->pn;
 
 	in->timeout = nf_ct_icmp_timeout;
diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c
index 378618feed5da..a15eefb8e3173 100644
--- a/net/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/netfilter/nf_conntrack_proto_icmpv6.c
@@ -30,11 +30,6 @@
 
 static const unsigned int nf_ct_icmpv6_timeout = 30*HZ;
 
-static inline struct nf_icmp_net *icmpv6_pernet(struct net *net)
-{
-	return &net->ct.nf_ct_proto.icmpv6;
-}
-
 static bool icmpv6_pkt_to_tuple(const struct sk_buff *skb,
 				unsigned int dataoff,
 				struct net *net,
@@ -87,7 +82,7 @@ static bool icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple,
 
 static unsigned int *icmpv6_get_timeouts(struct net *net)
 {
-	return &icmpv6_pernet(net)->timeout;
+	return &nf_icmpv6_pernet(net)->timeout;
 }
 
 /* Returns verdict for packet, or -1 for invalid. */
@@ -286,7 +281,7 @@ static int icmpv6_timeout_nlattr_to_obj(struct nlattr *tb[],
 					struct net *net, void *data)
 {
 	unsigned int *timeout = data;
-	struct nf_icmp_net *in = icmpv6_pernet(net);
+	struct nf_icmp_net *in = nf_icmpv6_pernet(net);
 
 	if (!timeout)
 		timeout = icmpv6_get_timeouts(net);
@@ -348,7 +343,7 @@ static int icmpv6_kmemdup_sysctl_table(struct nf_proto_net *pn,
 
 static int icmpv6_init_net(struct net *net)
 {
-	struct nf_icmp_net *in = icmpv6_pernet(net);
+	struct nf_icmp_net *in = nf_icmpv6_pernet(net);
 	struct nf_proto_net *pn = &in->pn;
 
 	in->timeout = nf_ct_icmpv6_timeout;
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 3d719d3eb9a38..d53e3e78f6052 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -146,11 +146,6 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = {
 	}
 };
 
-static inline struct nf_sctp_net *sctp_pernet(struct net *net)
-{
-	return &net->ct.nf_ct_proto.sctp;
-}
-
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
 /* Print out the private part of the conntrack. */
 static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
@@ -480,7 +475,7 @@ static int sctp_packet(struct nf_conn *ct,
 
 	timeouts = nf_ct_timeout_lookup(ct);
 	if (!timeouts)
-		timeouts = sctp_pernet(nf_ct_net(ct))->timeouts;
+		timeouts = nf_sctp_pernet(nf_ct_net(ct))->timeouts;
 
 	nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]);
 
@@ -599,7 +594,7 @@ static int sctp_timeout_nlattr_to_obj(struct nlattr *tb[],
 				      struct net *net, void *data)
 {
 	unsigned int *timeouts = data;
-	struct nf_sctp_net *sn = sctp_pernet(net);
+	struct nf_sctp_net *sn = nf_sctp_pernet(net);
 	int i;
 
 	/* set default SCTP timeouts. */
@@ -736,7 +731,7 @@ static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn,
 
 static int sctp_init_net(struct net *net)
 {
-	struct nf_sctp_net *sn = sctp_pernet(net);
+	struct nf_sctp_net *sn = nf_sctp_pernet(net);
 	struct nf_proto_net *pn = &sn->pn;
 
 	if (!pn->users) {
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 1bcf9984d45e8..4dcbd51a8e97f 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -272,11 +272,6 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
 	}
 };
 
-static inline struct nf_tcp_net *tcp_pernet(struct net *net)
-{
-	return &net->ct.nf_ct_proto.tcp;
-}
-
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
 /* Print out the private part of the conntrack. */
 static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
@@ -475,7 +470,7 @@ static bool tcp_in_window(const struct nf_conn *ct,
 			  const struct tcphdr *tcph)
 {
 	struct net *net = nf_ct_net(ct);
-	struct nf_tcp_net *tn = tcp_pernet(net);
+	struct nf_tcp_net *tn = nf_tcp_pernet(net);
 	struct ip_ct_tcp_state *sender = &state->seen[dir];
 	struct ip_ct_tcp_state *receiver = &state->seen[!dir];
 	const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
@@ -767,7 +762,7 @@ static noinline bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
 {
 	enum tcp_conntrack new_state;
 	struct net *net = nf_ct_net(ct);
-	const struct nf_tcp_net *tn = tcp_pernet(net);
+	const struct nf_tcp_net *tn = nf_tcp_pernet(net);
 	const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0];
 	const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1];
 
@@ -841,7 +836,7 @@ static int tcp_packet(struct nf_conn *ct,
 		      const struct nf_hook_state *state)
 {
 	struct net *net = nf_ct_net(ct);
-	struct nf_tcp_net *tn = tcp_pernet(net);
+	struct nf_tcp_net *tn = nf_tcp_pernet(net);
 	struct nf_conntrack_tuple *tuple;
 	enum tcp_conntrack new_state, old_state;
 	unsigned int index, *timeouts;
@@ -1283,7 +1278,7 @@ static unsigned int tcp_nlattr_tuple_size(void)
 static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[],
 				     struct net *net, void *data)
 {
-	struct nf_tcp_net *tn = tcp_pernet(net);
+	struct nf_tcp_net *tn = nf_tcp_pernet(net);
 	unsigned int *timeouts = data;
 	int i;
 
@@ -1508,7 +1503,7 @@ static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn,
 
 static int tcp_init_net(struct net *net)
 {
-	struct nf_tcp_net *tn = tcp_pernet(net);
+	struct nf_tcp_net *tn = nf_tcp_pernet(net);
 	struct nf_proto_net *pn = &tn->pn;
 
 	if (!pn->users) {
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index a7aa70370913c..c879d8d78cfde 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -32,14 +32,9 @@ static const unsigned int udp_timeouts[UDP_CT_MAX] = {
 	[UDP_CT_REPLIED]	= 180*HZ,
 };
 
-static inline struct nf_udp_net *udp_pernet(struct net *net)
-{
-	return &net->ct.nf_ct_proto.udp;
-}
-
 static unsigned int *udp_get_timeouts(struct net *net)
 {
-	return udp_pernet(net)->timeouts;
+	return nf_udp_pernet(net)->timeouts;
 }
 
 static void udp_error_log(const struct sk_buff *skb,
@@ -212,7 +207,7 @@ static int udp_timeout_nlattr_to_obj(struct nlattr *tb[],
 				     struct net *net, void *data)
 {
 	unsigned int *timeouts = data;
-	struct nf_udp_net *un = udp_pernet(net);
+	struct nf_udp_net *un = nf_udp_pernet(net);
 
 	if (!timeouts)
 		timeouts = un->timeouts;
@@ -292,7 +287,7 @@ static int udp_kmemdup_sysctl_table(struct nf_proto_net *pn,
 
 static int udp_init_net(struct net *net)
 {
-	struct nf_udp_net *un = udp_pernet(net);
+	struct nf_udp_net *un = nf_udp_pernet(net);
 	struct nf_proto_net *pn = &un->pn;
 
 	if (!pn->users) {
-- 
GitLab


From 8866df9264a34e675b4ee8a151db819b87cce2d3 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 2 Nov 2018 00:14:00 +0100
Subject: [PATCH 0520/1890] netfilter: nfnetlink_cttimeout: pass default
 timeout policy to obj_to_nlattr

Otherwise, we hit a NULL pointer deference since handlers always assume
default timeout policy is passed.

  netlink: 24 bytes leftover after parsing attributes in process `syz-executor2'.
  kasan: CONFIG_KASAN_INLINE enabled
  kasan: GPF could be caused by NULL-ptr deref or user memory access
  general protection fault: 0000 [#1] PREEMPT SMP KASAN
  CPU: 0 PID: 9575 Comm: syz-executor1 Not tainted 4.19.0+ #312
  Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
  RIP: 0010:icmp_timeout_obj_to_nlattr+0x77/0x170 net/netfilter/nf_conntrack_proto_icmp.c:297

Fixes: c779e849608a ("netfilter: conntrack: remove get_timeout() indirection")
Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nfnetlink_cttimeout.c | 47 ++++++++++++++++++++++++-----
 1 file changed, 40 insertions(+), 7 deletions(-)

diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index e7a50af1b3d61..a518eb162344e 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -382,7 +382,8 @@ static int cttimeout_default_set(struct net *net, struct sock *ctnl,
 static int
 cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid,
 			    u32 seq, u32 type, int event, u16 l3num,
-			    const struct nf_conntrack_l4proto *l4proto)
+			    const struct nf_conntrack_l4proto *l4proto,
+			    const unsigned int *timeouts)
 {
 	struct nlmsghdr *nlh;
 	struct nfgenmsg *nfmsg;
@@ -408,7 +409,7 @@ cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid,
 	if (!nest_parms)
 		goto nla_put_failure;
 
-	ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, NULL);
+	ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, timeouts);
 	if (ret < 0)
 		goto nla_put_failure;
 
@@ -430,6 +431,7 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
 				 struct netlink_ext_ack *extack)
 {
 	const struct nf_conntrack_l4proto *l4proto;
+	unsigned int *timeouts = NULL;
 	struct sk_buff *skb2;
 	int ret, err;
 	__u16 l3num;
@@ -442,12 +444,44 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
 	l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]);
 	l4proto = nf_ct_l4proto_find_get(l4num);
 
-	/* This protocol is not supported, skip. */
-	if (l4proto->l4proto != l4num) {
-		err = -EOPNOTSUPP;
+	err = -EOPNOTSUPP;
+	if (l4proto->l4proto != l4num)
 		goto err;
+
+	switch (l4proto->l4proto) {
+	case IPPROTO_ICMP:
+		timeouts = &nf_icmp_pernet(net)->timeout;
+		break;
+	case IPPROTO_TCP:
+		timeouts = nf_tcp_pernet(net)->timeouts;
+		break;
+	case IPPROTO_UDP:
+		timeouts = nf_udp_pernet(net)->timeouts;
+		break;
+	case IPPROTO_DCCP:
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+		timeouts = nf_dccp_pernet(net)->dccp_timeout;
+#endif
+		break;
+	case IPPROTO_ICMPV6:
+		timeouts = &nf_icmpv6_pernet(net)->timeout;
+		break;
+	case IPPROTO_SCTP:
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+		timeouts = nf_sctp_pernet(net)->timeouts;
+#endif
+		break;
+	case 255:
+		timeouts = &nf_generic_pernet(net)->timeout;
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		break;
 	}
 
+	if (!timeouts)
+		goto err;
+
 	skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 	if (skb2 == NULL) {
 		err = -ENOMEM;
@@ -458,8 +492,7 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
 					  nlh->nlmsg_seq,
 					  NFNL_MSG_TYPE(nlh->nlmsg_type),
 					  IPCTNL_MSG_TIMEOUT_DEFAULT_SET,
-					  l3num,
-					  l4proto);
+					  l3num, l4proto, timeouts);
 	if (ret <= 0) {
 		kfree_skb(skb2);
 		err = -ENOMEM;
-- 
GitLab


From e4844c9c62a0fe47980d6c3d4b7a096a5d755925 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 2 Nov 2018 11:33:37 +0100
Subject: [PATCH 0521/1890] netfilter: nft_compat: ebtables 'nat' table is
 normal chain type

Unlike ip(6)tables, the ebtables nat table has no special properties.
This bug causes 'ebtables -A' to fail when using a target such as
'snat' (ebt_snat target sets ".table = "nat"').  Targets that have
no table restrictions work fine.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_compat.c | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 768292eac2a46..9d0ede4742240 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -54,9 +54,11 @@ static bool nft_xt_put(struct nft_xt *xt)
 	return false;
 }
 
-static int nft_compat_chain_validate_dependency(const char *tablename,
-						const struct nft_chain *chain)
+static int nft_compat_chain_validate_dependency(const struct nft_ctx *ctx,
+						const char *tablename)
 {
+	enum nft_chain_types type = NFT_CHAIN_T_DEFAULT;
+	const struct nft_chain *chain = ctx->chain;
 	const struct nft_base_chain *basechain;
 
 	if (!tablename ||
@@ -64,9 +66,12 @@ static int nft_compat_chain_validate_dependency(const char *tablename,
 		return 0;
 
 	basechain = nft_base_chain(chain);
-	if (strcmp(tablename, "nat") == 0 &&
-	    basechain->type->type != NFT_CHAIN_T_NAT)
-		return -EINVAL;
+	if (strcmp(tablename, "nat") == 0) {
+		if (ctx->family != NFPROTO_BRIDGE)
+			type = NFT_CHAIN_T_NAT;
+		if (basechain->type->type != type)
+			return -EINVAL;
+	}
 
 	return 0;
 }
@@ -342,8 +347,7 @@ static int nft_target_validate(const struct nft_ctx *ctx,
 		if (target->hooks && !(hook_mask & target->hooks))
 			return -EINVAL;
 
-		ret = nft_compat_chain_validate_dependency(target->table,
-							   ctx->chain);
+		ret = nft_compat_chain_validate_dependency(ctx, target->table);
 		if (ret < 0)
 			return ret;
 	}
@@ -590,8 +594,7 @@ static int nft_match_validate(const struct nft_ctx *ctx,
 		if (match->hooks && !(hook_mask & match->hooks))
 			return -EINVAL;
 
-		ret = nft_compat_chain_validate_dependency(match->table,
-							   ctx->chain);
+		ret = nft_compat_chain_validate_dependency(ctx, match->table);
 		if (ret < 0)
 			return ret;
 	}
-- 
GitLab


From f393808dc64149ccd0e5a8427505ba2974a59854 Mon Sep 17 00:00:00 2001
From: Vasily Khoruzhick <vasilykh@arista.com>
Date: Thu, 25 Oct 2018 12:15:43 -0700
Subject: [PATCH 0522/1890] netfilter: conntrack: fix calculation of next
 bucket number in early_drop

If there's no entry to drop in bucket that corresponds to the hash,
early_drop() should look for it in other buckets. But since it increments
hash instead of bucket number, it actually looks in the same bucket 8
times: hsize is 16k by default (14 bits) and hash is 32-bit value, so
reciprocal_scale(hash, hsize) returns the same value for hash..hash+7 in
most cases.

Fix it by increasing bucket number instead of hash and rename _hash
to bucket to avoid future confusion.

Fixes: 3e86638e9a0b ("netfilter: conntrack: consider ct netns in early_drop logic")
Cc: <stable@vger.kernel.org> # v4.7+
Signed-off-by: Vasily Khoruzhick <vasilykh@arista.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_core.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index ca1168d67fac6..e92e749aff53e 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1073,19 +1073,22 @@ static unsigned int early_drop_list(struct net *net,
 	return drops;
 }
 
-static noinline int early_drop(struct net *net, unsigned int _hash)
+static noinline int early_drop(struct net *net, unsigned int hash)
 {
-	unsigned int i;
+	unsigned int i, bucket;
 
 	for (i = 0; i < NF_CT_EVICTION_RANGE; i++) {
 		struct hlist_nulls_head *ct_hash;
-		unsigned int hash, hsize, drops;
+		unsigned int hsize, drops;
 
 		rcu_read_lock();
 		nf_conntrack_get_ht(&ct_hash, &hsize);
-		hash = reciprocal_scale(_hash++, hsize);
+		if (!i)
+			bucket = reciprocal_scale(hash, hsize);
+		else
+			bucket = (bucket + 1) % hsize;
 
-		drops = early_drop_list(net, &ct_hash[hash]);
+		drops = early_drop_list(net, &ct_hash[bucket]);
 		rcu_read_unlock();
 
 		if (drops) {
-- 
GitLab


From e68599a3c3ad0f3171a7cb4e48aa6f9a69381902 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Fri, 2 Nov 2018 15:47:49 -0700
Subject: [PATCH 0523/1890] mm: handle no memcg case in memcg_kmem_charge()
 properly

Mike Galbraith reported a regression caused by the commit 9b6f7e163cd0
("mm: rework memcg kernel stack accounting") on a system with
"cgroup_disable=memory" boot option: the system panics with the following
stack trace:

  BUG: unable to handle kernel NULL pointer dereference at 00000000000000f8
  PGD 0 P4D 0
  Oops: 0002 [#1] PREEMPT SMP PTI
  CPU: 0 PID: 1 Comm: systemd Not tainted 4.19.0-preempt+ #410
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20180531_142017-buildhw-08.phx2.fed4
  RIP: 0010:page_counter_try_charge+0x22/0xc0
  Code: 41 5d c3 c3 0f 1f 40 00 0f 1f 44 00 00 48 85 ff 0f 84 a7 00 00 00 41 56 48 89 f8 49 89 fe 49
  Call Trace:
   try_charge+0xcb/0x780
   memcg_kmem_charge_memcg+0x28/0x80
   memcg_kmem_charge+0x8b/0x1d0
   copy_process.part.41+0x1ca/0x2070
   _do_fork+0xd7/0x3d0
   do_syscall_64+0x5a/0x180
   entry_SYSCALL_64_after_hwframe+0x49/0xbe

The problem occurs because get_mem_cgroup_from_current() returns the NULL
pointer if memory controller is disabled.  Let's check if this is a case
at the beginning of memcg_kmem_charge() and just return 0 if
mem_cgroup_disabled() returns true.  This is how we handle this case in
many other places in the memory controller code.

Link: http://lkml.kernel.org/r/20181029215123.17830-1-guro@fb.com
Fixes: 9b6f7e163cd0 ("mm: rework memcg kernel stack accounting")
Signed-off-by: Roman Gushchin <guro@fb.com>
Reported-by: Mike Galbraith <efault@gmx.de>
Acked-by: Rik van Riel <riel@surriel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Shakeel Butt <shakeelb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memcontrol.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 54920cbc46bfd..6e1469b80cb7d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2593,7 +2593,7 @@ int memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
 	struct mem_cgroup *memcg;
 	int ret = 0;
 
-	if (memcg_kmem_bypass())
+	if (mem_cgroup_disabled() || memcg_kmem_bypass())
 		return 0;
 
 	memcg = get_mem_cgroup_from_current();
-- 
GitLab


From 94e297c50b529f5d01cfd1dbc808d61e95180ab7 Mon Sep 17 00:00:00 2001
From: Sam Protsenko <semen.protsenko@linaro.org>
Date: Fri, 2 Nov 2018 15:47:53 -0700
Subject: [PATCH 0524/1890] include/linux/notifier.h: SRCU: fix ctags

ctags indexing ("make tags" command) throws this warning:

    ctags: Warning: include/linux/notifier.h:125:
    null expansion of name pattern "\1"

This is the result of DEFINE_PER_CPU() macro expansion.  Fix that by
getting rid of line break.

Similar fix was already done in commit 25528213fe9f ("tags: Fix
DEFINE_PER_CPU expansions"), but this one probably wasn't noticed.

Link: http://lkml.kernel.org/r/20181030202808.28027-1-semen.protsenko@linaro.org
Fixes: 9c80172b902d ("kernel/SRCU: provide a static initializer")
Signed-off-by: Sam Protsenko <semen.protsenko@linaro.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/notifier.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index f35c7bf761430..0096a05395e38 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -122,8 +122,7 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
 
 #ifdef CONFIG_TREE_SRCU
 #define _SRCU_NOTIFIER_HEAD(name, mod)				\
-	static DEFINE_PER_CPU(struct srcu_data,			\
-			name##_head_srcu_data);			\
+	static DEFINE_PER_CPU(struct srcu_data, name##_head_srcu_data); \
 	mod struct srcu_notifier_head name =			\
 			SRCU_NOTIFIER_INIT(name, name##_head_srcu_data)
 
-- 
GitLab


From ac5b2c18911ffe95c08d69273917f90212cf5659 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Fri, 2 Nov 2018 15:47:59 -0700
Subject: [PATCH 0525/1890] mm: thp: relax __GFP_THISNODE for MADV_HUGEPAGE
 mappings

THP allocation might be really disruptive when allocated on NUMA system
with the local node full or hard to reclaim.  Stefan has posted an
allocation stall report on 4.12 based SLES kernel which suggests the
same issue:

  kvm: page allocation stalls for 194572ms, order:9, mode:0x4740ca(__GFP_HIGHMEM|__GFP_IO|__GFP_FS|__GFP_COMP|__GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_THISNODE|__GFP_MOVABLE|__GFP_DIRECT_RECLAIM), nodemask=(null)
  kvm cpuset=/ mems_allowed=0-1
  CPU: 10 PID: 84752 Comm: kvm Tainted: G        W 4.12.0+98-ph <a href="/view.php?id=1" title="[geschlossen] Integration Ramdisk" class="resolved">0000001</a> SLE15 (unreleased)
  Hardware name: Supermicro SYS-1029P-WTRT/X11DDW-NT, BIOS 2.0 12/05/2017
  Call Trace:
   dump_stack+0x5c/0x84
   warn_alloc+0xe0/0x180
   __alloc_pages_slowpath+0x820/0xc90
   __alloc_pages_nodemask+0x1cc/0x210
   alloc_pages_vma+0x1e5/0x280
   do_huge_pmd_wp_page+0x83f/0xf00
   __handle_mm_fault+0x93d/0x1060
   handle_mm_fault+0xc6/0x1b0
   __do_page_fault+0x230/0x430
   do_page_fault+0x2a/0x70
   page_fault+0x7b/0x80
   [...]
  Mem-Info:
  active_anon:126315487 inactive_anon:1612476 isolated_anon:5
   active_file:60183 inactive_file:245285 isolated_file:0
   unevictable:15657 dirty:286 writeback:1 unstable:0
   slab_reclaimable:75543 slab_unreclaimable:2509111
   mapped:81814 shmem:31764 pagetables:370616 bounce:0
   free:32294031 free_pcp:6233 free_cma:0
  Node 0 active_anon:254680388kB inactive_anon:1112760kB active_file:240648kB inactive_file:981168kB unevictable:13368kB isolated(anon):0kB isolated(file):0kB mapped:280240kB dirty:1144kB writeback:0kB shmem:95832kB shmem_thp: 0kB shmem_pmdmapped: 0kB anon_thp: 81225728kB writeback_tmp:0kB unstable:0kB all_unreclaimable? no
  Node 1 active_anon:250583072kB inactive_anon:5337144kB active_file:84kB inactive_file:0kB unevictable:49260kB isolated(anon):20kB isolated(file):0kB mapped:47016kB dirty:0kB writeback:4kB shmem:31224kB shmem_thp: 0kB shmem_pmdmapped: 0kB anon_thp: 31897600kB writeback_tmp:0kB unstable:0kB all_unreclaimable? no

The defrag mode is "madvise" and from the above report it is clear that
the THP has been allocated for MADV_HUGEPAGA vma.

Andrea has identified that the main source of the problem is
__GFP_THISNODE usage:

: The problem is that direct compaction combined with the NUMA
: __GFP_THISNODE logic in mempolicy.c is telling reclaim to swap very
: hard the local node, instead of failing the allocation if there's no
: THP available in the local node.
:
: Such logic was ok until __GFP_THISNODE was added to the THP allocation
: path even with MPOL_DEFAULT.
:
: The idea behind the __GFP_THISNODE addition, is that it is better to
: provide local memory in PAGE_SIZE units than to use remote NUMA THP
: backed memory. That largely depends on the remote latency though, on
: threadrippers for example the overhead is relatively low in my
: experience.
:
: The combination of __GFP_THISNODE and __GFP_DIRECT_RECLAIM results in
: extremely slow qemu startup with vfio, if the VM is larger than the
: size of one host NUMA node. This is because it will try very hard to
: unsuccessfully swapout get_user_pages pinned pages as result of the
: __GFP_THISNODE being set, instead of falling back to PAGE_SIZE
: allocations and instead of trying to allocate THP on other nodes (it
: would be even worse without vfio type1 GUP pins of course, except it'd
: be swapping heavily instead).

Fix this by removing __GFP_THISNODE for THP requests which are
requesting the direct reclaim.  This effectivelly reverts 5265047ac301
on the grounds that the zone/node reclaim was known to be disruptive due
to premature reclaim when there was memory free.  While it made sense at
the time for HPC workloads without NUMA awareness on rare machines, it
was ultimately harmful in the majority of cases.  The existing behaviour
is similar, if not as widespare as it applies to a corner case but
crucially, it cannot be tuned around like zone_reclaim_mode can.  The
default behaviour should always be to cause the least harm for the
common case.

If there are specialised use cases out there that want zone_reclaim_mode
in specific cases, then it can be built on top.  Longterm we should
consider a memory policy which allows for the node reclaim like behavior
for the specific memory ranges which would allow a

[1] http://lkml.kernel.org/r/20180820032204.9591-1-aarcange@redhat.com

Mel said:

: Both patches look correct to me but I'm responding to this one because
: it's the fix.  The change makes sense and moves further away from the
: severe stalling behaviour we used to see with both THP and zone reclaim
: mode.
:
: I put together a basic experiment with usemem configured to reference a
: buffer multiple times that is 80% the size of main memory on a 2-socket
: box with symmetric node sizes and defrag set to "always".  The defrag
: setting is not the default but it would be functionally similar to
: accessing a buffer with madvise(MADV_HUGEPAGE).  Usemem is configured to
: reference the buffer multiple times and while it's not an interesting
: workload, it would be expected to complete reasonably quickly as it fits
: within memory.  The results were;
:
: usemem
:                                   vanilla           noreclaim-v1
: Amean     Elapsd-1       42.78 (   0.00%)       26.87 (  37.18%)
: Amean     Elapsd-3       27.55 (   0.00%)        7.44 (  73.00%)
: Amean     Elapsd-4        5.72 (   0.00%)        5.69 (   0.45%)
:
: This shows the elapsed time in seconds for 1 thread, 3 threads and 4
: threads referencing buffers 80% the size of memory.  With the patches
: applied, it's 37.18% faster for the single thread and 73% faster with two
: threads.  Note that 4 threads showing little difference does not indicate
: the problem is related to thread counts.  It's simply the case that 4
: threads gets spread so their workload mostly fits in one node.
:
: The overall view from /proc/vmstats is more startling
:
:                          4.19.0-rc1  4.19.0-rc1
:                             vanillanoreclaim-v1r1
: Minor Faults               35593425      708164
: Major Faults                 484088          36
: Swap Ins                    3772837           0
: Swap Outs                   3932295           0
:
: Massive amounts of swap in/out without the patch
:
: Direct pages scanned        6013214           0
: Kswapd pages scanned              0           0
: Kswapd pages reclaimed            0           0
: Direct pages reclaimed      4033009           0
:
: Lots of reclaim activity without the patch
:
: Kswapd efficiency              100%        100%
: Kswapd velocity               0.000       0.000
: Direct efficiency               67%        100%
: Direct velocity           11191.956       0.000
:
: Mostly from direct reclaim context as you'd expect without the patch.
:
: Page writes by reclaim  3932314.000       0.000
: Page writes file                 19           0
: Page writes anon            3932295           0
: Page reclaim immediate        42336           0
:
: Writes from reclaim context is never good but the patch eliminates it.
:
: We should never have default behaviour to thrash the system for such a
: basic workload.  If zone reclaim mode behaviour is ever desired but on a
: single task instead of a global basis then the sensible option is to build
: a mempolicy that enforces that behaviour.

This was a severe regression compared to previous kernels that made
important workloads unusable and it starts when __GFP_THISNODE was
added to THP allocations under MADV_HUGEPAGE.  It is not a significant
risk to go to the previous behavior before __GFP_THISNODE was added, it
worked like that for years.

This was simply an optimization to some lucky workloads that can fit in
a single node, but it ended up breaking the VM for others that can't
possibly fit in a single node, so going back is safe.

[mhocko@suse.com: rewrote the changelog based on the one from Andrea]
Link: http://lkml.kernel.org/r/20180925120326.24392-2-mhocko@kernel.org
Fixes: 5265047ac301 ("mm, thp: really limit transparent hugepage allocation to local node")
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Michal Hocko <mhocko@suse.com>
Reported-by: Stefan Priebe <s.priebe@profihost.ag>
Debugged-by: Andrea Arcangeli <aarcange@redhat.com>
Reported-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: Mel Gorman <mgorman@techsingularity.net>
Tested-by: Mel Gorman <mgorman@techsingularity.net>
Cc: Zi Yan <zi.yan@cs.rutgers.edu>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: David Rientjes <rientjes@google.com>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: <stable@vger.kernel.org>	[4.1+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/mempolicy.c | 32 ++++++++++++++++++++++++++++++--
 1 file changed, 30 insertions(+), 2 deletions(-)

diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index cfd26d7e61a17..58fb833fce0ce 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2060,8 +2060,36 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
 		nmask = policy_nodemask(gfp, pol);
 		if (!nmask || node_isset(hpage_node, *nmask)) {
 			mpol_cond_put(pol);
-			page = __alloc_pages_node(hpage_node,
-						gfp | __GFP_THISNODE, order);
+			/*
+			 * We cannot invoke reclaim if __GFP_THISNODE
+			 * is set. Invoking reclaim with
+			 * __GFP_THISNODE set, would cause THP
+			 * allocations to trigger heavy swapping
+			 * despite there may be tons of free memory
+			 * (including potentially plenty of THP
+			 * already available in the buddy) on all the
+			 * other NUMA nodes.
+			 *
+			 * At most we could invoke compaction when
+			 * __GFP_THISNODE is set (but we would need to
+			 * refrain from invoking reclaim even if
+			 * compaction returned COMPACT_SKIPPED because
+			 * there wasn't not enough memory to succeed
+			 * compaction). For now just avoid
+			 * __GFP_THISNODE instead of limiting the
+			 * allocation path to a strict and single
+			 * compaction invocation.
+			 *
+			 * Supposedly if direct reclaim was enabled by
+			 * the caller, the app prefers THP regardless
+			 * of the node it comes from so this would be
+			 * more desiderable behavior than only
+			 * providing THP originated from the local
+			 * node in such case.
+			 */
+			if (!(gfp & __GFP_DIRECT_RECLAIM))
+				gfp |= __GFP_THISNODE;
+			page = __alloc_pages_node(hpage_node, gfp, order);
 			goto out;
 		}
 	}
-- 
GitLab


From a634644751c46238df58bbfe992e30c1668388db Mon Sep 17 00:00:00 2001
From: Gang He <ghe@suse.com>
Date: Fri, 2 Nov 2018 15:48:03 -0700
Subject: [PATCH 0526/1890] ocfs2: remove ocfs2_is_o2cb_active()

Remove ocfs2_is_o2cb_active().  We have similar functions to identify
which cluster stack is being used via osb->osb_cluster_stack.

Secondly, the current implementation of ocfs2_is_o2cb_active() is not
totally safe.  Based on the design of stackglue, we need to get
ocfs2_stack_lock before using ocfs2_stack related data structures, and
that active_stack pointer can be NULL in the case of mount failure.

Link: http://lkml.kernel.org/r/1495441079-11708-1-git-send-email-ghe@suse.com
Signed-off-by: Gang He <ghe@suse.com>
Reviewed-by: Joseph Qi <jiangqi903@gmail.com>
Reviewed-by: Eric Ren <zren@suse.com>
Acked-by: Changwei Ge <ge.changwei@h3c.com>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ocfs2/dlmglue.c   | 2 +-
 fs/ocfs2/stackglue.c | 6 ------
 fs/ocfs2/stackglue.h | 3 ---
 3 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 933aac5da1934..178cb9e6772ac 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -3603,7 +3603,7 @@ static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
 	 * we can recover correctly from node failure. Otherwise, we may get
 	 * invalid LVB in LKB, but without DLM_SBF_VALNOTVALID being set.
 	 */
-	if (!ocfs2_is_o2cb_active() &&
+	if (ocfs2_userspace_stack(osb) &&
 	    lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
 		lvb = 1;
 
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index d6c350ba25b96..c4b029c43464e 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -48,12 +48,6 @@ static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl";
  */
 static struct ocfs2_stack_plugin *active_stack;
 
-inline int ocfs2_is_o2cb_active(void)
-{
-	return !strcmp(active_stack->sp_name, OCFS2_STACK_PLUGIN_O2CB);
-}
-EXPORT_SYMBOL_GPL(ocfs2_is_o2cb_active);
-
 static struct ocfs2_stack_plugin *ocfs2_stack_lookup(const char *name)
 {
 	struct ocfs2_stack_plugin *p;
diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h
index e3036e1790e86..f2dce10fae543 100644
--- a/fs/ocfs2/stackglue.h
+++ b/fs/ocfs2/stackglue.h
@@ -298,9 +298,6 @@ void ocfs2_stack_glue_set_max_proto_version(struct ocfs2_protocol_version *max_p
 int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin);
 void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin);
 
-/* In ocfs2_downconvert_lock(), we need to know which stack we are using */
-int ocfs2_is_o2cb_active(void);
-
 extern struct kset *ocfs2_kset;
 
 #endif  /* STACKGLUE_H */
-- 
GitLab


From 21158ca85b73ddd0088076a5209cfd040513a8b5 Mon Sep 17 00:00:00 2001
From: Guozhonghua <guozhonghua@h3c.com>
Date: Fri, 2 Nov 2018 15:48:07 -0700
Subject: [PATCH 0527/1890] ocfs2: without quota support, avoid calling quota
 recovery

During one dead node's recovery by other node, quota recovery work will
be queued.  We should avoid calling quota when it is not supported, so
check the quota flags.

Link: http://lkml.kernel.org/r/71604351584F6A4EBAE558C676F37CA401071AC9FB@H3CMLB12-EX.srv.huawei-3com.com
Signed-off-by: guozhonghua <guozhonghua@h3c.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Joseph Qi <jiangqi903@gmail.com>
Cc: Changwei Ge <ge.changwei@h3c.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ocfs2/journal.c | 51 ++++++++++++++++++++++++++++++----------------
 1 file changed, 34 insertions(+), 17 deletions(-)

diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index bd3475694e83a..b63c97f4318e0 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1378,15 +1378,23 @@ static int __ocfs2_recovery_thread(void *arg)
 	int rm_quota_used = 0, i;
 	struct ocfs2_quota_recovery *qrec;
 
+	/* Whether the quota supported. */
+	int quota_enabled = OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb,
+			OCFS2_FEATURE_RO_COMPAT_USRQUOTA)
+		|| OCFS2_HAS_RO_COMPAT_FEATURE(osb->sb,
+			OCFS2_FEATURE_RO_COMPAT_GRPQUOTA);
+
 	status = ocfs2_wait_on_mount(osb);
 	if (status < 0) {
 		goto bail;
 	}
 
-	rm_quota = kcalloc(osb->max_slots, sizeof(int), GFP_NOFS);
-	if (!rm_quota) {
-		status = -ENOMEM;
-		goto bail;
+	if (quota_enabled) {
+		rm_quota = kcalloc(osb->max_slots, sizeof(int), GFP_NOFS);
+		if (!rm_quota) {
+			status = -ENOMEM;
+			goto bail;
+		}
 	}
 restart:
 	status = ocfs2_super_lock(osb, 1);
@@ -1422,9 +1430,14 @@ static int __ocfs2_recovery_thread(void *arg)
 		 * then quota usage would be out of sync until some node takes
 		 * the slot. So we remember which nodes need quota recovery
 		 * and when everything else is done, we recover quotas. */
-		for (i = 0; i < rm_quota_used && rm_quota[i] != slot_num; i++);
-		if (i == rm_quota_used)
-			rm_quota[rm_quota_used++] = slot_num;
+		if (quota_enabled) {
+			for (i = 0; i < rm_quota_used
+					&& rm_quota[i] != slot_num; i++)
+				;
+
+			if (i == rm_quota_used)
+				rm_quota[rm_quota_used++] = slot_num;
+		}
 
 		status = ocfs2_recover_node(osb, node_num, slot_num);
 skip_recovery:
@@ -1452,16 +1465,19 @@ static int __ocfs2_recovery_thread(void *arg)
 	/* Now it is right time to recover quotas... We have to do this under
 	 * superblock lock so that no one can start using the slot (and crash)
 	 * before we recover it */
-	for (i = 0; i < rm_quota_used; i++) {
-		qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]);
-		if (IS_ERR(qrec)) {
-			status = PTR_ERR(qrec);
-			mlog_errno(status);
-			continue;
+	if (quota_enabled) {
+		for (i = 0; i < rm_quota_used; i++) {
+			qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]);
+			if (IS_ERR(qrec)) {
+				status = PTR_ERR(qrec);
+				mlog_errno(status);
+				continue;
+			}
+			ocfs2_queue_recovery_completion(osb->journal,
+					rm_quota[i],
+					NULL, NULL, qrec,
+					ORPHAN_NEED_TRUNCATE);
 		}
-		ocfs2_queue_recovery_completion(osb->journal, rm_quota[i],
-						NULL, NULL, qrec,
-						ORPHAN_NEED_TRUNCATE);
 	}
 
 	ocfs2_super_unlock(osb, 1);
@@ -1483,7 +1499,8 @@ static int __ocfs2_recovery_thread(void *arg)
 
 	mutex_unlock(&osb->recovery_lock);
 
-	kfree(rm_quota);
+	if (quota_enabled)
+		kfree(rm_quota);
 
 	/* no one is callint kthread_stop() for us so the kthread() api
 	 * requires that we call do_exit().  And it isn't exported, but
-- 
GitLab


From 9e985787750db8aae87f02b67e908f28ac4d6b83 Mon Sep 17 00:00:00 2001
From: Changwei Ge <ge.changwei@h3c.com>
Date: Fri, 2 Nov 2018 15:48:11 -0700
Subject: [PATCH 0528/1890] ocfs2: don't use iocb when EIOCBQUEUED returns

When -EIOCBQUEUED returns, it means that aio_complete() will be called
from dio_complete(), which is an asynchronous progress against
write_iter.  Generally, IO is a very slow progress than executing
instruction, but we still can't take the risk to access a freed iocb.

And we do face a BUG crash issue.  Using the crash tool, iocb is
obviously freed already.

  crash> struct -x kiocb ffff881a350f5900
  struct kiocb {
    ki_filp = 0xffff881a350f5a80,
    ki_pos = 0x0,
    ki_complete = 0x0,
    private = 0x0,
    ki_flags = 0x0
  }

And the backtrace shows:
  ocfs2_file_write_iter+0xcaa/0xd00 [ocfs2]
  aio_run_iocb+0x229/0x2f0
  do_io_submit+0x291/0x540
  SyS_io_submit+0x10/0x20
  system_call_fastpath+0x16/0x75

Link: http://lkml.kernel.org/r/1523361653-14439-1-git-send-email-ge.changwei@h3c.com
Signed-off-by: Changwei Ge <ge.changwei@h3c.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Joseph Qi <jiangqi903@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ocfs2/file.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index fe570824b9913..d640c5f8a85da 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2343,7 +2343,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
 
 	written = __generic_file_write_iter(iocb, from);
 	/* buffered aio wouldn't have proper lock coverage today */
-	BUG_ON(written == -EIOCBQUEUED && !(iocb->ki_flags & IOCB_DIRECT));
+	BUG_ON(written == -EIOCBQUEUED && !direct_io);
 
 	/*
 	 * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io
@@ -2463,7 +2463,7 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
 	trace_generic_file_read_iter_ret(ret);
 
 	/* buffered aio wouldn't have proper lock coverage today */
-	BUG_ON(ret == -EIOCBQUEUED && !(iocb->ki_flags & IOCB_DIRECT));
+	BUG_ON(ret == -EIOCBQUEUED && !direct_io);
 
 	/* see ocfs2_file_write_iter */
 	if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) {
-- 
GitLab


From 29aa30167a0a2e6045a0d6d2e89d8168132333d5 Mon Sep 17 00:00:00 2001
From: Changwei Ge <ge.changwei@h3c.com>
Date: Fri, 2 Nov 2018 15:48:15 -0700
Subject: [PATCH 0529/1890] ocfs2: fix a misuse a of brelse after failing
 ocfs2_check_dir_entry

Somehow, file system metadata was corrupted, which causes
ocfs2_check_dir_entry() to fail in function ocfs2_dir_foreach_blk_el().

According to the original design intention, if above happens we should
skip the problematic block and continue to retrieve dir entry.  But
there is obviouse misuse of brelse around related code.

After failure of ocfs2_check_dir_entry(), current code just moves to
next position and uses the problematic buffer head again and again
during which the problematic buffer head is released for multiple times.
I suppose, this a serious issue which is long-lived in ocfs2.  This may
cause other file systems which is also used in a the same host insane.

So we should also consider about bakcporting this patch into linux
-stable.

Link: http://lkml.kernel.org/r/HK2PR06MB045211675B43EED794E597B6D56E0@HK2PR06MB0452.apcprd06.prod.outlook.com
Signed-off-by: Changwei Ge <ge.changwei@h3c.com>
Suggested-by: Changkuo Shi <shi.changkuo@h3c.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Joseph Qi <jiangqi903@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ocfs2/dir.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index b048d4fa39590..c121abbdfc7db 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -1897,8 +1897,7 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode,
 				/* On error, skip the f_pos to the
 				   next block. */
 				ctx->pos = (ctx->pos | (sb->s_blocksize - 1)) + 1;
-				brelse(bh);
-				continue;
+				break;
 			}
 			if (le64_to_cpu(de->inode)) {
 				unsigned char d_type = DT_UNKNOWN;
-- 
GitLab


From cf76c78595ca87548ca5e45c862ac9e0949c4687 Mon Sep 17 00:00:00 2001
From: Changwei Ge <ge.changwei@h3c.com>
Date: Fri, 2 Nov 2018 15:48:19 -0700
Subject: [PATCH 0530/1890] ocfs2: don't put and assigning null to bh allocated
 outside

ocfs2_read_blocks() and ocfs2_read_blocks_sync() are both used to read
several blocks from disk.  Currently, the input argument *bhs* can be
NULL or NOT.  It depends on the caller's behavior.  If the function
fails in reading blocks from disk, the corresponding bh will be assigned
to NULL and put.

Obviously, above process for non-NULL input bh is not appropriate.
Because the caller doesn't even know its bhs are put and re-assigned.

If buffer head is managed by caller, ocfs2_read_blocks and
ocfs2_read_blocks_sync() should not evaluate it to NULL.  It will cause
caller accessing illegal memory, thus crash.

Link: http://lkml.kernel.org/r/HK2PR06MB045285E0F4FBB561F9F2F9B3D5680@HK2PR06MB0452.apcprd06.prod.outlook.com
Signed-off-by: Changwei Ge <ge.changwei@h3c.com>
Reviewed-by: Guozhonghua <guozhonghua@h3c.com>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Joseph Qi <jiangqi903@gmail.com>
Cc: Changwei Ge <ge.changwei@h3c.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ocfs2/buffer_head_io.c | 77 ++++++++++++++++++++++++++++++---------
 1 file changed, 59 insertions(+), 18 deletions(-)

diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index 1d098c3c00e02..4ebbd57cbf846 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -99,25 +99,34 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
 	return ret;
 }
 
+/* Caller must provide a bhs[] with all NULL or non-NULL entries, so it
+ * will be easier to handle read failure.
+ */
 int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
 			   unsigned int nr, struct buffer_head *bhs[])
 {
 	int status = 0;
 	unsigned int i;
 	struct buffer_head *bh;
+	int new_bh = 0;
 
 	trace_ocfs2_read_blocks_sync((unsigned long long)block, nr);
 
 	if (!nr)
 		goto bail;
 
+	/* Don't put buffer head and re-assign it to NULL if it is allocated
+	 * outside since the caller can't be aware of this alternation!
+	 */
+	new_bh = (bhs[0] == NULL);
+
 	for (i = 0 ; i < nr ; i++) {
 		if (bhs[i] == NULL) {
 			bhs[i] = sb_getblk(osb->sb, block++);
 			if (bhs[i] == NULL) {
 				status = -ENOMEM;
 				mlog_errno(status);
-				goto bail;
+				break;
 			}
 		}
 		bh = bhs[i];
@@ -158,9 +167,26 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
 		submit_bh(REQ_OP_READ, 0, bh);
 	}
 
+read_failure:
 	for (i = nr; i > 0; i--) {
 		bh = bhs[i - 1];
 
+		if (unlikely(status)) {
+			if (new_bh && bh) {
+				/* If middle bh fails, let previous bh
+				 * finish its read and then put it to
+				 * aovoid bh leak
+				 */
+				if (!buffer_jbd(bh))
+					wait_on_buffer(bh);
+				put_bh(bh);
+				bhs[i - 1] = NULL;
+			} else if (bh && buffer_uptodate(bh)) {
+				clear_buffer_uptodate(bh);
+			}
+			continue;
+		}
+
 		/* No need to wait on the buffer if it's managed by JBD. */
 		if (!buffer_jbd(bh))
 			wait_on_buffer(bh);
@@ -170,8 +196,7 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
 			 * so we can safely record this and loop back
 			 * to cleanup the other buffers. */
 			status = -EIO;
-			put_bh(bh);
-			bhs[i - 1] = NULL;
+			goto read_failure;
 		}
 	}
 
@@ -179,6 +204,9 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
 	return status;
 }
 
+/* Caller must provide a bhs[] with all NULL or non-NULL entries, so it
+ * will be easier to handle read failure.
+ */
 int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
 		      struct buffer_head *bhs[], int flags,
 		      int (*validate)(struct super_block *sb,
@@ -188,6 +216,7 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
 	int i, ignore_cache = 0;
 	struct buffer_head *bh;
 	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
+	int new_bh = 0;
 
 	trace_ocfs2_read_blocks_begin(ci, (unsigned long long)block, nr, flags);
 
@@ -213,6 +242,11 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
 		goto bail;
 	}
 
+	/* Don't put buffer head and re-assign it to NULL if it is allocated
+	 * outside since the caller can't be aware of this alternation!
+	 */
+	new_bh = (bhs[0] == NULL);
+
 	ocfs2_metadata_cache_io_lock(ci);
 	for (i = 0 ; i < nr ; i++) {
 		if (bhs[i] == NULL) {
@@ -221,7 +255,8 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
 				ocfs2_metadata_cache_io_unlock(ci);
 				status = -ENOMEM;
 				mlog_errno(status);
-				goto bail;
+				/* Don't forget to put previous bh! */
+				break;
 			}
 		}
 		bh = bhs[i];
@@ -316,16 +351,27 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
 		}
 	}
 
-	status = 0;
-
+read_failure:
 	for (i = (nr - 1); i >= 0; i--) {
 		bh = bhs[i];
 
 		if (!(flags & OCFS2_BH_READAHEAD)) {
-			if (status) {
-				/* Clear the rest of the buffers on error */
-				put_bh(bh);
-				bhs[i] = NULL;
+			if (unlikely(status)) {
+				/* Clear the buffers on error including those
+				 * ever succeeded in reading
+				 */
+				if (new_bh && bh) {
+					/* If middle bh fails, let previous bh
+					 * finish its read and then put it to
+					 * aovoid bh leak
+					 */
+					if (!buffer_jbd(bh))
+						wait_on_buffer(bh);
+					put_bh(bh);
+					bhs[i] = NULL;
+				} else if (bh && buffer_uptodate(bh)) {
+					clear_buffer_uptodate(bh);
+				}
 				continue;
 			}
 			/* We know this can't have changed as we hold the
@@ -343,9 +389,7 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
 				 * uptodate. */
 				status = -EIO;
 				clear_buffer_needs_validate(bh);
-				put_bh(bh);
-				bhs[i] = NULL;
-				continue;
+				goto read_failure;
 			}
 
 			if (buffer_needs_validate(bh)) {
@@ -355,11 +399,8 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
 				BUG_ON(buffer_jbd(bh));
 				clear_buffer_needs_validate(bh);
 				status = validate(sb, bh);
-				if (status) {
-					put_bh(bh);
-					bhs[i] = NULL;
-					continue;
-				}
+				if (status)
+					goto read_failure;
 			}
 		}
 
-- 
GitLab


From 3a3d1e51042895c58d4797831921c940b28d8c4b Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 2 Nov 2018 15:48:23 -0700
Subject: [PATCH 0531/1890] ocfs2: dlmglue: clean up timestamp handling

The handling of timestamps outside of the 1970..2038 range in the dlm
glue is rather inconsistent: on 32-bit architectures, this has always
wrapped around to negative timestamps in the 1902..1969 range, while on
64-bit kernels all timestamps are interpreted as positive 34 bit numbers
in the 1970..2514 year range.

Now that the VFS code handles 64-bit timestamps on all architectures, we
can make the behavior more consistent here, and return the same result
that we had on 64-bit already, making the file system y2038 safe in the
process.  Outside of dlmglue, it already uses 64-bit on-disk timestamps
anway, so that part is fine.

For consistency, I'm changing ocfs2_pack_timespec() to clamp anything
outside of the supported range to the minimum and maximum values.  This
avoids a possible ambiguity of values before 1970 in particular, which
used to be interpreted as times at the end of the 2514 range previously.

Link: http://lkml.kernel.org/r/20180619155826.4106487-1-arnd@arndb.de
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Joseph Qi <jiangqi903@gmail.com>
Cc: Changwei Ge <ge.changwei@h3c.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ocfs2/dlmglue.c | 26 +++++++++-----------------
 1 file changed, 9 insertions(+), 17 deletions(-)

diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 178cb9e6772ac..7c835824247eb 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2123,10 +2123,10 @@ static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
 
 /* LVB only has room for 64 bits of time here so we pack it for
  * now. */
-static u64 ocfs2_pack_timespec(struct timespec *spec)
+static u64 ocfs2_pack_timespec(struct timespec64 *spec)
 {
 	u64 res;
-	u64 sec = spec->tv_sec;
+	u64 sec = clamp_t(time64_t, spec->tv_sec, 0, 0x3ffffffffull);
 	u32 nsec = spec->tv_nsec;
 
 	res = (sec << OCFS2_SEC_SHIFT) | (nsec & OCFS2_NSEC_MASK);
@@ -2142,7 +2142,6 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
 	struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
 	struct ocfs2_meta_lvb *lvb;
-	struct timespec ts;
 
 	lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
 
@@ -2163,15 +2162,12 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
 	lvb->lvb_igid      = cpu_to_be32(i_gid_read(inode));
 	lvb->lvb_imode     = cpu_to_be16(inode->i_mode);
 	lvb->lvb_inlink    = cpu_to_be16(inode->i_nlink);
-	ts = timespec64_to_timespec(inode->i_atime);
 	lvb->lvb_iatime_packed  =
-		cpu_to_be64(ocfs2_pack_timespec(&ts));
-	ts = timespec64_to_timespec(inode->i_ctime);
+		cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime));
 	lvb->lvb_ictime_packed =
-		cpu_to_be64(ocfs2_pack_timespec(&ts));
-	ts = timespec64_to_timespec(inode->i_mtime);
+		cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime));
 	lvb->lvb_imtime_packed =
-		cpu_to_be64(ocfs2_pack_timespec(&ts));
+		cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime));
 	lvb->lvb_iattr    = cpu_to_be32(oi->ip_attr);
 	lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features);
 	lvb->lvb_igeneration = cpu_to_be32(inode->i_generation);
@@ -2180,7 +2176,7 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
 	mlog_meta_lvb(0, lockres);
 }
 
-static void ocfs2_unpack_timespec(struct timespec *spec,
+static void ocfs2_unpack_timespec(struct timespec64 *spec,
 				  u64 packed_time)
 {
 	spec->tv_sec = packed_time >> OCFS2_SEC_SHIFT;
@@ -2189,7 +2185,6 @@ static void ocfs2_unpack_timespec(struct timespec *spec,
 
 static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
 {
-	struct timespec ts;
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
 	struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
 	struct ocfs2_meta_lvb *lvb;
@@ -2217,15 +2212,12 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
 	i_gid_write(inode, be32_to_cpu(lvb->lvb_igid));
 	inode->i_mode    = be16_to_cpu(lvb->lvb_imode);
 	set_nlink(inode, be16_to_cpu(lvb->lvb_inlink));
-	ocfs2_unpack_timespec(&ts,
+	ocfs2_unpack_timespec(&inode->i_atime,
 			      be64_to_cpu(lvb->lvb_iatime_packed));
-	inode->i_atime = timespec_to_timespec64(ts);
-	ocfs2_unpack_timespec(&ts,
+	ocfs2_unpack_timespec(&inode->i_mtime,
 			      be64_to_cpu(lvb->lvb_imtime_packed));
-	inode->i_mtime = timespec_to_timespec64(ts);
-	ocfs2_unpack_timespec(&ts,
+	ocfs2_unpack_timespec(&inode->i_ctime,
 			      be64_to_cpu(lvb->lvb_ictime_packed));
-	inode->i_ctime = timespec_to_timespec64(ts);
 	spin_unlock(&oi->ip_lock);
 }
 
-- 
GitLab


From 6194ae4242dec0c9d604bc05df83aa9260a899e4 Mon Sep 17 00:00:00 2001
From: Larry Chen <lchen@suse.com>
Date: Fri, 2 Nov 2018 15:48:27 -0700
Subject: [PATCH 0532/1890] ocfs2: fix clusters leak in ocfs2_defrag_extent()

ocfs2_defrag_extent() might leak allocated clusters.  When the file
system has insufficient space, the number of claimed clusters might be
less than the caller wants.  If that happens, the original code might
directly commit the transaction without returning clusters.

This patch is based on code in ocfs2_add_clusters_in_btree().

[akpm@linux-foundation.org: include localalloc.h, reduce scope of data_ac]
Link: http://lkml.kernel.org/r/20180904041621.16874-3-lchen@suse.com
Signed-off-by: Larry Chen <lchen@suse.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Joseph Qi <jiangqi903@gmail.com>
Cc: Changwei Ge <ge.changwei@h3c.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ocfs2/move_extents.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 7eb3b0a6347ef..3f1685d7d43bf 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -25,6 +25,7 @@
 #include "ocfs2_ioctl.h"
 
 #include "alloc.h"
+#include "localalloc.h"
 #include "aops.h"
 #include "dlmglue.h"
 #include "extent_map.h"
@@ -233,6 +234,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
 	struct ocfs2_refcount_tree *ref_tree = NULL;
 	u32 new_phys_cpos, new_len;
 	u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
+	int need_free = 0;
 
 	if ((ext_flags & OCFS2_EXT_REFCOUNTED) && *len) {
 		BUG_ON(!ocfs2_is_refcount_inode(inode));
@@ -308,6 +310,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
 		if (!partial) {
 			context->range->me_flags &= ~OCFS2_MOVE_EXT_FL_COMPLETE;
 			ret = -ENOSPC;
+			need_free = 1;
 			goto out_commit;
 		}
 	}
@@ -332,6 +335,20 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
 		mlog_errno(ret);
 
 out_commit:
+	if (need_free && context->data_ac) {
+		struct ocfs2_alloc_context *data_ac = context->data_ac;
+
+		if (context->data_ac->ac_which == OCFS2_AC_USE_LOCAL)
+			ocfs2_free_local_alloc_bits(osb, handle, data_ac,
+					new_phys_cpos, new_len);
+		else
+			ocfs2_free_clusters(handle,
+					data_ac->ac_inode,
+					data_ac->ac_bh,
+					ocfs2_clusters_to_blocks(osb->sb, new_phys_cpos),
+					new_len);
+	}
+
 	ocfs2_commit_trans(osb, handle);
 
 out_unlock_mutex:
-- 
GitLab


From 89c83fb539f95491be80cdd5158e6f0ce329e317 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Fri, 2 Nov 2018 15:48:31 -0700
Subject: [PATCH 0533/1890] mm, thp: consolidate THP gfp handling into
 alloc_hugepage_direct_gfpmask

THP allocation mode is quite complex and it depends on the defrag mode.
This complexity is hidden in alloc_hugepage_direct_gfpmask from a large
part currently. The NUMA special casing (namely __GFP_THISNODE) is
however independent and placed in alloc_pages_vma currently. This both
adds an unnecessary branch to all vma based page allocation requests and
it makes the code more complex unnecessarily as well. Not to mention
that e.g. shmem THP used to do the node reclaiming unconditionally
regardless of the defrag mode until recently. This was not only
unexpected behavior but it was also hardly a good default behavior and I
strongly suspect it was just a side effect of the code sharing more than
a deliberate decision which suggests that such a layering is wrong.

Get rid of the thp special casing from alloc_pages_vma and move the
logic to alloc_hugepage_direct_gfpmask. __GFP_THISNODE is applied to the
resulting gfp mask only when the direct reclaim is not requested and
when there is no explicit numa binding to preserve the current logic.

Please note that there's also a slight difference wrt MPOL_BIND now. The
previous code would avoid using __GFP_THISNODE if the local node was
outside of policy_nodemask(). After this patch __GFP_THISNODE is avoided
for all MPOL_BIND policies. So there's a difference that if local node
is actually allowed by the bind policy's nodemask, previously
__GFP_THISNODE would be added, but now it won't be. From the behavior
POV this is still correct because the policy nodemask is used.

Link: http://lkml.kernel.org/r/20180925120326.24392-3-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Alex Williamson <alex.williamson@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Stefan Priebe - Profihost AG <s.priebe@profihost.ag>
Cc: Zi Yan <zi.yan@cs.rutgers.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h       | 12 +++-----
 include/linux/mempolicy.h |  2 ++
 mm/huge_memory.c          | 38 +++++++++++++++++------
 mm/mempolicy.c            | 63 +++------------------------------------
 mm/shmem.c                |  2 +-
 5 files changed, 40 insertions(+), 77 deletions(-)

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 24bcc5eec6b40..76f8db0b0e715 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -510,22 +510,18 @@ alloc_pages(gfp_t gfp_mask, unsigned int order)
 }
 extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
 			struct vm_area_struct *vma, unsigned long addr,
-			int node, bool hugepage);
-#define alloc_hugepage_vma(gfp_mask, vma, addr, order)	\
-	alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true)
+			int node);
 #else
 #define alloc_pages(gfp_mask, order) \
 		alloc_pages_node(numa_node_id(), gfp_mask, order)
-#define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\
-	alloc_pages(gfp_mask, order)
-#define alloc_hugepage_vma(gfp_mask, vma, addr, order)	\
+#define alloc_pages_vma(gfp_mask, order, vma, addr, node)\
 	alloc_pages(gfp_mask, order)
 #endif
 #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
 #define alloc_page_vma(gfp_mask, vma, addr)			\
-	alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false)
+	alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id())
 #define alloc_page_vma_node(gfp_mask, vma, addr, node)		\
-	alloc_pages_vma(gfp_mask, 0, vma, addr, node, false)
+	alloc_pages_vma(gfp_mask, 0, vma, addr, node)
 
 extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
 extern unsigned long get_zeroed_page(gfp_t gfp_mask);
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 5228c62af4165..bac395f1d00a0 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -139,6 +139,8 @@ struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp,
 struct mempolicy *get_task_policy(struct task_struct *p);
 struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
 		unsigned long addr);
+struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
+						unsigned long addr);
 bool vma_policy_mof(struct vm_area_struct *vma);
 
 extern void numa_default_policy(void);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 4e4ef8fa479d5..55478ab3c83be 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -629,21 +629,40 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
  *	    available
  * never: never stall for any thp allocation
  */
-static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma)
+static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma, unsigned long addr)
 {
 	const bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE);
+	gfp_t this_node = 0;
+
+#ifdef CONFIG_NUMA
+	struct mempolicy *pol;
+	/*
+	 * __GFP_THISNODE is used only when __GFP_DIRECT_RECLAIM is not
+	 * specified, to express a general desire to stay on the current
+	 * node for optimistic allocation attempts. If the defrag mode
+	 * and/or madvise hint requires the direct reclaim then we prefer
+	 * to fallback to other node rather than node reclaim because that
+	 * can lead to excessive reclaim even though there is free memory
+	 * on other nodes. We expect that NUMA preferences are specified
+	 * by memory policies.
+	 */
+	pol = get_vma_policy(vma, addr);
+	if (pol->mode != MPOL_BIND)
+		this_node = __GFP_THISNODE;
+	mpol_cond_put(pol);
+#endif
 
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
 		return GFP_TRANSHUGE | (vma_madvised ? 0 : __GFP_NORETRY);
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags))
-		return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM;
+		return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM | this_node;
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags))
 		return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM :
-							     __GFP_KSWAPD_RECLAIM);
+							     __GFP_KSWAPD_RECLAIM | this_node);
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags))
 		return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM :
-							     0);
-	return GFP_TRANSHUGE_LIGHT;
+							     this_node);
+	return GFP_TRANSHUGE_LIGHT | this_node;
 }
 
 /* Caller must hold page table lock. */
@@ -715,8 +734,8 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
 			pte_free(vma->vm_mm, pgtable);
 		return ret;
 	}
-	gfp = alloc_hugepage_direct_gfpmask(vma);
-	page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
+	gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
+	page = alloc_pages_vma(gfp, HPAGE_PMD_ORDER, vma, haddr, numa_node_id());
 	if (unlikely(!page)) {
 		count_vm_event(THP_FAULT_FALLBACK);
 		return VM_FAULT_FALLBACK;
@@ -1286,8 +1305,9 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
 alloc:
 	if (transparent_hugepage_enabled(vma) &&
 	    !transparent_hugepage_debug_cow()) {
-		huge_gfp = alloc_hugepage_direct_gfpmask(vma);
-		new_page = alloc_hugepage_vma(huge_gfp, vma, haddr, HPAGE_PMD_ORDER);
+		huge_gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
+		new_page = alloc_pages_vma(huge_gfp, HPAGE_PMD_ORDER, vma,
+				haddr, numa_node_id());
 	} else
 		new_page = NULL;
 
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 58fb833fce0ce..5837a067124d8 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1116,8 +1116,8 @@ static struct page *new_page(struct page *page, unsigned long start)
 	} else if (PageTransHuge(page)) {
 		struct page *thp;
 
-		thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, address,
-					 HPAGE_PMD_ORDER);
+		thp = alloc_pages_vma(GFP_TRANSHUGE, HPAGE_PMD_ORDER, vma,
+				address, numa_node_id());
 		if (!thp)
 			return NULL;
 		prep_transhuge_page(thp);
@@ -1662,7 +1662,7 @@ struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
  * freeing by another task.  It is the caller's responsibility to free the
  * extra reference for shared policies.
  */
-static struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
+struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
 						unsigned long addr)
 {
 	struct mempolicy *pol = __get_vma_policy(vma, addr);
@@ -2011,7 +2011,6 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  * 	@vma:  Pointer to VMA or NULL if not available.
  *	@addr: Virtual Address of the allocation. Must be inside the VMA.
  *	@node: Which node to prefer for allocation (modulo policy).
- *	@hugepage: for hugepages try only the preferred node if possible
  *
  * 	This function allocates a page from the kernel page pool and applies
  *	a NUMA policy associated with the VMA or the current process.
@@ -2022,7 +2021,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  */
 struct page *
 alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
-		unsigned long addr, int node, bool hugepage)
+		unsigned long addr, int node)
 {
 	struct mempolicy *pol;
 	struct page *page;
@@ -2040,60 +2039,6 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
 		goto out;
 	}
 
-	if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage)) {
-		int hpage_node = node;
-
-		/*
-		 * For hugepage allocation and non-interleave policy which
-		 * allows the current node (or other explicitly preferred
-		 * node) we only try to allocate from the current/preferred
-		 * node and don't fall back to other nodes, as the cost of
-		 * remote accesses would likely offset THP benefits.
-		 *
-		 * If the policy is interleave, or does not allow the current
-		 * node in its nodemask, we allocate the standard way.
-		 */
-		if (pol->mode == MPOL_PREFERRED &&
-						!(pol->flags & MPOL_F_LOCAL))
-			hpage_node = pol->v.preferred_node;
-
-		nmask = policy_nodemask(gfp, pol);
-		if (!nmask || node_isset(hpage_node, *nmask)) {
-			mpol_cond_put(pol);
-			/*
-			 * We cannot invoke reclaim if __GFP_THISNODE
-			 * is set. Invoking reclaim with
-			 * __GFP_THISNODE set, would cause THP
-			 * allocations to trigger heavy swapping
-			 * despite there may be tons of free memory
-			 * (including potentially plenty of THP
-			 * already available in the buddy) on all the
-			 * other NUMA nodes.
-			 *
-			 * At most we could invoke compaction when
-			 * __GFP_THISNODE is set (but we would need to
-			 * refrain from invoking reclaim even if
-			 * compaction returned COMPACT_SKIPPED because
-			 * there wasn't not enough memory to succeed
-			 * compaction). For now just avoid
-			 * __GFP_THISNODE instead of limiting the
-			 * allocation path to a strict and single
-			 * compaction invocation.
-			 *
-			 * Supposedly if direct reclaim was enabled by
-			 * the caller, the app prefers THP regardless
-			 * of the node it comes from so this would be
-			 * more desiderable behavior than only
-			 * providing THP originated from the local
-			 * node in such case.
-			 */
-			if (!(gfp & __GFP_DIRECT_RECLAIM))
-				gfp |= __GFP_THISNODE;
-			page = __alloc_pages_node(hpage_node, gfp, order);
-			goto out;
-		}
-	}
-
 	nmask = policy_nodemask(gfp, pol);
 	preferred_nid = policy_node(gfp, pol, node);
 	page = __alloc_pages_nodemask(gfp, order, preferred_nid, nmask);
diff --git a/mm/shmem.c b/mm/shmem.c
index 56bf122e0bb4d..ea26d7a0342d7 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1435,7 +1435,7 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp,
 
 	shmem_pseudo_vma_init(&pvma, info, hindex);
 	page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN,
-			HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(), true);
+			HPAGE_PMD_ORDER, &pvma, 0, numa_node_id());
 	shmem_pseudo_vma_destroy(&pvma);
 	if (page)
 		prep_transhuge_page(page);
-- 
GitLab


From 3383b36040522505546cb112f0a543a5998edfb6 Mon Sep 17 00:00:00 2001
From: zhong jiang <zhongjiang@huawei.com>
Date: Fri, 2 Nov 2018 15:48:35 -0700
Subject: [PATCH 0534/1890] kernel/kexec_file.c: remove some duplicated
 includes

We include kexec.h and slab.h twice in kexec_file.c. It's unnecessary.
hence just remove them.

Link: http://lkml.kernel.org/r/1537498098-19171-1-git-send-email-zhongjiang@huawei.com
Signed-off-by: zhong jiang <zhongjiang@huawei.com>
Reviewed-by: Bhupesh Sharma <bhsharma@redhat.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Baoquan He <bhe@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/kexec_file.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index c6a3b6851372c..35cf0ad29718f 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -25,8 +25,6 @@
 #include <linux/elf.h>
 #include <linux/elfcore.h>
 #include <linux/kernel.h>
-#include <linux/kexec.h>
-#include <linux/slab.h>
 #include <linux/syscalls.h>
 #include <linux/vmalloc.h>
 #include "kexec_internal.h"
-- 
GitLab


From 6f0483d1f91b612186abeaebf3ce43bf805eb9f7 Mon Sep 17 00:00:00 2001
From: Michael Schupikov <michael@schupikov.de>
Date: Fri, 2 Nov 2018 15:48:38 -0700
Subject: [PATCH 0535/1890] kernel/sysctl.c: remove duplicated include

Remove one include of <linux/pipe_fs_i.h>.
No functional changes.

Link: http://lkml.kernel.org/r/20181004134223.17735-1-michael@schupikov.de
Signed-off-by: Michael Schupikov <michael@schupikov.de>
Reviewed-by: Richard Weinberger <richard@nod.at>
Acked-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/sysctl.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 3ae223f7b5dfa..5fc724e4e454c 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -66,7 +66,6 @@
 #include <linux/kexec.h>
 #include <linux/bpf.h>
 #include <linux/mount.h>
-#include <linux/pipe_fs_i.h>
 
 #include <linux/uaccess.h>
 #include <asm/processor.h>
-- 
GitLab


From 9f2df09a33aa2c76ce6385d382693f98d7f2f07e Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Fri, 2 Nov 2018 15:48:42 -0700
Subject: [PATCH 0536/1890] bfs: add sanity check at bfs_fill_super()

syzbot is reporting too large memory allocation at bfs_fill_super() [1].
Since file system image is corrupted such that bfs_sb->s_start == 0,
bfs_fill_super() is trying to allocate 8MB of continuous memory. Fix
this by adding a sanity check on bfs_sb->s_start, __GFP_NOWARN and
printf().

[1] https://syzkaller.appspot.com/bug?id=16a87c236b951351374a84c8a32f40edbc034e96

Link: http://lkml.kernel.org/r/1525862104-3407-1-git-send-email-penguin-kernel@I-love.SAKURA.ne.jp
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Reported-by: syzbot <syzbot+71c6b5d68e91149fc8a4@syzkaller.appspotmail.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Tigran Aivazian <aivazian.tigran@gmail.com>
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/bfs/inode.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 9a69392f1fb37..d81c148682e71 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -350,7 +350,8 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
 
 	s->s_magic = BFS_MAGIC;
 
-	if (le32_to_cpu(bfs_sb->s_start) > le32_to_cpu(bfs_sb->s_end)) {
+	if (le32_to_cpu(bfs_sb->s_start) > le32_to_cpu(bfs_sb->s_end) ||
+	    le32_to_cpu(bfs_sb->s_start) < BFS_BSIZE) {
 		printf("Superblock is corrupted\n");
 		goto out1;
 	}
@@ -359,9 +360,11 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
 					sizeof(struct bfs_inode)
 					+ BFS_ROOT_INO - 1;
 	imap_len = (info->si_lasti / 8) + 1;
-	info->si_imap = kzalloc(imap_len, GFP_KERNEL);
-	if (!info->si_imap)
+	info->si_imap = kzalloc(imap_len, GFP_KERNEL | __GFP_NOWARN);
+	if (!info->si_imap) {
+		printf("Cannot allocate %u bytes\n", imap_len);
 		goto out1;
+	}
 	for (i = 0; i < BFS_ROOT_INO; i++)
 		set_bit(i, info->si_imap);
 
-- 
GitLab


From dd33ad7b251f900481701b2a82d25de583867708 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Fri, 2 Nov 2018 15:48:46 -0700
Subject: [PATCH 0537/1890] memory_hotplug: cond_resched in __remove_pages

We have received a bug report that unbinding a large pmem (>1TB) can
result in a soft lockup:

  NMI watchdog: BUG: soft lockup - CPU#9 stuck for 23s! [ndctl:4365]
  [...]
  Supported: Yes
  CPU: 9 PID: 4365 Comm: ndctl Not tainted 4.12.14-94.40-default #1 SLE12-SP4
  Hardware name: Intel Corporation S2600WFD/S2600WFD, BIOS SE5C620.86B.01.00.0833.051120182255 05/11/2018
  task: ffff9cce7d4410c0 task.stack: ffffbe9eb1bc4000
  RIP: 0010:__put_page+0x62/0x80
  Call Trace:
   devm_memremap_pages_release+0x152/0x260
   release_nodes+0x18d/0x1d0
   device_release_driver_internal+0x160/0x210
   unbind_store+0xb3/0xe0
   kernfs_fop_write+0x102/0x180
   __vfs_write+0x26/0x150
   vfs_write+0xad/0x1a0
   SyS_write+0x42/0x90
   do_syscall_64+0x74/0x150
   entry_SYSCALL_64_after_hwframe+0x3d/0xa2
  RIP: 0033:0x7fd13166b3d0

It has been reported on an older (4.12) kernel but the current upstream
code doesn't cond_resched in the hot remove code at all and the given
range to remove might be really large.  Fix the issue by calling
cond_resched once per memory section.

Link: http://lkml.kernel.org/r/20181031125840.23982-1-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Johannes Thumshirn <jthumshirn@suse.de>
Cc: Dan Williams <dan.j.williams@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memory_hotplug.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 61972da38d93c..2b2b3ccbbfb57 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -586,6 +586,7 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
 	for (i = 0; i < sections_to_remove; i++) {
 		unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION;
 
+		cond_resched();
 		ret = __remove_section(zone, __pfn_to_section(pfn), map_offset,
 				altmap);
 		map_offset = 0;
-- 
GitLab


From e19e5be8b4cafa8b3f8b0cd1b1dfe20fa0145b83 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Fri, 2 Nov 2018 19:04:08 +0100
Subject: [PATCH 0538/1890] s390/qeth: sanitize strings in debug messages

As Documentation/s390/s390dbf.txt states quite clearly, using any
pointer in sprinf-formatted s390dbf debug entries is dangerous.
The pointers are dereferenced whenever the trace file is read from.
So if the referenced data has a shorter life-time than the trace file,
any read operation can result in a use-after-free.

So rip out all hazardous use of indirect data, and replace any usage of
dev_name() and such by the Bus ID number.

Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_core.h      |  15 +++-
 drivers/s390/net/qeth_core_main.c | 127 ++++++++++++++----------------
 drivers/s390/net/qeth_l2_main.c   |  24 +++---
 drivers/s390/net/qeth_l3_main.c   | 104 ++++++++----------------
 4 files changed, 119 insertions(+), 151 deletions(-)

diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 6843bc7ee9f24..884ba9dfb3416 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -87,6 +87,18 @@ struct qeth_dbf_info {
 #define SENSE_RESETTING_EVENT_BYTE 1
 #define SENSE_RESETTING_EVENT_FLAG 0x80
 
+static inline u32 qeth_get_device_id(struct ccw_device *cdev)
+{
+	struct ccw_dev_id dev_id;
+	u32 id;
+
+	ccw_device_get_id(cdev, &dev_id);
+	id = dev_id.devno;
+	id |= (u32) (dev_id.ssid << 16);
+
+	return id;
+}
+
 /*
  * Common IO related definitions
  */
@@ -97,7 +109,8 @@ struct qeth_dbf_info {
 #define CARD_RDEV_ID(card) dev_name(&card->read.ccwdev->dev)
 #define CARD_WDEV_ID(card) dev_name(&card->write.ccwdev->dev)
 #define CARD_DDEV_ID(card) dev_name(&card->data.ccwdev->dev)
-#define CHANNEL_ID(channel) dev_name(&channel->ccwdev->dev)
+#define CCW_DEVID(cdev)		(qeth_get_device_id(cdev))
+#define CARD_DEVID(card)	(CCW_DEVID(CARD_RDEV(card)))
 
 /**
  * card stuff
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 3274f13aad576..639ac0aca1e9d 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -554,8 +554,8 @@ static int __qeth_issue_next_read(struct qeth_card *card)
 	if (!iob) {
 		dev_warn(&card->gdev->dev, "The qeth device driver "
 			"failed to recover an error on the device\n");
-		QETH_DBF_MESSAGE(2, "%s issue_next_read failed: no iob "
-			"available\n", dev_name(&card->gdev->dev));
+		QETH_DBF_MESSAGE(2, "issue_next_read on device %x failed: no iob available\n",
+				 CARD_DEVID(card));
 		return -ENOMEM;
 	}
 	qeth_setup_ccw(channel->ccw, CCW_CMD_READ, QETH_BUFSIZE, iob->data);
@@ -563,8 +563,8 @@ static int __qeth_issue_next_read(struct qeth_card *card)
 	rc = ccw_device_start(channel->ccwdev, channel->ccw,
 			      (addr_t) iob, 0, 0);
 	if (rc) {
-		QETH_DBF_MESSAGE(2, "%s error in starting next read ccw! "
-			"rc=%i\n", dev_name(&card->gdev->dev), rc);
+		QETH_DBF_MESSAGE(2, "error %i on device %x when starting next read ccw!\n",
+				 rc, CARD_DEVID(card));
 		atomic_set(&channel->irq_pending, 0);
 		card->read_or_write_problem = 1;
 		qeth_schedule_recovery(card);
@@ -613,16 +613,14 @@ static void qeth_issue_ipa_msg(struct qeth_ipa_cmd *cmd, int rc,
 	const char *ipa_name;
 	int com = cmd->hdr.command;
 	ipa_name = qeth_get_ipa_cmd_name(com);
+
 	if (rc)
-		QETH_DBF_MESSAGE(2, "IPA: %s(x%X) for %s/%s returned "
-				"x%X \"%s\"\n",
-				ipa_name, com, dev_name(&card->gdev->dev),
-				QETH_CARD_IFNAME(card), rc,
-				qeth_get_ipa_msg(rc));
+		QETH_DBF_MESSAGE(2, "IPA: %s(%#x) for device %x returned %#x \"%s\"\n",
+				 ipa_name, com, CARD_DEVID(card), rc,
+				 qeth_get_ipa_msg(rc));
 	else
-		QETH_DBF_MESSAGE(5, "IPA: %s(x%X) for %s/%s succeeded\n",
-				ipa_name, com, dev_name(&card->gdev->dev),
-				QETH_CARD_IFNAME(card));
+		QETH_DBF_MESSAGE(5, "IPA: %s(%#x) for device %x succeeded\n",
+				 ipa_name, com, CARD_DEVID(card));
 }
 
 static struct qeth_ipa_cmd *qeth_check_ipa_data(struct qeth_card *card,
@@ -711,7 +709,7 @@ static int qeth_check_idx_response(struct qeth_card *card,
 
 	QETH_DBF_HEX(CTRL, 2, buffer, QETH_DBF_CTRL_LEN);
 	if ((buffer[2] & 0xc0) == 0xc0) {
-		QETH_DBF_MESSAGE(2, "received an IDX TERMINATE with cause code %#02x\n",
+		QETH_DBF_MESSAGE(2, "received an IDX TERMINATE with cause code %#04x\n",
 				 buffer[4]);
 		QETH_CARD_TEXT(card, 2, "ckidxres");
 		QETH_CARD_TEXT(card, 2, " idxterm");
@@ -972,8 +970,8 @@ static int qeth_get_problem(struct qeth_card *card, struct ccw_device *cdev,
 		QETH_CARD_TEXT(card, 2, "CGENCHK");
 		dev_warn(&cdev->dev, "The qeth device driver "
 			"failed to recover an error on the device\n");
-		QETH_DBF_MESSAGE(2, "%s check on device dstat=x%x, cstat=x%x\n",
-			dev_name(&cdev->dev), dstat, cstat);
+		QETH_DBF_MESSAGE(2, "check on channel %x with dstat=%#x, cstat=%#x\n",
+				 CCW_DEVID(cdev), dstat, cstat);
 		print_hex_dump(KERN_WARNING, "qeth: irb ", DUMP_PREFIX_OFFSET,
 				16, 1, irb, 64, 1);
 		return 1;
@@ -1013,8 +1011,8 @@ static long qeth_check_irb_error(struct qeth_card *card,
 
 	switch (PTR_ERR(irb)) {
 	case -EIO:
-		QETH_DBF_MESSAGE(2, "%s i/o-error on device\n",
-			dev_name(&cdev->dev));
+		QETH_DBF_MESSAGE(2, "i/o-error on channel %x\n",
+				 CCW_DEVID(cdev));
 		QETH_CARD_TEXT(card, 2, "ckirberr");
 		QETH_CARD_TEXT_(card, 2, "  rc%d", -EIO);
 		break;
@@ -1031,8 +1029,8 @@ static long qeth_check_irb_error(struct qeth_card *card,
 		}
 		break;
 	default:
-		QETH_DBF_MESSAGE(2, "%s unknown error %ld on device\n",
-			dev_name(&cdev->dev), PTR_ERR(irb));
+		QETH_DBF_MESSAGE(2, "unknown error %ld on channel %x\n",
+				 PTR_ERR(irb), CCW_DEVID(cdev));
 		QETH_CARD_TEXT(card, 2, "ckirberr");
 		QETH_CARD_TEXT(card, 2, "  rc???");
 	}
@@ -1114,9 +1112,9 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm,
 			dev_warn(&channel->ccwdev->dev,
 				"The qeth device driver failed to recover "
 				"an error on the device\n");
-			QETH_DBF_MESSAGE(2, "%s sense data available. cstat "
-				"0x%X dstat 0x%X\n",
-				dev_name(&channel->ccwdev->dev), cstat, dstat);
+			QETH_DBF_MESSAGE(2, "sense data available on channel %x: cstat %#X dstat %#X\n",
+					 CCW_DEVID(channel->ccwdev), cstat,
+					 dstat);
 			print_hex_dump(KERN_WARNING, "qeth: irb ",
 				DUMP_PREFIX_OFFSET, 16, 1, irb, 32, 1);
 			print_hex_dump(KERN_WARNING, "qeth: sense data ",
@@ -1890,8 +1888,8 @@ static int qeth_idx_activate_channel(struct qeth_card *card,
 	if (channel->state != CH_STATE_ACTIVATING) {
 		dev_warn(&channel->ccwdev->dev, "The qeth device driver"
 			" failed to recover an error on the device\n");
-		QETH_DBF_MESSAGE(2, "%s IDX activate timed out\n",
-			dev_name(&channel->ccwdev->dev));
+		QETH_DBF_MESSAGE(2, "IDX activate timed out on channel %x\n",
+				 CCW_DEVID(channel->ccwdev));
 		QETH_DBF_TEXT_(SETUP, 2, "2err%d", -ETIME);
 		return -ETIME;
 	}
@@ -1926,17 +1924,15 @@ static void qeth_idx_write_cb(struct qeth_card *card,
 				"The adapter is used exclusively by another "
 				"host\n");
 		else
-			QETH_DBF_MESSAGE(2, "%s IDX_ACTIVATE on write channel:"
-				" negative reply\n",
-				dev_name(&channel->ccwdev->dev));
+			QETH_DBF_MESSAGE(2, "IDX_ACTIVATE on channel %x: negative reply\n",
+					 CCW_DEVID(channel->ccwdev));
 		goto out;
 	}
 	memcpy(&temp, QETH_IDX_ACT_FUNC_LEVEL(iob->data), 2);
 	if ((temp & ~0x0100) != qeth_peer_func_level(card->info.func_level)) {
-		QETH_DBF_MESSAGE(2, "%s IDX_ACTIVATE on write channel: "
-			"function level mismatch (sent: 0x%x, received: "
-			"0x%x)\n", dev_name(&channel->ccwdev->dev),
-			card->info.func_level, temp);
+		QETH_DBF_MESSAGE(2, "IDX_ACTIVATE on channel %x: function level mismatch (sent: %#x, received: %#x)\n",
+				 CCW_DEVID(channel->ccwdev),
+				 card->info.func_level, temp);
 		goto out;
 	}
 	channel->state = CH_STATE_UP;
@@ -1973,9 +1969,8 @@ static void qeth_idx_read_cb(struct qeth_card *card,
 				"insufficient authorization\n");
 			break;
 		default:
-			QETH_DBF_MESSAGE(2, "%s IDX_ACTIVATE on read channel:"
-				" negative reply\n",
-				dev_name(&channel->ccwdev->dev));
+			QETH_DBF_MESSAGE(2, "IDX_ACTIVATE on channel %x: negative reply\n",
+					 CCW_DEVID(channel->ccwdev));
 		}
 		QETH_CARD_TEXT_(card, 2, "idxread%c",
 			QETH_IDX_ACT_CAUSE_CODE(iob->data));
@@ -1984,10 +1979,9 @@ static void qeth_idx_read_cb(struct qeth_card *card,
 
 	memcpy(&temp, QETH_IDX_ACT_FUNC_LEVEL(iob->data), 2);
 	if (temp != qeth_peer_func_level(card->info.func_level)) {
-		QETH_DBF_MESSAGE(2, "%s IDX_ACTIVATE on read channel: function "
-			"level mismatch (sent: 0x%x, received: 0x%x)\n",
-			dev_name(&channel->ccwdev->dev),
-			card->info.func_level, temp);
+		QETH_DBF_MESSAGE(2, "IDX_ACTIVATE on channel %x: function level mismatch (sent: %#x, received: %#x)\n",
+				 CCW_DEVID(channel->ccwdev),
+				 card->info.func_level, temp);
 		goto out;
 	}
 	memcpy(&card->token.issuer_rm_r,
@@ -2096,9 +2090,8 @@ int qeth_send_control_data(struct qeth_card *card, int len,
 				      (addr_t) iob, 0, 0, event_timeout);
 	spin_unlock_irq(get_ccwdev_lock(channel->ccwdev));
 	if (rc) {
-		QETH_DBF_MESSAGE(2, "%s qeth_send_control_data: "
-			"ccw_device_start rc = %i\n",
-			dev_name(&channel->ccwdev->dev), rc);
+		QETH_DBF_MESSAGE(2, "qeth_send_control_data on device %x: ccw_device_start rc = %i\n",
+				 CARD_DEVID(card), rc);
 		QETH_CARD_TEXT_(card, 2, " err%d", rc);
 		spin_lock_irq(&card->lock);
 		list_del_init(&reply->list);
@@ -2853,8 +2846,8 @@ struct qeth_cmd_buffer *qeth_get_ipacmd_buffer(struct qeth_card *card,
 	} else {
 		dev_warn(&card->gdev->dev,
 			 "The qeth driver ran out of channel command buffers\n");
-		QETH_DBF_MESSAGE(1, "%s The qeth driver ran out of channel command buffers",
-				 dev_name(&card->gdev->dev));
+		QETH_DBF_MESSAGE(1, "device %x ran out of channel command buffers",
+				 CARD_DEVID(card));
 	}
 
 	return iob;
@@ -2989,10 +2982,9 @@ static int qeth_query_ipassists_cb(struct qeth_card *card,
 		return 0;
 	default:
 		if (cmd->hdr.return_code) {
-			QETH_DBF_MESSAGE(1, "%s IPA_CMD_QIPASSIST: Unhandled "
-						"rc=%d\n",
-						dev_name(&card->gdev->dev),
-						cmd->hdr.return_code);
+			QETH_DBF_MESSAGE(1, "IPA_CMD_QIPASSIST on device %x: Unhandled rc=%#x\n",
+					 CARD_DEVID(card),
+					 cmd->hdr.return_code);
 			return 0;
 		}
 	}
@@ -3004,8 +2996,8 @@ static int qeth_query_ipassists_cb(struct qeth_card *card,
 		card->options.ipa6.supported_funcs = cmd->hdr.ipa_supported;
 		card->options.ipa6.enabled_funcs = cmd->hdr.ipa_enabled;
 	} else
-		QETH_DBF_MESSAGE(1, "%s IPA_CMD_QIPASSIST: Flawed LIC detected"
-					"\n", dev_name(&card->gdev->dev));
+		QETH_DBF_MESSAGE(1, "IPA_CMD_QIPASSIST on device %x: Flawed LIC detected\n",
+				 CARD_DEVID(card));
 	return 0;
 }
 
@@ -4297,10 +4289,9 @@ static int qeth_setadpparms_set_access_ctrl_cb(struct qeth_card *card,
 		cmd->data.setadapterparms.hdr.return_code);
 	if (cmd->data.setadapterparms.hdr.return_code !=
 						SET_ACCESS_CTRL_RC_SUCCESS)
-		QETH_DBF_MESSAGE(3, "ERR:SET_ACCESS_CTRL(%s,%d)==%d\n",
-				card->gdev->dev.kobj.name,
-				access_ctrl_req->subcmd_code,
-				cmd->data.setadapterparms.hdr.return_code);
+		QETH_DBF_MESSAGE(3, "ERR:SET_ACCESS_CTRL(%#x) on device %x: %#x\n",
+				 access_ctrl_req->subcmd_code, CARD_DEVID(card),
+				 cmd->data.setadapterparms.hdr.return_code);
 	switch (cmd->data.setadapterparms.hdr.return_code) {
 	case SET_ACCESS_CTRL_RC_SUCCESS:
 		if (card->options.isolation == ISOLATION_MODE_NONE) {
@@ -4312,14 +4303,14 @@ static int qeth_setadpparms_set_access_ctrl_cb(struct qeth_card *card,
 		}
 		break;
 	case SET_ACCESS_CTRL_RC_ALREADY_NOT_ISOLATED:
-		QETH_DBF_MESSAGE(2, "%s QDIO data connection isolation already "
-				"deactivated\n", dev_name(&card->gdev->dev));
+		QETH_DBF_MESSAGE(2, "QDIO data connection isolation on device %x already deactivated\n",
+				 CARD_DEVID(card));
 		if (fallback)
 			card->options.isolation = card->options.prev_isolation;
 		break;
 	case SET_ACCESS_CTRL_RC_ALREADY_ISOLATED:
-		QETH_DBF_MESSAGE(2, "%s QDIO data connection isolation already"
-				" activated\n", dev_name(&card->gdev->dev));
+		QETH_DBF_MESSAGE(2, "QDIO data connection isolation on device %x already activated\n",
+				 CARD_DEVID(card));
 		if (fallback)
 			card->options.isolation = card->options.prev_isolation;
 		break;
@@ -4405,10 +4396,8 @@ int qeth_set_access_ctrl_online(struct qeth_card *card, int fallback)
 		rc = qeth_setadpparms_set_access_ctrl(card,
 			card->options.isolation, fallback);
 		if (rc) {
-			QETH_DBF_MESSAGE(3,
-				"IPA(SET_ACCESS_CTRL,%s,%d) sent failed\n",
-				card->gdev->dev.kobj.name,
-				rc);
+			QETH_DBF_MESSAGE(3, "IPA(SET_ACCESS_CTRL(%d) on device %x: sent failed\n",
+					 rc, CARD_DEVID(card));
 			rc = -EOPNOTSUPP;
 		}
 	} else if (card->options.isolation != ISOLATION_MODE_NONE) {
@@ -4634,8 +4623,8 @@ static int qeth_snmp_command(struct qeth_card *card, char __user *udata)
 	rc = qeth_send_ipa_snmp_cmd(card, iob, QETH_SETADP_BASE_LEN + req_len,
 				    qeth_snmp_command_cb, (void *)&qinfo);
 	if (rc)
-		QETH_DBF_MESSAGE(2, "SNMP command failed on %s: (0x%x)\n",
-			   QETH_CARD_IFNAME(card), rc);
+		QETH_DBF_MESSAGE(2, "SNMP command failed on device %x: (%#x)\n",
+				 CARD_DEVID(card), rc);
 	else {
 		if (copy_to_user(udata, qinfo.udata, qinfo.udata_len))
 			rc = -EFAULT;
@@ -4869,8 +4858,8 @@ static void qeth_determine_capabilities(struct qeth_card *card)
 
 	rc = qeth_read_conf_data(card, (void **) &prcd, &length);
 	if (rc) {
-		QETH_DBF_MESSAGE(2, "%s qeth_read_conf_data returned %i\n",
-			dev_name(&card->gdev->dev), rc);
+		QETH_DBF_MESSAGE(2, "qeth_read_conf_data on device %x returned %i\n",
+				 CARD_DEVID(card), rc);
 		QETH_DBF_TEXT_(SETUP, 2, "5err%d", rc);
 		goto out_offline;
 	}
@@ -5096,8 +5085,8 @@ int qeth_core_hardsetup_card(struct qeth_card *card)
 	qeth_update_from_chp_desc(card);
 retry:
 	if (retries < 3)
-		QETH_DBF_MESSAGE(2, "%s Retrying to do IDX activates.\n",
-			dev_name(&card->gdev->dev));
+		QETH_DBF_MESSAGE(2, "Retrying to do IDX activates on device %x.\n",
+				 CARD_DEVID(card));
 	rc = qeth_qdio_clear_card(card, card->info.type != QETH_CARD_TYPE_IQD);
 	ccw_device_set_offline(CARD_DDEV(card));
 	ccw_device_set_offline(CARD_WDEV(card));
@@ -5201,8 +5190,8 @@ int qeth_core_hardsetup_card(struct qeth_card *card)
 out:
 	dev_warn(&card->gdev->dev, "The qeth device driver failed to recover "
 		"an error on the device\n");
-	QETH_DBF_MESSAGE(2, "%s Initialization in hardsetup failed! rc=%d\n",
-		dev_name(&card->gdev->dev), rc);
+	QETH_DBF_MESSAGE(2, "Initialization for device %x failed in hardsetup! rc=%d\n",
+			 CARD_DEVID(card), rc);
 	return rc;
 }
 EXPORT_SYMBOL_GPL(qeth_core_hardsetup_card);
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 23aaf373f631e..5b67fd1f2b778 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -146,11 +146,11 @@ static int qeth_l2_write_mac(struct qeth_card *card, u8 *mac)
 	QETH_CARD_TEXT(card, 2, "L2Wmac");
 	rc = qeth_l2_send_setdelmac(card, mac, cmd);
 	if (rc == -EEXIST)
-		QETH_DBF_MESSAGE(2, "MAC %pM already registered on %s\n",
-				 mac, QETH_CARD_IFNAME(card));
+		QETH_DBF_MESSAGE(2, "MAC already registered on device %x\n",
+				 CARD_DEVID(card));
 	else if (rc)
-		QETH_DBF_MESSAGE(2, "Failed to register MAC %pM on %s: %d\n",
-				 mac, QETH_CARD_IFNAME(card), rc);
+		QETH_DBF_MESSAGE(2, "Failed to register MAC on device %x: %d\n",
+				 CARD_DEVID(card), rc);
 	return rc;
 }
 
@@ -163,8 +163,8 @@ static int qeth_l2_remove_mac(struct qeth_card *card, u8 *mac)
 	QETH_CARD_TEXT(card, 2, "L2Rmac");
 	rc = qeth_l2_send_setdelmac(card, mac, cmd);
 	if (rc)
-		QETH_DBF_MESSAGE(2, "Failed to delete MAC %pM on %s: %d\n",
-				 mac, QETH_CARD_IFNAME(card), rc);
+		QETH_DBF_MESSAGE(2, "Failed to delete MAC on device %u: %d\n",
+				 CARD_DEVID(card), rc);
 	return rc;
 }
 
@@ -260,9 +260,9 @@ static int qeth_l2_send_setdelvlan_cb(struct qeth_card *card,
 
 	QETH_CARD_TEXT(card, 2, "L2sdvcb");
 	if (cmd->hdr.return_code) {
-		QETH_DBF_MESSAGE(2, "Error in processing VLAN %i on %s: 0x%x.\n",
+		QETH_DBF_MESSAGE(2, "Error in processing VLAN %u on device %x: %#x.\n",
 				 cmd->data.setdelvlan.vlan_id,
-				 QETH_CARD_IFNAME(card), cmd->hdr.return_code);
+				 CARD_DEVID(card), cmd->hdr.return_code);
 		QETH_CARD_TEXT_(card, 2, "L2VL%4x", cmd->hdr.command);
 		QETH_CARD_TEXT_(card, 2, "err%d", cmd->hdr.return_code);
 	}
@@ -455,8 +455,8 @@ static int qeth_l2_request_initial_mac(struct qeth_card *card)
 		rc = qeth_vm_request_mac(card);
 		if (!rc)
 			goto out;
-		QETH_DBF_MESSAGE(2, "z/VM MAC Service failed on device %s: x%x\n",
-				 CARD_BUS_ID(card), rc);
+		QETH_DBF_MESSAGE(2, "z/VM MAC Service failed on device %x: %#x\n",
+				 CARD_DEVID(card), rc);
 		QETH_DBF_TEXT_(SETUP, 2, "err%04x", rc);
 		/* fall back to alternative mechanism: */
 	}
@@ -468,8 +468,8 @@ static int qeth_l2_request_initial_mac(struct qeth_card *card)
 		rc = qeth_setadpparms_change_macaddr(card);
 		if (!rc)
 			goto out;
-		QETH_DBF_MESSAGE(2, "READ_MAC Assist failed on device %s: x%x\n",
-				 CARD_BUS_ID(card), rc);
+		QETH_DBF_MESSAGE(2, "READ_MAC Assist failed on device %x: %#x\n",
+				 CARD_DEVID(card), rc);
 		QETH_DBF_TEXT_(SETUP, 2, "1err%04x", rc);
 		/* fall back once more: */
 	}
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 0b161cc1fd2e6..ffa2aa1dd4c51 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -494,9 +494,8 @@ int qeth_l3_setrouting_v4(struct qeth_card *card)
 				  QETH_PROT_IPV4);
 	if (rc) {
 		card->options.route4.type = NO_ROUTER;
-		QETH_DBF_MESSAGE(2, "Error (0x%04x) while setting routing type"
-			" on %s. Type set to 'no router'.\n", rc,
-			QETH_CARD_IFNAME(card));
+		QETH_DBF_MESSAGE(2, "Error (%#06x) while setting routing type on device %x. Type set to 'no router'.\n",
+				 rc, CARD_DEVID(card));
 	}
 	return rc;
 }
@@ -518,9 +517,8 @@ int qeth_l3_setrouting_v6(struct qeth_card *card)
 				  QETH_PROT_IPV6);
 	if (rc) {
 		card->options.route6.type = NO_ROUTER;
-		QETH_DBF_MESSAGE(2, "Error (0x%04x) while setting routing type"
-			" on %s. Type set to 'no router'.\n", rc,
-			QETH_CARD_IFNAME(card));
+		QETH_DBF_MESSAGE(2, "Error (%#06x) while setting routing type on device %x. Type set to 'no router'.\n",
+				 rc, CARD_DEVID(card));
 	}
 	return rc;
 }
@@ -1070,8 +1068,8 @@ qeth_diags_trace_cb(struct qeth_card *card, struct qeth_reply *reply,
 		}
 		break;
 	default:
-		QETH_DBF_MESSAGE(2, "Unknown sniffer action (0x%04x) on %s\n",
-			cmd->data.diagass.action, QETH_CARD_IFNAME(card));
+		QETH_DBF_MESSAGE(2, "Unknown sniffer action (%#06x) on device %x\n",
+				 cmd->data.diagass.action, CARD_DEVID(card));
 	}
 
 	return 0;
@@ -1517,32 +1515,25 @@ static void qeth_l3_set_rx_mode(struct net_device *dev)
 	qeth_l3_handle_promisc_mode(card);
 }
 
-static const char *qeth_l3_arp_get_error_cause(int *rc)
+static int qeth_l3_arp_makerc(int rc)
 {
-	switch (*rc) {
-	case QETH_IPA_ARP_RC_FAILED:
-		*rc = -EIO;
-		return "operation failed";
+	switch (rc) {
+	case IPA_RC_SUCCESS:
+		return 0;
 	case QETH_IPA_ARP_RC_NOTSUPP:
-		*rc = -EOPNOTSUPP;
-		return "operation not supported";
-	case QETH_IPA_ARP_RC_OUT_OF_RANGE:
-		*rc = -EINVAL;
-		return "argument out of range";
 	case QETH_IPA_ARP_RC_Q_NOTSUPP:
-		*rc = -EOPNOTSUPP;
-		return "query operation not supported";
+		return -EOPNOTSUPP;
+	case QETH_IPA_ARP_RC_OUT_OF_RANGE:
+		return -EINVAL;
 	case QETH_IPA_ARP_RC_Q_NO_DATA:
-		*rc = -ENOENT;
-		return "no query data available";
+		return -ENOENT;
 	default:
-		return "unknown error";
+		return -EIO;
 	}
 }
 
 static int qeth_l3_arp_set_no_entries(struct qeth_card *card, int no_entries)
 {
-	int tmp;
 	int rc;
 
 	QETH_CARD_TEXT(card, 3, "arpstnoe");
@@ -1560,13 +1551,10 @@ static int qeth_l3_arp_set_no_entries(struct qeth_card *card, int no_entries)
 	rc = qeth_send_simple_setassparms(card, IPA_ARP_PROCESSING,
 					  IPA_CMD_ASS_ARP_SET_NO_ENTRIES,
 					  no_entries);
-	if (rc) {
-		tmp = rc;
-		QETH_DBF_MESSAGE(2, "Could not set number of ARP entries on "
-			"%s: %s (0x%x/%d)\n", QETH_CARD_IFNAME(card),
-			qeth_l3_arp_get_error_cause(&rc), tmp, tmp);
-	}
-	return rc;
+	if (rc)
+		QETH_DBF_MESSAGE(2, "Could not set number of ARP entries on device %x: %#x\n",
+				 CARD_DEVID(card), rc);
+	return qeth_l3_arp_makerc(rc);
 }
 
 static __u32 get_arp_entry_size(struct qeth_card *card,
@@ -1716,7 +1704,6 @@ static int qeth_l3_query_arp_cache_info(struct qeth_card *card,
 {
 	struct qeth_cmd_buffer *iob;
 	struct qeth_ipa_cmd *cmd;
-	int tmp;
 	int rc;
 
 	QETH_CARD_TEXT_(card, 3, "qarpipv%i", prot);
@@ -1735,15 +1722,10 @@ static int qeth_l3_query_arp_cache_info(struct qeth_card *card,
 	rc = qeth_l3_send_ipa_arp_cmd(card, iob,
 			   QETH_SETASS_BASE_LEN+QETH_ARP_CMD_LEN,
 			   qeth_l3_arp_query_cb, (void *)qinfo);
-	if (rc) {
-		tmp = rc;
-		QETH_DBF_MESSAGE(2,
-			"Error while querying ARP cache on %s: %s "
-			"(0x%x/%d)\n", QETH_CARD_IFNAME(card),
-			qeth_l3_arp_get_error_cause(&rc), tmp, tmp);
-	}
-
-	return rc;
+	if (rc)
+		QETH_DBF_MESSAGE(2, "Error while querying ARP cache on device %x: %#x\n",
+				 CARD_DEVID(card), rc);
+	return qeth_l3_arp_makerc(rc);
 }
 
 static int qeth_l3_arp_query(struct qeth_card *card, char __user *udata)
@@ -1797,8 +1779,6 @@ static int qeth_l3_arp_add_entry(struct qeth_card *card,
 				struct qeth_arp_cache_entry *entry)
 {
 	struct qeth_cmd_buffer *iob;
-	char buf[16];
-	int tmp;
 	int rc;
 
 	QETH_CARD_TEXT(card, 3, "arpadent");
@@ -1824,14 +1804,10 @@ static int qeth_l3_arp_add_entry(struct qeth_card *card,
 				   sizeof(struct qeth_arp_cache_entry),
 				   (unsigned long) entry,
 				   qeth_setassparms_cb, NULL);
-	if (rc) {
-		tmp = rc;
-		qeth_l3_ipaddr4_to_string((u8 *)entry->ipaddr, buf);
-		QETH_DBF_MESSAGE(2, "Could not add ARP entry for address %s "
-			"on %s: %s (0x%x/%d)\n", buf, QETH_CARD_IFNAME(card),
-			qeth_l3_arp_get_error_cause(&rc), tmp, tmp);
-	}
-	return rc;
+	if (rc)
+		QETH_DBF_MESSAGE(2, "Could not add ARP entry on device %x: %#x\n",
+				 CARD_DEVID(card), rc);
+	return qeth_l3_arp_makerc(rc);
 }
 
 static int qeth_l3_arp_remove_entry(struct qeth_card *card,
@@ -1839,7 +1815,6 @@ static int qeth_l3_arp_remove_entry(struct qeth_card *card,
 {
 	struct qeth_cmd_buffer *iob;
 	char buf[16] = {0, };
-	int tmp;
 	int rc;
 
 	QETH_CARD_TEXT(card, 3, "arprment");
@@ -1864,21 +1839,15 @@ static int qeth_l3_arp_remove_entry(struct qeth_card *card,
 	rc = qeth_send_setassparms(card, iob,
 				   12, (unsigned long)buf,
 				   qeth_setassparms_cb, NULL);
-	if (rc) {
-		tmp = rc;
-		memset(buf, 0, 16);
-		qeth_l3_ipaddr4_to_string((u8 *)entry->ipaddr, buf);
-		QETH_DBF_MESSAGE(2, "Could not delete ARP entry for address %s"
-			" on %s: %s (0x%x/%d)\n", buf, QETH_CARD_IFNAME(card),
-			qeth_l3_arp_get_error_cause(&rc), tmp, tmp);
-	}
-	return rc;
+	if (rc)
+		QETH_DBF_MESSAGE(2, "Could not delete ARP entry on device %x: %#x\n",
+				 CARD_DEVID(card), rc);
+	return qeth_l3_arp_makerc(rc);
 }
 
 static int qeth_l3_arp_flush_cache(struct qeth_card *card)
 {
 	int rc;
-	int tmp;
 
 	QETH_CARD_TEXT(card, 3, "arpflush");
 
@@ -1894,13 +1863,10 @@ static int qeth_l3_arp_flush_cache(struct qeth_card *card)
 	}
 	rc = qeth_send_simple_setassparms(card, IPA_ARP_PROCESSING,
 					  IPA_CMD_ASS_ARP_FLUSH_CACHE, 0);
-	if (rc) {
-		tmp = rc;
-		QETH_DBF_MESSAGE(2, "Could not flush ARP cache on %s: %s "
-			"(0x%x/%d)\n", QETH_CARD_IFNAME(card),
-			qeth_l3_arp_get_error_cause(&rc), tmp, tmp);
-	}
-	return rc;
+	if (rc)
+		QETH_DBF_MESSAGE(2, "Could not flush ARP cache on device %x: %#x\n",
+				 CARD_DEVID(card), rc);
+	return qeth_l3_arp_makerc(rc);
 }
 
 static int qeth_l3_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
-- 
GitLab


From bd74a7f9cc033cf4d405788f80292268987dc0c5 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Fri, 2 Nov 2018 19:04:09 +0100
Subject: [PATCH 0539/1890] s390/qeth: fix HiperSockets sniffer

Sniffing mode for L3 HiperSockets requires that no IP addresses are
registered with the HW. The preferred way to achieve this is for
userspace to delete all the IPs on the interface. But qeth is expected
to also tolerate a configuration where that is not the case, by skipping
the IP registration when in sniffer mode.
Since commit 5f78e29ceebf ("qeth: optimize IP handling in rx_mode callback")
reworked the IP registration logic in the L3 subdriver, this no longer
works. When the qeth device is set online, qeth_l3_recover_ip() now
unconditionally registers all unicast addresses from our internal
IP table.

While we could fix this particular problem by skipping
qeth_l3_recover_ip() on a sniffer device, the more future-proof change
is to skip the IP address registration at the lowest level. This way we
a) catch any future code path that attempts to register an IP address
   without considering the sniffer scenario, and
b) continue to build up our internal IP table, so that if sniffer mode
   is switched off later we can operate just like normal.

Fixes: 5f78e29ceebf ("qeth: optimize IP handling in rx_mode callback")
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_l3_main.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index ffa2aa1dd4c51..968e344a240b9 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -278,9 +278,6 @@ static void qeth_l3_clear_ip_htable(struct qeth_card *card, int recover)
 
 	QETH_CARD_TEXT(card, 4, "clearip");
 
-	if (recover && card->options.sniffer)
-		return;
-
 	spin_lock_bh(&card->ip_lock);
 
 	hash_for_each_safe(card->ip_htable, i, tmp, addr, hnode) {
@@ -661,6 +658,8 @@ static int qeth_l3_register_addr_entry(struct qeth_card *card,
 	int rc = 0;
 	int cnt = 3;
 
+	if (card->options.sniffer)
+		return 0;
 
 	if (addr->proto == QETH_PROT_IPV4) {
 		QETH_CARD_TEXT(card, 2, "setaddr4");
@@ -695,6 +694,9 @@ static int qeth_l3_deregister_addr_entry(struct qeth_card *card,
 {
 	int rc = 0;
 
+	if (card->options.sniffer)
+		return 0;
+
 	if (addr->proto == QETH_PROT_IPV4) {
 		QETH_CARD_TEXT(card, 2, "deladdr4");
 		QETH_CARD_HEX(card, 3, &addr->u.a4.addr, sizeof(int));
-- 
GitLab


From 30356d08159d7899438e94503ae322a8b881e205 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Fri, 2 Nov 2018 19:04:10 +0100
Subject: [PATCH 0540/1890] s390/qeth: unregister netdevice only when
 registered

qeth only registers its netdevice when the qeth device is first set
online. Thus a device that has never been set online will trigger
a WARN ("network todo 'hsi%d' but state 0") in unregister_netdev() when
removed.

Fix this by protecting the unregister step, just like we already protect
against repeated registering of the netdevice.

Fixes: d3d1b205e89f ("s390/qeth: allocate netdevice early")
Reported-by: Karsten Graul <kgraul@linux.ibm.com>
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_core.h    | 5 +++++
 drivers/s390/net/qeth_l2_main.c | 5 +++--
 drivers/s390/net/qeth_l3_main.c | 5 +++--
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 884ba9dfb3416..b3a0b8838d2f2 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -843,6 +843,11 @@ struct qeth_trap_id {
 /*some helper functions*/
 #define QETH_CARD_IFNAME(card) (((card)->dev)? (card)->dev->name : "")
 
+static inline bool qeth_netdev_is_registered(struct net_device *dev)
+{
+	return dev->netdev_ops != NULL;
+}
+
 static inline void qeth_scrub_qdio_buffer(struct qdio_buffer *buf,
 					  unsigned int elements)
 {
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 5b67fd1f2b778..2b978eba7e30c 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -826,7 +826,8 @@ static void qeth_l2_remove_device(struct ccwgroup_device *cgdev)
 
 	if (cgdev->state == CCWGROUP_ONLINE)
 		qeth_l2_set_offline(cgdev);
-	unregister_netdev(card->dev);
+	if (qeth_netdev_is_registered(card->dev))
+		unregister_netdev(card->dev);
 }
 
 static const struct ethtool_ops qeth_l2_ethtool_ops = {
@@ -866,7 +867,7 @@ static int qeth_l2_setup_netdev(struct qeth_card *card)
 {
 	int rc;
 
-	if (card->dev->netdev_ops)
+	if (qeth_netdev_is_registered(card->dev))
 		return 0;
 
 	card->dev->priv_flags |= IFF_UNICAST_FLT;
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 968e344a240b9..a719c5ec41710 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -2356,7 +2356,7 @@ static int qeth_l3_setup_netdev(struct qeth_card *card)
 	unsigned int headroom;
 	int rc;
 
-	if (card->dev->netdev_ops)
+	if (qeth_netdev_is_registered(card->dev))
 		return 0;
 
 	if (card->info.type == QETH_CARD_TYPE_OSD ||
@@ -2465,7 +2465,8 @@ static void qeth_l3_remove_device(struct ccwgroup_device *cgdev)
 	if (cgdev->state == CCWGROUP_ONLINE)
 		qeth_l3_set_offline(cgdev);
 
-	unregister_netdev(card->dev);
+	if (qeth_netdev_is_registered(card->dev))
+		unregister_netdev(card->dev);
 	qeth_l3_clear_ip_htable(card, 0);
 	qeth_l3_clear_ipato_list(card);
 }
-- 
GitLab


From 9fae5c3b60396b8586881a0e7c028ae5bcaeaa3f Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Fri, 2 Nov 2018 19:04:11 +0100
Subject: [PATCH 0541/1890] s390/qeth: fix initial operstate

Setting the carrier 'on' for an unregistered netdevice doesn't update
its operstate. Fix this by delaying the update until the netdevice has
been registered.

Fixes: 91cc98f51e3d ("s390/qeth: remove duplicated carrier state tracking")
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_core.h      |  2 +-
 drivers/s390/net/qeth_core_main.c | 13 ++++++++++---
 drivers/s390/net/qeth_l2_main.c   | 10 +++++++---
 drivers/s390/net/qeth_l3_main.c   | 10 +++++++---
 4 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index b3a0b8838d2f2..90cb213b0d558 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -991,7 +991,7 @@ int qeth_wait_for_threads(struct qeth_card *, unsigned long);
 int qeth_do_run_thread(struct qeth_card *, unsigned long);
 void qeth_clear_thread_start_bit(struct qeth_card *, unsigned long);
 void qeth_clear_thread_running_bit(struct qeth_card *, unsigned long);
-int qeth_core_hardsetup_card(struct qeth_card *);
+int qeth_core_hardsetup_card(struct qeth_card *card, bool *carrier_ok);
 void qeth_print_status_message(struct qeth_card *);
 int qeth_init_qdio_queues(struct qeth_card *);
 int qeth_send_ipa_cmd(struct qeth_card *, struct qeth_cmd_buffer *,
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 639ac0aca1e9d..aed1a7961553c 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -5075,7 +5075,7 @@ static struct ccw_driver qeth_ccw_driver = {
 	.remove = ccwgroup_remove_ccwdev,
 };
 
-int qeth_core_hardsetup_card(struct qeth_card *card)
+int qeth_core_hardsetup_card(struct qeth_card *card, bool *carrier_ok)
 {
 	int retries = 3;
 	int rc;
@@ -5150,13 +5150,20 @@ int qeth_core_hardsetup_card(struct qeth_card *card)
 		if (rc == IPA_RC_LAN_OFFLINE) {
 			dev_warn(&card->gdev->dev,
 				"The LAN is offline\n");
-			netif_carrier_off(card->dev);
+			*carrier_ok = false;
 		} else {
 			rc = -ENODEV;
 			goto out;
 		}
 	} else {
-		netif_carrier_on(card->dev);
+		*carrier_ok = true;
+	}
+
+	if (qeth_netdev_is_registered(card->dev)) {
+		if (*carrier_ok)
+			netif_carrier_on(card->dev);
+		else
+			netif_carrier_off(card->dev);
 	}
 
 	card->options.ipa4.supported_funcs = 0;
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 2b978eba7e30c..2914a1a69f830 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -863,7 +863,7 @@ static const struct net_device_ops qeth_l2_netdev_ops = {
 	.ndo_set_features	= qeth_set_features
 };
 
-static int qeth_l2_setup_netdev(struct qeth_card *card)
+static int qeth_l2_setup_netdev(struct qeth_card *card, bool carrier_ok)
 {
 	int rc;
 
@@ -920,6 +920,9 @@ static int qeth_l2_setup_netdev(struct qeth_card *card)
 	qeth_l2_request_initial_mac(card);
 	netif_napi_add(card->dev, &card->napi, qeth_poll, QETH_NAPI_WEIGHT);
 	rc = register_netdev(card->dev);
+	if (!rc && carrier_ok)
+		netif_carrier_on(card->dev);
+
 	if (rc)
 		card->dev->netdev_ops = NULL;
 	return rc;
@@ -950,6 +953,7 @@ static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode)
 	struct qeth_card *card = dev_get_drvdata(&gdev->dev);
 	int rc = 0;
 	enum qeth_card_states recover_flag;
+	bool carrier_ok;
 
 	mutex_lock(&card->discipline_mutex);
 	mutex_lock(&card->conf_mutex);
@@ -957,7 +961,7 @@ static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode)
 	QETH_DBF_HEX(SETUP, 2, &card, sizeof(void *));
 
 	recover_flag = card->state;
-	rc = qeth_core_hardsetup_card(card);
+	rc = qeth_core_hardsetup_card(card, &carrier_ok);
 	if (rc) {
 		QETH_DBF_TEXT_(SETUP, 2, "2err%04x", rc);
 		rc = -ENODEV;
@@ -968,7 +972,7 @@ static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode)
 		dev_info(&card->gdev->dev,
 		"The device represents a Bridge Capable Port\n");
 
-	rc = qeth_l2_setup_netdev(card);
+	rc = qeth_l2_setup_netdev(card, carrier_ok);
 	if (rc)
 		goto out_remove;
 
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index a719c5ec41710..b26f7d7a2ca0d 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -2351,7 +2351,7 @@ static const struct net_device_ops qeth_l3_osa_netdev_ops = {
 	.ndo_neigh_setup	= qeth_l3_neigh_setup,
 };
 
-static int qeth_l3_setup_netdev(struct qeth_card *card)
+static int qeth_l3_setup_netdev(struct qeth_card *card, bool carrier_ok)
 {
 	unsigned int headroom;
 	int rc;
@@ -2425,6 +2425,9 @@ static int qeth_l3_setup_netdev(struct qeth_card *card)
 
 	netif_napi_add(card->dev, &card->napi, qeth_poll, QETH_NAPI_WEIGHT);
 	rc = register_netdev(card->dev);
+	if (!rc && carrier_ok)
+		netif_carrier_on(card->dev);
+
 out:
 	if (rc)
 		card->dev->netdev_ops = NULL;
@@ -2476,6 +2479,7 @@ static int __qeth_l3_set_online(struct ccwgroup_device *gdev, int recovery_mode)
 	struct qeth_card *card = dev_get_drvdata(&gdev->dev);
 	int rc = 0;
 	enum qeth_card_states recover_flag;
+	bool carrier_ok;
 
 	mutex_lock(&card->discipline_mutex);
 	mutex_lock(&card->conf_mutex);
@@ -2483,14 +2487,14 @@ static int __qeth_l3_set_online(struct ccwgroup_device *gdev, int recovery_mode)
 	QETH_DBF_HEX(SETUP, 2, &card, sizeof(void *));
 
 	recover_flag = card->state;
-	rc = qeth_core_hardsetup_card(card);
+	rc = qeth_core_hardsetup_card(card, &carrier_ok);
 	if (rc) {
 		QETH_DBF_TEXT_(SETUP, 2, "2err%04x", rc);
 		rc = -ENODEV;
 		goto out_remove;
 	}
 
-	rc = qeth_l3_setup_netdev(card);
+	rc = qeth_l3_setup_netdev(card, carrier_ok);
 	if (rc)
 		goto out_remove;
 
-- 
GitLab


From 125d7d30111738a5bdafacc1ed87cd3d7f32b4ea Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Fri, 2 Nov 2018 19:04:12 +0100
Subject: [PATCH 0542/1890] s390/qeth: sanitize ARP requests

The ARP_{ADD,REMOVE}_ENTRY cmd structs contain reserved fields.
Introduce a common helper that doesn't raw-copy the user-provided data
into the cmd, but only sets those fields that are strictly needed for
the command.

This also sets the correct command length for ARP_REMOVE_ENTRY.

Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_core.h      |  5 --
 drivers/s390/net/qeth_core_main.c | 12 ++--
 drivers/s390/net/qeth_core_mpc.h  |  2 +-
 drivers/s390/net/qeth_l3_main.c   | 94 +++++++++----------------------
 4 files changed, 34 insertions(+), 79 deletions(-)

diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 90cb213b0d558..04e294d1d16d7 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -1046,11 +1046,6 @@ int qeth_configure_cq(struct qeth_card *, enum qeth_cq);
 int qeth_hw_trap(struct qeth_card *, enum qeth_diags_trap_action);
 void qeth_trace_features(struct qeth_card *);
 void qeth_close_dev(struct qeth_card *);
-int qeth_send_setassparms(struct qeth_card *, struct qeth_cmd_buffer *, __u16,
-			  long,
-			  int (*reply_cb)(struct qeth_card *,
-					  struct qeth_reply *, unsigned long),
-			  void *);
 int qeth_setassparms_cb(struct qeth_card *, struct qeth_reply *, unsigned long);
 struct qeth_cmd_buffer *qeth_get_setassparms_cmd(struct qeth_card *,
 						 enum qeth_ipa_funcs,
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index aed1a7961553c..82282b2092d87 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -5477,11 +5477,12 @@ struct qeth_cmd_buffer *qeth_get_setassparms_cmd(struct qeth_card *card,
 }
 EXPORT_SYMBOL_GPL(qeth_get_setassparms_cmd);
 
-int qeth_send_setassparms(struct qeth_card *card,
-			  struct qeth_cmd_buffer *iob, __u16 len, long data,
-			  int (*reply_cb)(struct qeth_card *,
-					  struct qeth_reply *, unsigned long),
-			  void *reply_param)
+static int qeth_send_setassparms(struct qeth_card *card,
+				 struct qeth_cmd_buffer *iob, u16 len,
+				 long data, int (*reply_cb)(struct qeth_card *,
+							    struct qeth_reply *,
+							    unsigned long),
+				 void *reply_param)
 {
 	int rc;
 	struct qeth_ipa_cmd *cmd;
@@ -5497,7 +5498,6 @@ int qeth_send_setassparms(struct qeth_card *card,
 	rc = qeth_send_ipa_cmd(card, iob, reply_cb, reply_param);
 	return rc;
 }
-EXPORT_SYMBOL_GPL(qeth_send_setassparms);
 
 int qeth_send_simple_setassparms_prot(struct qeth_card *card,
 				      enum qeth_ipa_funcs ipa_func,
diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h
index e85090467afe0..80c036acf5630 100644
--- a/drivers/s390/net/qeth_core_mpc.h
+++ b/drivers/s390/net/qeth_core_mpc.h
@@ -436,7 +436,7 @@ struct qeth_ipacmd_setassparms {
 		__u32 flags_32bit;
 		struct qeth_ipa_caps caps;
 		struct qeth_checksum_cmd chksum;
-		struct qeth_arp_cache_entry add_arp_entry;
+		struct qeth_arp_cache_entry arp_entry;
 		struct qeth_arp_query_data query_arp;
 		struct qeth_tso_start_data tso;
 		__u8 ip[16];
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index b26f7d7a2ca0d..f08b745c20073 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -1777,13 +1777,18 @@ static int qeth_l3_arp_query(struct qeth_card *card, char __user *udata)
 	return rc;
 }
 
-static int qeth_l3_arp_add_entry(struct qeth_card *card,
-				struct qeth_arp_cache_entry *entry)
+static int qeth_l3_arp_modify_entry(struct qeth_card *card,
+				    struct qeth_arp_cache_entry *entry,
+				    enum qeth_arp_process_subcmds arp_cmd)
 {
+	struct qeth_arp_cache_entry *cmd_entry;
 	struct qeth_cmd_buffer *iob;
 	int rc;
 
-	QETH_CARD_TEXT(card, 3, "arpadent");
+	if (arp_cmd == IPA_CMD_ASS_ARP_ADD_ENTRY)
+		QETH_CARD_TEXT(card, 3, "arpadd");
+	else
+		QETH_CARD_TEXT(card, 3, "arpdel");
 
 	/*
 	 * currently GuestLAN only supports the ARP assist function
@@ -1796,54 +1801,19 @@ static int qeth_l3_arp_add_entry(struct qeth_card *card,
 		return -EOPNOTSUPP;
 	}
 
-	iob = qeth_get_setassparms_cmd(card, IPA_ARP_PROCESSING,
-				       IPA_CMD_ASS_ARP_ADD_ENTRY,
-				       sizeof(struct qeth_arp_cache_entry),
-				       QETH_PROT_IPV4);
+	iob = qeth_get_setassparms_cmd(card, IPA_ARP_PROCESSING, arp_cmd,
+				       sizeof(*cmd_entry), QETH_PROT_IPV4);
 	if (!iob)
 		return -ENOMEM;
-	rc = qeth_send_setassparms(card, iob,
-				   sizeof(struct qeth_arp_cache_entry),
-				   (unsigned long) entry,
-				   qeth_setassparms_cb, NULL);
-	if (rc)
-		QETH_DBF_MESSAGE(2, "Could not add ARP entry on device %x: %#x\n",
-				 CARD_DEVID(card), rc);
-	return qeth_l3_arp_makerc(rc);
-}
-
-static int qeth_l3_arp_remove_entry(struct qeth_card *card,
-				struct qeth_arp_cache_entry *entry)
-{
-	struct qeth_cmd_buffer *iob;
-	char buf[16] = {0, };
-	int rc;
 
-	QETH_CARD_TEXT(card, 3, "arprment");
-
-	/*
-	 * currently GuestLAN only supports the ARP assist function
-	 * IPA_CMD_ASS_ARP_QUERY_INFO, but not IPA_CMD_ASS_ARP_REMOVE_ENTRY;
-	 * thus we say EOPNOTSUPP for this ARP function
-	 */
-	if (card->info.guestlan)
-		return -EOPNOTSUPP;
-	if (!qeth_is_supported(card, IPA_ARP_PROCESSING)) {
-		return -EOPNOTSUPP;
-	}
-	memcpy(buf, entry, 12);
-	iob = qeth_get_setassparms_cmd(card, IPA_ARP_PROCESSING,
-				       IPA_CMD_ASS_ARP_REMOVE_ENTRY,
-				       12,
-				       QETH_PROT_IPV4);
-	if (!iob)
-		return -ENOMEM;
-	rc = qeth_send_setassparms(card, iob,
-				   12, (unsigned long)buf,
-				   qeth_setassparms_cb, NULL);
+	cmd_entry = &__ipa_cmd(iob)->data.setassparms.data.arp_entry;
+	ether_addr_copy(cmd_entry->macaddr, entry->macaddr);
+	memcpy(cmd_entry->ipaddr, entry->ipaddr, 4);
+	rc = qeth_send_ipa_cmd(card, iob, qeth_setassparms_cb, NULL);
 	if (rc)
-		QETH_DBF_MESSAGE(2, "Could not delete ARP entry on device %x: %#x\n",
-				 CARD_DEVID(card), rc);
+		QETH_DBF_MESSAGE(2, "Could not modify (cmd: %#x) ARP entry on device %x: %#x\n",
+				 arp_cmd, CARD_DEVID(card), rc);
+
 	return qeth_l3_arp_makerc(rc);
 }
 
@@ -1875,6 +1845,7 @@ static int qeth_l3_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 {
 	struct qeth_card *card = dev->ml_priv;
 	struct qeth_arp_cache_entry arp_entry;
+	enum qeth_arp_process_subcmds arp_cmd;
 	int rc = 0;
 
 	switch (cmd) {
@@ -1893,27 +1864,16 @@ static int qeth_l3_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 		rc = qeth_l3_arp_query(card, rq->ifr_ifru.ifru_data);
 		break;
 	case SIOC_QETH_ARP_ADD_ENTRY:
-		if (!capable(CAP_NET_ADMIN)) {
-			rc = -EPERM;
-			break;
-		}
-		if (copy_from_user(&arp_entry, rq->ifr_ifru.ifru_data,
-				   sizeof(struct qeth_arp_cache_entry)))
-			rc = -EFAULT;
-		else
-			rc = qeth_l3_arp_add_entry(card, &arp_entry);
-		break;
 	case SIOC_QETH_ARP_REMOVE_ENTRY:
-		if (!capable(CAP_NET_ADMIN)) {
-			rc = -EPERM;
-			break;
-		}
-		if (copy_from_user(&arp_entry, rq->ifr_ifru.ifru_data,
-				   sizeof(struct qeth_arp_cache_entry)))
-			rc = -EFAULT;
-		else
-			rc = qeth_l3_arp_remove_entry(card, &arp_entry);
-		break;
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+		if (copy_from_user(&arp_entry, rq->ifr_data, sizeof(arp_entry)))
+			return -EFAULT;
+
+		arp_cmd = (cmd == SIOC_QETH_ARP_ADD_ENTRY) ?
+				IPA_CMD_ASS_ARP_ADD_ENTRY :
+				IPA_CMD_ASS_ARP_REMOVE_ENTRY;
+		return qeth_l3_arp_modify_entry(card, &arp_entry, arp_cmd);
 	case SIOC_QETH_ARP_FLUSH_CACHE:
 		if (!capable(CAP_NET_ADMIN)) {
 			rc = -EPERM;
-- 
GitLab


From 54e049c227d9968ff6a7d80aae5fec27b54d39da Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Fri, 2 Nov 2018 19:04:13 +0100
Subject: [PATCH 0543/1890] s390/qeth: report 25Gbit link speed

This adds the various identifiers for 25Gbit cards, and wires them up
into sysfs and ethtool.

Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_core_main.c | 20 ++++++++++++++++++--
 drivers/s390/net/qeth_core_mpc.h  |  2 ++
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 82282b2092d87..4bce5ae65a55c 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -167,6 +167,8 @@ const char *qeth_get_cardname_short(struct qeth_card *card)
 				return "OSD_1000";
 			case QETH_LINK_TYPE_10GBIT_ETH:
 				return "OSD_10GIG";
+			case QETH_LINK_TYPE_25GBIT_ETH:
+				return "OSD_25GIG";
 			case QETH_LINK_TYPE_LANE_ETH100:
 				return "OSD_FE_LANE";
 			case QETH_LINK_TYPE_LANE_TR:
@@ -4432,7 +4434,8 @@ static int qeth_mdio_read(struct net_device *dev, int phy_id, int regnum)
 		rc = BMCR_FULLDPLX;
 		if ((card->info.link_type != QETH_LINK_TYPE_GBIT_ETH) &&
 		    (card->info.link_type != QETH_LINK_TYPE_OSN) &&
-		    (card->info.link_type != QETH_LINK_TYPE_10GBIT_ETH))
+		    (card->info.link_type != QETH_LINK_TYPE_10GBIT_ETH) &&
+		    (card->info.link_type != QETH_LINK_TYPE_25GBIT_ETH))
 			rc |= BMCR_SPEED100;
 		break;
 	case MII_BMSR: /* Basic mode status register */
@@ -6166,8 +6169,14 @@ static void qeth_set_cmd_adv_sup(struct ethtool_link_ksettings *cmd,
 		WARN_ON_ONCE(1);
 	}
 
-	/* fallthrough from high to low, to select all legal speeds: */
+	/* partially does fall through, to also select lower speeds */
 	switch (maxspeed) {
+	case SPEED_25000:
+		ethtool_link_ksettings_add_link_mode(cmd, supported,
+						     25000baseSR_Full);
+		ethtool_link_ksettings_add_link_mode(cmd, advertising,
+						     25000baseSR_Full);
+		break;
 	case SPEED_10000:
 		ethtool_link_ksettings_add_link_mode(cmd, supported,
 						     10000baseT_Full);
@@ -6250,6 +6259,10 @@ int qeth_core_ethtool_get_link_ksettings(struct net_device *netdev,
 		cmd->base.speed = SPEED_10000;
 		cmd->base.port = PORT_FIBRE;
 		break;
+	case QETH_LINK_TYPE_25GBIT_ETH:
+		cmd->base.speed = SPEED_25000;
+		cmd->base.port = PORT_FIBRE;
+		break;
 	default:
 		cmd->base.speed = SPEED_10;
 		cmd->base.port = PORT_TP;
@@ -6316,6 +6329,9 @@ int qeth_core_ethtool_get_link_ksettings(struct net_device *netdev,
 	case CARD_INFO_PORTS_10G:
 		cmd->base.speed = SPEED_10000;
 		break;
+	case CARD_INFO_PORTS_25G:
+		cmd->base.speed = SPEED_25000;
+		break;
 	}
 
 	return 0;
diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h
index 80c036acf5630..3e54be201b279 100644
--- a/drivers/s390/net/qeth_core_mpc.h
+++ b/drivers/s390/net/qeth_core_mpc.h
@@ -90,6 +90,7 @@ enum qeth_link_types {
 	QETH_LINK_TYPE_GBIT_ETH     = 0x03,
 	QETH_LINK_TYPE_OSN          = 0x04,
 	QETH_LINK_TYPE_10GBIT_ETH   = 0x10,
+	QETH_LINK_TYPE_25GBIT_ETH   = 0x12,
 	QETH_LINK_TYPE_LANE_ETH100  = 0x81,
 	QETH_LINK_TYPE_LANE_TR      = 0x82,
 	QETH_LINK_TYPE_LANE_ETH1000 = 0x83,
@@ -347,6 +348,7 @@ enum qeth_card_info_port_speed {
 	CARD_INFO_PORTS_100M		= 0x00000006,
 	CARD_INFO_PORTS_1G		= 0x00000007,
 	CARD_INFO_PORTS_10G		= 0x00000008,
+	CARD_INFO_PORTS_25G		= 0x0000000A,
 };
 
 /* (SET)DELIP(M) IPA stuff ***************************************************/
-- 
GitLab


From 9e4028935cca3f9ef9b6a90df9da6f1f94853536 Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@virtuozzo.com>
Date: Sat, 3 Nov 2018 16:13:17 -0400
Subject: [PATCH 0544/1890] ext4: avoid potential extra brelse in
 setup_new_flex_group_blocks()

Currently bh is set to NULL only during first iteration of for cycle,
then this pointer is not cleared after end of using.
Therefore rollback after errors can lead to extra brelse(bh) call,
decrements bh counter and later trigger an unexpected warning in __brelse()

Patch moves brelse() calls in body of cycle to exclude requirement of
brelse() call in rollback.

Fixes: 33afdcc5402d ("ext4: add a function which sets up group blocks ...")
Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org # 3.3+
---
 fs/ext4/resize.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index ebbc663d07985..c3fa30878ca83 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -605,7 +605,6 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
 		bh = bclean(handle, sb, block);
 		if (IS_ERR(bh)) {
 			err = PTR_ERR(bh);
-			bh = NULL;
 			goto out;
 		}
 		overhead = ext4_group_overhead_blocks(sb, group);
@@ -618,9 +617,9 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
 		ext4_mark_bitmap_end(EXT4_B2C(sbi, group_data[i].blocks_count),
 				     sb->s_blocksize * 8, bh->b_data);
 		err = ext4_handle_dirty_metadata(handle, NULL, bh);
+		brelse(bh);
 		if (err)
 			goto out;
-		brelse(bh);
 
 handle_ib:
 		if (bg_flags[i] & EXT4_BG_INODE_UNINIT)
@@ -635,18 +634,16 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
 		bh = bclean(handle, sb, block);
 		if (IS_ERR(bh)) {
 			err = PTR_ERR(bh);
-			bh = NULL;
 			goto out;
 		}
 
 		ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb),
 				     sb->s_blocksize * 8, bh->b_data);
 		err = ext4_handle_dirty_metadata(handle, NULL, bh);
+		brelse(bh);
 		if (err)
 			goto out;
-		brelse(bh);
 	}
-	bh = NULL;
 
 	/* Mark group tables in block bitmap */
 	for (j = 0; j < GROUP_TABLE_COUNT; j++) {
@@ -685,7 +682,6 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
 	}
 
 out:
-	brelse(bh);
 	err2 = ext4_journal_stop(handle);
 	if (err2 && !err)
 		err = err2;
-- 
GitLab


From cea5794122125bf67559906a0762186cf417099c Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@virtuozzo.com>
Date: Sat, 3 Nov 2018 16:22:10 -0400
Subject: [PATCH 0545/1890] ext4: add missing brelse() in
 set_flexbg_block_bitmap()'s error path

Fixes: 33afdcc5402d ("ext4: add a function which sets up group blocks ...")
Cc: stable@kernel.org # 3.3
Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/resize.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index c3fa30878ca83..0a4dc6217e783 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -459,16 +459,18 @@ static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle,
 
 		BUFFER_TRACE(bh, "get_write_access");
 		err = ext4_journal_get_write_access(handle, bh);
-		if (err)
+		if (err) {
+			brelse(bh);
 			return err;
+		}
 		ext4_debug("mark block bitmap %#04llx (+%llu/%u)\n",
 			   first_cluster, first_cluster - start, count2);
 		ext4_set_bits(bh->b_data, first_cluster - start, count2);
 
 		err = ext4_handle_dirty_metadata(handle, NULL, bh);
+		brelse(bh);
 		if (unlikely(err))
 			return err;
-		brelse(bh);
 	}
 
 	return 0;
-- 
GitLab


From 61a9c11e5e7a0dab5381afa5d9d4dd5ebf18f7a0 Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@virtuozzo.com>
Date: Sat, 3 Nov 2018 16:50:08 -0400
Subject: [PATCH 0546/1890] ext4: add missing brelse() add_new_gdb_meta_bg()'s
 error path

Fixes: 01f795f9e0d6 ("ext4: add online resizing support for meta_bg ...")
Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org # 3.7
---
 fs/ext4/resize.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 0a4dc6217e783..7131f35b62d9a 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -922,6 +922,7 @@ static int add_new_gdb_meta_bg(struct super_block *sb,
 				     sizeof(struct buffer_head *),
 				     GFP_NOFS);
 	if (!n_group_desc) {
+		brelse(gdb_bh);
 		err = -ENOMEM;
 		ext4_warning(sb, "not enough memory for %lu groups",
 			     gdb_num + 1);
@@ -937,8 +938,6 @@ static int add_new_gdb_meta_bg(struct super_block *sb,
 	kvfree(o_group_desc);
 	BUFFER_TRACE(gdb_bh, "get_write_access");
 	err = ext4_journal_get_write_access(handle, gdb_bh);
-	if (unlikely(err))
-		brelse(gdb_bh);
 	return err;
 }
 
-- 
GitLab


From ea0abbb648452cdb6e1734b702b6330a7448fcf8 Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@virtuozzo.com>
Date: Sat, 3 Nov 2018 17:11:19 -0400
Subject: [PATCH 0547/1890] ext4: add missing brelse() update_backups()'s error
 path

Fixes: ac27a0ec112a ("ext4: initial copy of files from ext3")
Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org # 2.6.19
---
 fs/ext4/resize.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 7131f35b62d9a..3df326ee6d506 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1121,8 +1121,10 @@ static void update_backups(struct super_block *sb, sector_t blk_off, char *data,
 			   backup_block, backup_block -
 			   ext4_group_first_block_no(sb, group));
 		BUFFER_TRACE(bh, "get_write_access");
-		if ((err = ext4_journal_get_write_access(handle, bh)))
+		if ((err = ext4_journal_get_write_access(handle, bh))) {
+			brelse(bh);
 			break;
+		}
 		lock_buffer(bh);
 		memcpy(bh->b_data, data, size);
 		if (rest)
-- 
GitLab


From 3e59020abf0f88182730527ee5b862e786eb485a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 31 Oct 2018 08:39:12 -0700
Subject: [PATCH 0548/1890] net: bql: add __netdev_tx_sent_queue()

When qdisc_run() tries to use BQL budget to bulk-dequeue a batch
of packets, GSO can later transform this list in another list
of skbs, and each skb is sent to device ndo_start_xmit(),
one at a time, with skb->xmit_more being set to one but
for last skb.

Problem is that very often, BQL limit is hit in the middle of
the packet train, forcing dev_hard_start_xmit() to stop the
bulk send and requeue the end of the list.

BQL role is to avoid head of line blocking, making sure
a qdisc can deliver high priority packets before low priority ones.

But there is no way requeued packets can be bypassed by fresh
packets in the qdisc.

Aborting the bulk send increases TX softirqs, and hot cache
lines (after skb_segment()) are wasted.

Note that for TSO packets, we never split a packet in the middle
because of BQL limit being hit.

Drivers should be able to update BQL counters without
flipping/caring about BQL status, if the current skb
has xmit_more set.

Upper layers are ultimately responsible to stop sending another
packet train when BQL limit is hit.

Code template in a driver might look like the following :

	send_doorbell = __netdev_tx_sent_queue(tx_queue, nr_bytes, skb->xmit_more);

Note that __netdev_tx_sent_queue() use is not mandatory,
since following patch will change dev_hard_start_xmit()
to not care about BQL status.

But it is highly recommended so that xmit_more full benefits
can be reached (less doorbells sent, and less atomic operations as well)

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index dc1d9ed33b319..857f8abf7b91b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3190,6 +3190,26 @@ static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue,
 #endif
 }
 
+/* Variant of netdev_tx_sent_queue() for drivers that are aware
+ * that they should not test BQL status themselves.
+ * We do want to change __QUEUE_STATE_STACK_XOFF only for the last
+ * skb of a batch.
+ * Returns true if the doorbell must be used to kick the NIC.
+ */
+static inline bool __netdev_tx_sent_queue(struct netdev_queue *dev_queue,
+					  unsigned int bytes,
+					  bool xmit_more)
+{
+	if (xmit_more) {
+#ifdef CONFIG_BQL
+		dql_queued(&dev_queue->dql, bytes);
+#endif
+		return netif_tx_queue_stopped(dev_queue);
+	}
+	netdev_tx_sent_queue(dev_queue, bytes);
+	return true;
+}
+
 /**
  * 	netdev_sent_queue - report the number of bytes queued to hardware
  * 	@dev: network device
-- 
GitLab


From fe60faa5063822f2d555f4f326c7dd72a60929bf Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 31 Oct 2018 08:39:13 -0700
Subject: [PATCH 0549/1890] net: do not abort bulk send on BQL status

Before calling dev_hard_start_xmit(), upper layers tried
to cook optimal skb list based on BQL budget.

Problem is that GSO packets can end up comsuming more than
the BQL budget.

Breaking the loop is not useful, since requeued packets
are ahead of any packets still in the qdisc.

It is also more expensive, since next TX completion will
push these packets later, while skbs are not in cpu caches.

It is also a behavior difference with TSO packets, that can
break the BQL limit by a large amount.

Note that drivers should use __netdev_tx_sent_queue()
in order to have optimal xmit_more support, and avoid
useless atomic operations as shown in the following patch.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 77d43ae2a7bbe..0ffcbdd55fa9e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3272,7 +3272,7 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *first, struct net_device *de
 		}
 
 		skb = next;
-		if (netif_xmit_stopped(txq) && skb) {
+		if (netif_tx_queue_stopped(txq) && skb) {
 			rc = NETDEV_TX_BUSY;
 			break;
 		}
-- 
GitLab


From c29734443511aa3c53844e1941805fc761aa80ba Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 31 Oct 2018 08:39:14 -0700
Subject: [PATCH 0550/1890] net/mlx4_en: use __netdev_tx_sent_queue()

doorbell only depends on xmit_more and netif_tx_queue_stopped()

Using __netdev_tx_sent_queue() avoids messing with BQL stop flag,
and is more generic.

This patch increases performance on GSO workload by keeping
doorbells to the minimum required.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Tariq Toukan <tariqt@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_tx.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 1857ee0f0871d..6f5153afcab4d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -1006,7 +1006,6 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 		ring->packets++;
 	}
 	ring->bytes += tx_info->nr_bytes;
-	netdev_tx_sent_queue(ring->tx_queue, tx_info->nr_bytes);
 	AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, skb->len);
 
 	if (tx_info->inl)
@@ -1044,7 +1043,10 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 		netif_tx_stop_queue(ring->tx_queue);
 		ring->queue_stopped++;
 	}
-	send_doorbell = !skb->xmit_more || netif_xmit_stopped(ring->tx_queue);
+
+	send_doorbell = __netdev_tx_sent_queue(ring->tx_queue,
+					       tx_info->nr_bytes,
+					       skb->xmit_more);
 
 	real_size = (real_size / 16) & 0x3f;
 
-- 
GitLab


From e8ccbb7d2f53c62e14b889faaa3f6f809b657278 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Fri, 2 Nov 2018 17:47:48 +0800
Subject: [PATCH 0551/1890] net: hns3: Fix for out-of-bounds access when
 setting pfc back pressure

The vport should be initialized to hdev->vport for each bp group,
otherwise it will cause out-of-bounds access and bp setting not
correct problem.

[   35.254124] BUG: KASAN: slab-out-of-bounds in hclge_pause_setup_hw+0x2a0/0x3f8 [hclge]
[   35.254126] Read of size 2 at addr ffff803b6651581a by task kworker/0:1/14

[   35.254132] CPU: 0 PID: 14 Comm: kworker/0:1 Not tainted 4.19.0-rc7-hulk+ #85
[   35.254133] Hardware name: Huawei D06/D06, BIOS Hisilicon D06 UEFI RC0 - B052 (V0.52) 09/14/2018
[   35.254141] Workqueue: events work_for_cpu_fn
[   35.254144] Call trace:
[   35.254147]  dump_backtrace+0x0/0x2f0
[   35.254149]  show_stack+0x24/0x30
[   35.254154]  dump_stack+0x110/0x184
[   35.254157]  print_address_description+0x168/0x2b0
[   35.254160]  kasan_report+0x184/0x310
[   35.254162]  __asan_load2+0x7c/0xa0
[   35.254170]  hclge_pause_setup_hw+0x2a0/0x3f8 [hclge]
[   35.254177]  hclge_tm_init_hw+0x794/0x9f0 [hclge]
[   35.254184]  hclge_tm_schd_init+0x48/0x58 [hclge]
[   35.254191]  hclge_init_ae_dev+0x778/0x1168 [hclge]
[   35.254196]  hnae3_register_ae_dev+0x14c/0x298 [hnae3]
[   35.254206]  hns3_probe+0x88/0xa8 [hns3]
[   35.254210]  local_pci_probe+0x7c/0xf0
[   35.254212]  work_for_cpu_fn+0x34/0x50
[   35.254214]  process_one_work+0x4d4/0xa38
[   35.254216]  worker_thread+0x55c/0x8d8
[   35.254219]  kthread+0x1b0/0x1b8
[   35.254222]  ret_from_fork+0x10/0x1c

[   35.254224] The buggy address belongs to the page:
[   35.254228] page:ffff7e00ed994400 count:1 mapcount:0 mapping:0000000000000000 index:0x0 compound_mapcount: 0
[   35.273835] flags: 0xfffff8000008000(head)
[   35.282007] raw: 0fffff8000008000 dead000000000100 dead000000000200 0000000000000000
[   35.282010] raw: 0000000000000000 0000000000000000 00000001ffffffff 0000000000000000
[   35.282012] page dumped because: kasan: bad access detected

[   35.282014] Memory state around the buggy address:
[   35.282017]  ffff803b66515700: fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe
[   35.282019]  ffff803b66515780: fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe
[   35.282021] >ffff803b66515800: fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe
[   35.282022]                             ^
[   35.282024]  ffff803b66515880: fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe
[   35.282026]  ffff803b66515900: fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe
[   35.282028] ==================================================================
[   35.282029] Disabling lock debugging due to kernel taint
[   35.282747] hclge driver initialization finished.

Fixes: 67bf2541f4b9 ("net: hns3: Fixes the back pressure setting when sriov is enabled")
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index aa5cb9834d73a..494e562fe8c7e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -1168,14 +1168,14 @@ static int hclge_pfc_setup_hw(struct hclge_dev *hdev)
  */
 static int hclge_bp_setup_hw(struct hclge_dev *hdev, u8 tc)
 {
-	struct hclge_vport *vport = hdev->vport;
-	u32 i, k, qs_bitmap;
-	int ret;
+	int i;
 
 	for (i = 0; i < HCLGE_BP_GRP_NUM; i++) {
-		qs_bitmap = 0;
+		u32 qs_bitmap = 0;
+		int k, ret;
 
 		for (k = 0; k < hdev->num_alloc_vport; k++) {
+			struct hclge_vport *vport = &hdev->vport[k];
 			u16 qs_id = vport->qs_offset + tc;
 			u8 grp, sub_grp;
 
@@ -1185,8 +1185,6 @@ static int hclge_bp_setup_hw(struct hclge_dev *hdev, u8 tc)
 						  HCLGE_BP_SUB_GRP_ID_S);
 			if (i == grp)
 				qs_bitmap |= (1 << sub_grp);
-
-			vport++;
 		}
 
 		ret = hclge_tm_qs_bp_cfg(hdev, tc, i, qs_bitmap);
-- 
GitLab


From 993f0b0510dad98b4e6e39506834dab0d13fd539 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 2 Nov 2018 14:22:25 +0100
Subject: [PATCH 0552/1890] sched/topology: Fix off by one bug

With the addition of the NUMA identity level, we increased @level by
one and will run off the end of the array in the distance sort loop.

Fixed: 051f3ca02e46 ("sched/topology: Introduce NUMA identity node sched domain")
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/topology.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 9d74371e4aad8..8d7f15ba59163 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -1337,7 +1337,7 @@ void sched_init_numa(void)
 	int level = 0;
 	int i, j, k;
 
-	sched_domains_numa_distance = kzalloc(sizeof(int) * nr_node_ids, GFP_KERNEL);
+	sched_domains_numa_distance = kzalloc(sizeof(int) * (nr_node_ids + 1), GFP_KERNEL);
 	if (!sched_domains_numa_distance)
 		return;
 
-- 
GitLab


From b987ffc18fb3b3b76b059aa9e372dbee26f7c4f2 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 2 Nov 2018 14:26:53 +0100
Subject: [PATCH 0553/1890] x86/qspinlock: Fix compile error

With a compiler that has asm-goto but not asm-cc-output and
CONFIG_PROFILE_ALL_BRANCHES=y we get a compiler error:

  arch/x86/include/asm/rmwcc.h:23:17: error: jump into statement expression

Fix this by writing the if() as a boolean multiplication instead.

Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-kernel@vger.kernel.org
Fixes: 7aa54be29765 ("locking/qspinlock, x86: Provide liveness guarantee")
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/qspinlock.h | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h
index 87623c6b13db5..bd5ac6cc37db5 100644
--- a/arch/x86/include/asm/qspinlock.h
+++ b/arch/x86/include/asm/qspinlock.h
@@ -13,12 +13,15 @@
 #define queued_fetch_set_pending_acquire queued_fetch_set_pending_acquire
 static __always_inline u32 queued_fetch_set_pending_acquire(struct qspinlock *lock)
 {
-	u32 val = 0;
-
-	if (GEN_BINARY_RMWcc(LOCK_PREFIX "btsl", lock->val.counter, c,
-			     "I", _Q_PENDING_OFFSET))
-		val |= _Q_PENDING_VAL;
+	u32 val;
 
+	/*
+	 * We can't use GEN_BINARY_RMWcc() inside an if() stmt because asm goto
+	 * and CONFIG_PROFILE_ALL_BRANCHES=y results in a label inside a
+	 * statement expression, which GCC doesn't like.
+	 */
+	val = GEN_BINARY_RMWcc(LOCK_PREFIX "btsl", lock->val.counter, c,
+			       "I", _Q_PENDING_OFFSET) * _Q_PENDING_VAL;
 	val |= atomic_read(&lock->val) & ~_Q_PENDING_MASK;
 
 	return val;
-- 
GitLab


From 40fa3780bac2b654edf23f6b13f4e2dd550aea10 Mon Sep 17 00:00:00 2001
From: Valentin Schneider <valentin.schneider@arm.com>
Date: Tue, 23 Oct 2018 14:37:31 +0100
Subject: [PATCH 0554/1890] sched/core: Take the hotplug lock in
 sched_init_smp()

When running on linux-next (8c60c36d0b8c ("Add linux-next specific files
for 20181019")) + CONFIG_PROVE_LOCKING=y on a big.LITTLE system (e.g.
Juno or HiKey960), we get the following report:

 [    0.748225] Call trace:
 [    0.750685]  lockdep_assert_cpus_held+0x30/0x40
 [    0.755236]  static_key_enable_cpuslocked+0x20/0xc8
 [    0.760137]  build_sched_domains+0x1034/0x1108
 [    0.764601]  sched_init_domains+0x68/0x90
 [    0.768628]  sched_init_smp+0x30/0x80
 [    0.772309]  kernel_init_freeable+0x278/0x51c
 [    0.776685]  kernel_init+0x10/0x108
 [    0.780190]  ret_from_fork+0x10/0x18

The static_key in question is 'sched_asym_cpucapacity' introduced by
commit:

  df054e8445a4 ("sched/topology: Add static_key for asymmetric CPU capacity optimizations")

In this particular case, we enable it because smp_prepare_cpus() will
end up fetching the capacity-dmips-mhz entry from the devicetree,
so we already have some asymmetry detected when entering sched_init_smp().

This didn't get detected in tip/sched/core because we were missing:

  commit cb538267ea1e ("jump_label/lockdep: Assert we hold the hotplug lock for _cpuslocked() operations")

Calls to build_sched_domains() post sched_init_smp() will hold the
hotplug lock, it just so happens that this very first call is a
special case. As stated by a comment in sched_init_smp(), "There's no
userspace yet to cause hotplug operations" so this is a harmless
warning.

However, to both respect the semantics of underlying
callees and make lockdep happy, take the hotplug lock in
sched_init_smp(). This also satisfies the comment atop
sched_init_domains() that says "Callers must hold the hotplug lock".

Reported-by: Sudeep Holla <sudeep.holla@arm.com>
Tested-by: Sudeep Holla <sudeep.holla@arm.com>
Signed-off-by: Valentin Schneider <valentin.schneider@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Dietmar.Eggemann@arm.com
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: morten.rasmussen@arm.com
Cc: quentin.perret@arm.com
Link: http://lkml.kernel.org/r/1540301851-3048-1-git-send-email-valentin.schneider@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/core.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fd2fce8a001be..02a20ef196a65 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5859,11 +5859,14 @@ void __init sched_init_smp(void)
 	/*
 	 * There's no userspace yet to cause hotplug operations; hence all the
 	 * CPU masks are stable and all blatant races in the below code cannot
-	 * happen.
+	 * happen. The hotplug lock is nevertheless taken to satisfy lockdep,
+	 * but there won't be any contention on it.
 	 */
+	cpus_read_lock();
 	mutex_lock(&sched_domains_mutex);
 	sched_init_domains(cpu_active_mask);
 	mutex_unlock(&sched_domains_mutex);
+	cpus_read_unlock();
 
 	/* Move init over to a non-isolated CPU */
 	if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_FLAG_DOMAIN)) < 0)
-- 
GitLab


From 92619210529a6b77b2cbedbadba3ff5eaa6e28ed Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 2 Nov 2018 16:36:24 +0100
Subject: [PATCH 0555/1890] qed: fix link config error handling

gcc-8 notices that qed_mcp_get_transceiver_data() may fail to
return a result to the caller:

drivers/net/ethernet/qlogic/qed/qed_mcp.c: In function 'qed_mcp_trans_speed_mask':
drivers/net/ethernet/qlogic/qed/qed_mcp.c:1955:2: error: 'transceiver_type' may be used uninitialized in this function [-Werror=maybe-uninitialized]

When an error is returned by qed_mcp_get_transceiver_data(), we
should propagate that to the caller of qed_mcp_trans_speed_mask()
rather than continuing with uninitialized data.

Fixes: c56a8be7e7aa ("qed: Add supported link and advertise link to display in ethtool.")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_mcp.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index f40f654398a07..a96364df43203 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -1944,9 +1944,12 @@ int qed_mcp_trans_speed_mask(struct qed_hwfn *p_hwfn,
 			     struct qed_ptt *p_ptt, u32 *p_speed_mask)
 {
 	u32 transceiver_type, transceiver_state;
+	int ret;
 
-	qed_mcp_get_transceiver_data(p_hwfn, p_ptt, &transceiver_state,
-				     &transceiver_type);
+	ret = qed_mcp_get_transceiver_data(p_hwfn, p_ptt, &transceiver_state,
+					   &transceiver_type);
+	if (ret)
+		return ret;
 
 	if (qed_is_transceiver_ready(transceiver_state, transceiver_type) ==
 				     false)
-- 
GitLab


From a277d516de5f498c91d91189717ef7e01102ad27 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 2 Nov 2018 16:36:55 +0100
Subject: [PATCH 0556/1890] openvswitch: fix linking without
 CONFIG_NF_CONNTRACK_LABELS

When CONFIG_CC_OPTIMIZE_FOR_DEBUGGING is enabled, the compiler
fails to optimize out a dead code path, which leads to a link failure:

net/openvswitch/conntrack.o: In function `ovs_ct_set_labels':
conntrack.c:(.text+0x2e60): undefined reference to `nf_connlabels_replace'

In this configuration, we can take a shortcut, and completely
remove the contrack label code. This may also help the regular
optimization.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/conntrack.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 6bec37ab44727..a4660c48ff014 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -1203,7 +1203,8 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
 					 &info->labels.mask);
 		if (err)
 			return err;
-	} else if (labels_nonzero(&info->labels.mask)) {
+	} else if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
+		   labels_nonzero(&info->labels.mask)) {
 		err = ovs_ct_set_labels(ct, key, &info->labels.value,
 					&info->labels.mask);
 		if (err)
-- 
GitLab


From 96801552f846460fe9ac10f1b189602992f004e1 Mon Sep 17 00:00:00 2001
From: Shalom Toledo <shalomt@mellanox.com>
Date: Fri, 2 Nov 2018 19:49:15 +0000
Subject: [PATCH 0557/1890] mlxsw: spectrum: Fix IP2ME CPU policer
 configuration

The CPU policer used to police packets being trapped via a local route
(IP2ME) was incorrectly configured to police based on bytes per second
instead of packets per second.

Change the policer to police based on packets per second and avoid
packet loss under certain circumstances.

Fixes: 9148e7cf73ce ("mlxsw: spectrum: Add policers for trap groups")
Signed-off-by: Shalom Toledo <shalomt@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index a2df12b79f8e9..9bec940330a45 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -3568,7 +3568,6 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core)
 			burst_size = 7;
 			break;
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME:
-			is_bytes = true;
 			rate = 4 * 1024;
 			burst_size = 4;
 			break;
-- 
GitLab


From fd82d61ba142f0b83463e47064bf5460aac57b6e Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sat, 3 Nov 2018 13:59:45 +0800
Subject: [PATCH 0558/1890] sctp: fix strchange_flags name for Stream Change
 Event

As defined in rfc6525#section-6.1.3, SCTP_STREAM_CHANGE_DENIED
and SCTP_STREAM_CHANGE_FAILED should be used instead of
SCTP_ASSOC_CHANGE_DENIED and SCTP_ASSOC_CHANGE_FAILED.

To keep the compatibility, fix it by adding two macros.

Fixes: b444153fb5a6 ("sctp: add support for generating add stream change event notification")
Reported-by: Jianwen Ji <jiji@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/sctp.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
index 34dd3d497f2cc..680ecc3bf2a98 100644
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -568,6 +568,8 @@ struct sctp_assoc_reset_event {
 
 #define SCTP_ASSOC_CHANGE_DENIED	0x0004
 #define SCTP_ASSOC_CHANGE_FAILED	0x0008
+#define SCTP_STREAM_CHANGE_DENIED	SCTP_ASSOC_CHANGE_DENIED
+#define SCTP_STREAM_CHANGE_FAILED	SCTP_ASSOC_CHANGE_FAILED
 struct sctp_stream_change_event {
 	__u16 strchange_type;
 	__u16 strchange_flags;
-- 
GitLab


From 12480e3b16982c4026de10dd8155823219cd6391 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sat, 3 Nov 2018 14:01:31 +0800
Subject: [PATCH 0559/1890] sctp: define SCTP_SS_DEFAULT for Stream schedulers

According to rfc8260#section-4.3.2, SCTP_SS_DEFAULT is required to
defined as SCTP_SS_FCFS or SCTP_SS_RR.

SCTP_SS_FCFS is used for SCTP_SS_DEFAULT's value in this patch.

Fixes: 5bbbbe32a431 ("sctp: introduce stream scheduler foundations")
Reported-by: Jianwen Ji <jiji@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/sctp.h | 1 +
 net/sctp/outqueue.c       | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
index 680ecc3bf2a98..c81feb373d3ea 100644
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -1153,6 +1153,7 @@ struct sctp_add_streams {
 /* SCTP Stream schedulers */
 enum sctp_sched_type {
 	SCTP_SS_FCFS,
+	SCTP_SS_DEFAULT = SCTP_SS_FCFS,
 	SCTP_SS_PRIO,
 	SCTP_SS_RR,
 	SCTP_SS_MAX = SCTP_SS_RR
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 9cb854b05342e..c37e1c2dec9d4 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -212,7 +212,7 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q)
 	INIT_LIST_HEAD(&q->retransmit);
 	INIT_LIST_HEAD(&q->sacked);
 	INIT_LIST_HEAD(&q->abandoned);
-	sctp_sched_set_sched(asoc, SCTP_SS_FCFS);
+	sctp_sched_set_sched(asoc, SCTP_SS_DEFAULT);
 }
 
 /* Free the outqueue structure and any related pending chunks.
-- 
GitLab


From 35b69a420bfb56b7b74cb635ea903db05e357bec Mon Sep 17 00:00:00 2001
From: Michael Kelley <mikelley@microsoft.com>
Date: Sun, 4 Nov 2018 03:48:54 +0000
Subject: [PATCH 0560/1890] clockevents/drivers/i8253: Add support for PIT
 shutdown quirk

Add support for platforms where pit_shutdown() doesn't work because of a
quirk in the PIT emulation. On these platforms setting the counter register
to zero causes the PIT to start running again, negating the shutdown.

Provide a global variable that controls whether the counter register is
zero'ed, which platform specific code can override.

Signed-off-by: Michael Kelley <mikelley@microsoft.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: "gregkh@linuxfoundation.org" <gregkh@linuxfoundation.org>
Cc: "devel@linuxdriverproject.org" <devel@linuxdriverproject.org>
Cc: "daniel.lezcano@linaro.org" <daniel.lezcano@linaro.org>
Cc: "virtualization@lists.linux-foundation.org" <virtualization@lists.linux-foundation.org>
Cc: "jgross@suse.com" <jgross@suse.com>
Cc: "akataria@vmware.com" <akataria@vmware.com>
Cc: "olaf@aepfle.de" <olaf@aepfle.de>
Cc: "apw@canonical.com" <apw@canonical.com>
Cc: vkuznets <vkuznets@redhat.com>
Cc: "jasowang@redhat.com" <jasowang@redhat.com>
Cc: "marcelo.cerri@canonical.com" <marcelo.cerri@canonical.com>
Cc: KY Srinivasan <kys@microsoft.com>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/1541303219-11142-2-git-send-email-mikelley@microsoft.com
---
 drivers/clocksource/i8253.c | 14 ++++++++++++--
 include/linux/i8253.h       |  1 +
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/clocksource/i8253.c b/drivers/clocksource/i8253.c
index 9c38895542f4a..d4350bb10b83a 100644
--- a/drivers/clocksource/i8253.c
+++ b/drivers/clocksource/i8253.c
@@ -20,6 +20,13 @@
 DEFINE_RAW_SPINLOCK(i8253_lock);
 EXPORT_SYMBOL(i8253_lock);
 
+/*
+ * Handle PIT quirk in pit_shutdown() where zeroing the counter register
+ * restarts the PIT, negating the shutdown. On platforms with the quirk,
+ * platform specific code can set this to false.
+ */
+bool i8253_clear_counter_on_shutdown __ro_after_init = true;
+
 #ifdef CONFIG_CLKSRC_I8253
 /*
  * Since the PIT overflows every tick, its not very useful
@@ -109,8 +116,11 @@ static int pit_shutdown(struct clock_event_device *evt)
 	raw_spin_lock(&i8253_lock);
 
 	outb_p(0x30, PIT_MODE);
-	outb_p(0, PIT_CH0);
-	outb_p(0, PIT_CH0);
+
+	if (i8253_clear_counter_on_shutdown) {
+		outb_p(0, PIT_CH0);
+		outb_p(0, PIT_CH0);
+	}
 
 	raw_spin_unlock(&i8253_lock);
 	return 0;
diff --git a/include/linux/i8253.h b/include/linux/i8253.h
index e6bb36a97519b..8336b2f6f8346 100644
--- a/include/linux/i8253.h
+++ b/include/linux/i8253.h
@@ -21,6 +21,7 @@
 #define PIT_LATCH	((PIT_TICK_RATE + HZ/2) / HZ)
 
 extern raw_spinlock_t i8253_lock;
+extern bool i8253_clear_counter_on_shutdown;
 extern struct clock_event_device i8253_clockevent;
 extern void clockevent_i8253_init(bool oneshot);
 
-- 
GitLab


From 1de72c706488b7be664a601cf3843bd01e327e58 Mon Sep 17 00:00:00 2001
From: Michael Kelley <mikelley@microsoft.com>
Date: Sun, 4 Nov 2018 03:48:57 +0000
Subject: [PATCH 0561/1890] x86/hyper-v: Enable PIT shutdown quirk

Hyper-V emulation of the PIT has a quirk such that the normal PIT shutdown
path doesn't work, because clearing the counter register restarts the
timer.

Disable the counter clearing on PIT shutdown.

Signed-off-by: Michael Kelley <mikelley@microsoft.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: "gregkh@linuxfoundation.org" <gregkh@linuxfoundation.org>
Cc: "devel@linuxdriverproject.org" <devel@linuxdriverproject.org>
Cc: "daniel.lezcano@linaro.org" <daniel.lezcano@linaro.org>
Cc: "virtualization@lists.linux-foundation.org" <virtualization@lists.linux-foundation.org>
Cc: "jgross@suse.com" <jgross@suse.com>
Cc: "akataria@vmware.com" <akataria@vmware.com>
Cc: "olaf@aepfle.de" <olaf@aepfle.de>
Cc: "apw@canonical.com" <apw@canonical.com>
Cc: vkuznets <vkuznets@redhat.com>
Cc: "jasowang@redhat.com" <jasowang@redhat.com>
Cc: "marcelo.cerri@canonical.com" <marcelo.cerri@canonical.com>
Cc: KY Srinivasan <kys@microsoft.com>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/1541303219-11142-3-git-send-email-mikelley@microsoft.com
---
 arch/x86/kernel/cpu/mshyperv.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 1c72f3819eb12..e81a2db42df7b 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -20,6 +20,7 @@
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/kexec.h>
+#include <linux/i8253.h>
 #include <asm/processor.h>
 #include <asm/hypervisor.h>
 #include <asm/hyperv-tlfs.h>
@@ -295,6 +296,16 @@ static void __init ms_hyperv_init_platform(void)
 	if (efi_enabled(EFI_BOOT))
 		x86_platform.get_nmi_reason = hv_get_nmi_reason;
 
+	/*
+	 * Hyper-V VMs have a PIT emulation quirk such that zeroing the
+	 * counter register during PIT shutdown restarts the PIT. So it
+	 * continues to interrupt @18.2 HZ. Setting i8253_clear_counter
+	 * to false tells pit_shutdown() not to zero the counter so that
+	 * the PIT really is shutdown. Generation 2 VMs don't have a PIT,
+	 * and setting this value has no effect.
+	 */
+	i8253_clear_counter_on_shutdown = false;
+
 #if IS_ENABLED(CONFIG_HYPERV)
 	/*
 	 * Setup the hook to get control post apic initialization.
-- 
GitLab


From 651022382c7f8da46cb4872a545ee1da6d097d2a Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 4 Nov 2018 15:37:52 -0800
Subject: [PATCH 0562/1890] Linux 4.20-rc1

---
 Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index bce41f4180fcb..9fce8b91c15f6 100644
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0
 VERSION = 4
-PATCHLEVEL = 19
+PATCHLEVEL = 20
 SUBLEVEL = 0
-EXTRAVERSION =
+EXTRAVERSION = -rc1
 NAME = "People's Front"
 
 # *DOCUMENTATION*
-- 
GitLab


From 74e3512731bd5c9673176425a76a7cc5efa8ddb6 Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Wed, 24 Oct 2018 22:37:13 +0300
Subject: [PATCH 0563/1890] hwmon: (core) Fix double-free in
 __hwmon_device_register()

Fix double-free that happens when thermal zone setup fails, see KASAN log
below.

==================================================================
BUG: KASAN: double-free or invalid-free in __hwmon_device_register+0x5dc/0xa7c

CPU: 0 PID: 132 Comm: kworker/0:2 Tainted: G    B             4.19.0-rc8-next-20181016-00042-gb52cd80401e9-dirty #41
Hardware name: NVIDIA Tegra SoC (Flattened Device Tree)
Workqueue: events deferred_probe_work_func
Backtrace:
[<c0110540>] (dump_backtrace) from [<c0110944>] (show_stack+0x20/0x24)
[<c0110924>] (show_stack) from [<c105cb08>] (dump_stack+0x9c/0xb0)
[<c105ca6c>] (dump_stack) from [<c02fdaec>] (print_address_description+0x68/0x250)
[<c02fda84>] (print_address_description) from [<c02fd4ac>] (kasan_report_invalid_free+0x68/0x88)
[<c02fd444>] (kasan_report_invalid_free) from [<c02fc85c>] (__kasan_slab_free+0x1f4/0x200)
[<c02fc668>] (__kasan_slab_free) from [<c02fd0c0>] (kasan_slab_free+0x14/0x18)
[<c02fd0ac>] (kasan_slab_free) from [<c02f9c6c>] (kfree+0x90/0x294)
[<c02f9bdc>] (kfree) from [<c0b41bbc>] (__hwmon_device_register+0x5dc/0xa7c)
[<c0b415e0>] (__hwmon_device_register) from [<c0b421e8>] (hwmon_device_register_with_info+0xa0/0xa8)
[<c0b42148>] (hwmon_device_register_with_info) from [<c0b42324>] (devm_hwmon_device_register_with_info+0x74/0xb4)
[<c0b422b0>] (devm_hwmon_device_register_with_info) from [<c0b4481c>] (lm90_probe+0x414/0x578)
[<c0b44408>] (lm90_probe) from [<c0aeeff4>] (i2c_device_probe+0x35c/0x384)
[<c0aeec98>] (i2c_device_probe) from [<c08776cc>] (really_probe+0x290/0x3e4)
[<c087743c>] (really_probe) from [<c0877a2c>] (driver_probe_device+0x80/0x1c4)
[<c08779ac>] (driver_probe_device) from [<c0877da8>] (__device_attach_driver+0x104/0x11c)
[<c0877ca4>] (__device_attach_driver) from [<c0874dd8>] (bus_for_each_drv+0xa4/0xc8)
[<c0874d34>] (bus_for_each_drv) from [<c08773b0>] (__device_attach+0xf0/0x15c)
[<c08772c0>] (__device_attach) from [<c0877e24>] (device_initial_probe+0x1c/0x20)
[<c0877e08>] (device_initial_probe) from [<c08762f4>] (bus_probe_device+0xdc/0xec)
[<c0876218>] (bus_probe_device) from [<c0876a08>] (deferred_probe_work_func+0xa8/0xd4)
[<c0876960>] (deferred_probe_work_func) from [<c01527c4>] (process_one_work+0x3dc/0x96c)
[<c01523e8>] (process_one_work) from [<c01541e0>] (worker_thread+0x4ec/0x8bc)
[<c0153cf4>] (worker_thread) from [<c015b238>] (kthread+0x230/0x240)
[<c015b008>] (kthread) from [<c01010bc>] (ret_from_fork+0x14/0x38)
Exception stack(0xcf743fb0 to 0xcf743ff8)
3fa0:                                     00000000 00000000 00000000 00000000
3fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
3fe0: 00000000 00000000 00000000 00000000 00000013 00000000

Allocated by task 132:
 kasan_kmalloc.part.1+0x58/0xf4
 kasan_kmalloc+0x90/0xa4
 kmem_cache_alloc_trace+0x90/0x2a0
 __hwmon_device_register+0xbc/0xa7c
 hwmon_device_register_with_info+0xa0/0xa8
 devm_hwmon_device_register_with_info+0x74/0xb4
 lm90_probe+0x414/0x578
 i2c_device_probe+0x35c/0x384
 really_probe+0x290/0x3e4
 driver_probe_device+0x80/0x1c4
 __device_attach_driver+0x104/0x11c
 bus_for_each_drv+0xa4/0xc8
 __device_attach+0xf0/0x15c
 device_initial_probe+0x1c/0x20
 bus_probe_device+0xdc/0xec
 deferred_probe_work_func+0xa8/0xd4
 process_one_work+0x3dc/0x96c
 worker_thread+0x4ec/0x8bc
 kthread+0x230/0x240
 ret_from_fork+0x14/0x38
   (null)

Freed by task 132:
 __kasan_slab_free+0x12c/0x200
 kasan_slab_free+0x14/0x18
 kfree+0x90/0x294
 hwmon_dev_release+0x1c/0x20
 device_release+0x4c/0xe8
 kobject_put+0xac/0x11c
 device_unregister+0x2c/0x30
 __hwmon_device_register+0xa58/0xa7c
 hwmon_device_register_with_info+0xa0/0xa8
 devm_hwmon_device_register_with_info+0x74/0xb4
 lm90_probe+0x414/0x578
 i2c_device_probe+0x35c/0x384
 really_probe+0x290/0x3e4
 driver_probe_device+0x80/0x1c4
 __device_attach_driver+0x104/0x11c
 bus_for_each_drv+0xa4/0xc8
 __device_attach+0xf0/0x15c
 device_initial_probe+0x1c/0x20
 bus_probe_device+0xdc/0xec
 deferred_probe_work_func+0xa8/0xd4
 process_one_work+0x3dc/0x96c
 worker_thread+0x4ec/0x8bc
 kthread+0x230/0x240
 ret_from_fork+0x14/0x38
   (null)

Cc: <stable@vger.kernel.org> # v4.15+
Fixes: 47c332deb8e8 ("hwmon: Deal with errors from the thermal subsystem")
Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/hwmon.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c
index 975c951698846..84f61cec6319c 100644
--- a/drivers/hwmon/hwmon.c
+++ b/drivers/hwmon/hwmon.c
@@ -649,8 +649,10 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata,
 				if (info[i]->config[j] & HWMON_T_INPUT) {
 					err = hwmon_thermal_add_sensor(dev,
 								hwdev, j);
-					if (err)
-						goto free_device;
+					if (err) {
+						device_unregister(hdev);
+						goto ida_remove;
+					}
 				}
 			}
 		}
@@ -658,8 +660,6 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata,
 
 	return hdev;
 
-free_device:
-	device_unregister(hdev);
 free_hwmon:
 	kfree(hwdev);
 ida_remove:
-- 
GitLab


From e3e61f01d755188cb6c2dcf5a244b9c0937c258e Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Sun, 28 Oct 2018 18:16:51 +0100
Subject: [PATCH 0564/1890] hwmon: (ibmpowernv) Remove bogus __init annotations

If gcc decides not to inline make_sensor_label():

    WARNING: vmlinux.o(.text+0x4df549c): Section mismatch in reference from the function .create_device_attrs() to the function .init.text:.make_sensor_label()
    The function .create_device_attrs() references
    the function __init .make_sensor_label().
    This is often because .create_device_attrs lacks a __init
    annotation or the annotation of .make_sensor_label is wrong.

As .probe() can be called after freeing of __init memory, all __init
annotiations in the driver are bogus, and should be removed.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/ibmpowernv.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/hwmon/ibmpowernv.c b/drivers/hwmon/ibmpowernv.c
index 0ccca87f52719..293dd1c6c7b36 100644
--- a/drivers/hwmon/ibmpowernv.c
+++ b/drivers/hwmon/ibmpowernv.c
@@ -181,7 +181,7 @@ static ssize_t show_label(struct device *dev, struct device_attribute *devattr,
 	return sprintf(buf, "%s\n", sdata->label);
 }
 
-static int __init get_logical_cpu(int hwcpu)
+static int get_logical_cpu(int hwcpu)
 {
 	int cpu;
 
@@ -192,9 +192,8 @@ static int __init get_logical_cpu(int hwcpu)
 	return -ENOENT;
 }
 
-static void __init make_sensor_label(struct device_node *np,
-				     struct sensor_data *sdata,
-				     const char *label)
+static void make_sensor_label(struct device_node *np,
+			      struct sensor_data *sdata, const char *label)
 {
 	u32 id;
 	size_t n;
-- 
GitLab


From 0432e833191ad4d17b7fc2364941f91dad51db1a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Holger=20Hoffst=C3=A4tte?= <holger@applied-asynchrony.com>
Date: Sun, 4 Nov 2018 19:02:42 +0100
Subject: [PATCH 0565/1890] net: phy: realtek: fix RTL8201F sysfs name
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since 4.19 the following error in sysfs has appeared when using the
r8169 NIC driver:

$cd /sys/module/realtek/drivers
$ls -l
ls: cannot access 'mdio_bus:RTL8201F 10/100Mbps Ethernet': No such file or directory
[..garbled dir entries follow..]

Apparently the forward slash in "10/100Mbps Ethernet" is interpreted
as directory separator that leads nowhere, and was introduced in commit
513588dd44b ("net: phy: realtek: add RTL8201F phy-id and functions").

Fix this by removing the offending slash in the driver name.

Other drivers in net/phy seem to have the same problem, but I cannot
test/verify them.

Fixes: 513588dd44b ("net: phy: realtek: add RTL8201F phy-id and functions")
Signed-off-by: Holger HoffstÃ¤tte <holger@applied-asynchrony.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/realtek.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c
index 7fc8508b5231d..271e8adc39f10 100644
--- a/drivers/net/phy/realtek.c
+++ b/drivers/net/phy/realtek.c
@@ -220,7 +220,7 @@ static struct phy_driver realtek_drvs[] = {
 		.flags          = PHY_HAS_INTERRUPT,
 	}, {
 		.phy_id		= 0x001cc816,
-		.name		= "RTL8201F 10/100Mbps Ethernet",
+		.name		= "RTL8201F Fast Ethernet",
 		.phy_id_mask	= 0x001fffff,
 		.features	= PHY_BASIC_FEATURES,
 		.flags		= PHY_HAS_INTERRUPT,
-- 
GitLab


From ea53abfab960909d622ca37bcfb8e1c5378d21cc Mon Sep 17 00:00:00 2001
From: Jarod Wilson <jarod@redhat.com>
Date: Sun, 4 Nov 2018 14:59:46 -0500
Subject: [PATCH 0566/1890] bonding/802.3ad: fix link_failure_count tracking

Commit 4d2c0cda07448ea6980f00102dc3964eb25e241c set slave->link to
BOND_LINK_DOWN for 802.3ad bonds whenever invalid speed/duplex values
were read, to fix a problem with slaves getting into weird states, but
in the process, broke tracking of link failures, as going straight to
BOND_LINK_DOWN when a link is indeed down (cable pulled, switch rebooted)
means we broke out of bond_miimon_inspect()'s BOND_LINK_DOWN case because
!link_state was already true, we never incremented commit, and never got
a chance to call bond_miimon_commit(), where slave->link_failure_count
would be incremented. I believe the simple fix here is to mark the slave
as BOND_LINK_FAIL, and let bond_miimon_inspect() transition the link from
_FAIL to either _UP or _DOWN, and in the latter case, we now get proper
incrementing of link_failure_count again.

Fixes: 4d2c0cda0744 ("bonding: speed/duplex update at NETDEV_UP event")
CC: Mahesh Bandewar <maheshb@google.com>
CC: David S. Miller <davem@davemloft.net>
CC: netdev@vger.kernel.org
CC: stable@vger.kernel.org
Signed-off-by: Jarod Wilson <jarod@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index ffa37adb76817..333387f1f1fe6 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3112,13 +3112,13 @@ static int bond_slave_netdev_event(unsigned long event,
 	case NETDEV_CHANGE:
 		/* For 802.3ad mode only:
 		 * Getting invalid Speed/Duplex values here will put slave
-		 * in weird state. So mark it as link-down for the time
+		 * in weird state. So mark it as link-fail for the time
 		 * being and let link-monitoring (miimon) set it right when
 		 * correct speeds/duplex are available.
 		 */
 		if (bond_update_speed_duplex(slave) &&
 		    BOND_MODE(bond) == BOND_MODE_8023AD)
-			slave->link = BOND_LINK_DOWN;
+			slave->link = BOND_LINK_FAIL;
 
 		if (BOND_MODE(bond) == BOND_MODE_8023AD)
 			bond_3ad_adapter_speed_duplex_changed(slave);
-- 
GitLab


From 438ad09af5581b7024850b5dbb6353c7f2f7d8a9 Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Thu, 18 Oct 2018 13:26:13 +0200
Subject: [PATCH 0567/1890] ARM: dts: imx6sll: fix typo for fsl,imx6sll-i2c
 node
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix the type of compatible string "fs,imx6sll-i2c" which should be
"fsl,imx6sll-i2c".

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Acked-by: Uwe Kleine-KÃ¶nig <u.kleine-koenig@pengutronix.de>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm/boot/dts/imx6sll.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/imx6sll.dtsi b/arch/arm/boot/dts/imx6sll.dtsi
index ed9a980bce850..beefa1b2049d7 100644
--- a/arch/arm/boot/dts/imx6sll.dtsi
+++ b/arch/arm/boot/dts/imx6sll.dtsi
@@ -740,7 +740,7 @@ usdhc3: mmc@2198000 {
 			i2c1: i2c@21a0000 {
 				#address-cells = <1>;
 				#size-cells = <0>;
-				compatible = "fs,imx6sll-i2c", "fsl,imx21-i2c";
+				compatible = "fsl,imx6sll-i2c", "fsl,imx21-i2c";
 				reg = <0x021a0000 0x4000>;
 				interrupts = <GIC_SPI 36 IRQ_TYPE_LEVEL_HIGH>;
 				clocks = <&clks IMX6SLL_CLK_I2C1>;
-- 
GitLab


From 1af6ab3bac8458fc2e92ad7bb97b62de4a1fddef Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Tue, 30 Oct 2018 20:02:30 -0500
Subject: [PATCH 0568/1890] ARM: dts: fsl: Fix improperly quoted stdout-path
 values

A quoted label reference doesn't expand to the node path and is taken as
a literal string. Dropping the quotes can fix this unless the baudrate
string is appended in which case we have to use the alias.

At least on VF610, the problem was masked by setting the console in
bootargs. Use the alias syntax with baudrate parameter so we can drop
setting the console in bootargs.

Cc: Shawn Guo <shawnguo@kernel.org>
Cc: Sascha Hauer <s.hauer@pengutronix.de>
Cc: Pengutronix Kernel Team <kernel@pengutronix.de>
Cc: NXP Linux Team <linux-imx@nxp.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Fabio Estevam <festevam@gmail.com>
Reviewed-by: Stefan Agner <stefan@agner.ch>
Signed-off-by: Rob Herring <robh@kernel.org>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm/boot/dts/imx53-ppd.dts       | 2 +-
 arch/arm/boot/dts/vf610m4-colibri.dts | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm/boot/dts/imx53-ppd.dts b/arch/arm/boot/dts/imx53-ppd.dts
index b560ff88459bf..5ff9a179c83c3 100644
--- a/arch/arm/boot/dts/imx53-ppd.dts
+++ b/arch/arm/boot/dts/imx53-ppd.dts
@@ -55,7 +55,7 @@ aliases {
 	};
 
 	chosen {
-		stdout-path = "&uart1:115200n8";
+		stdout-path = "serial0:115200n8";
 	};
 
 	memory@70000000 {
diff --git a/arch/arm/boot/dts/vf610m4-colibri.dts b/arch/arm/boot/dts/vf610m4-colibri.dts
index 41ec66a969907..ca62495587602 100644
--- a/arch/arm/boot/dts/vf610m4-colibri.dts
+++ b/arch/arm/boot/dts/vf610m4-colibri.dts
@@ -50,8 +50,8 @@ / {
 	compatible = "fsl,vf610m4";
 
 	chosen {
-		bootargs = "console=ttyLP2,115200 clk_ignore_unused init=/linuxrc rw";
-		stdout-path = "&uart2";
+		bootargs = "clk_ignore_unused init=/linuxrc rw";
+		stdout-path = "serial2:115200";
 	};
 
 	memory@8c000000 {
-- 
GitLab


From 1ad9fb750a104f51851c092edd7b3553f0218428 Mon Sep 17 00:00:00 2001
From: Leonard Crestez <leonard.crestez@nxp.com>
Date: Mon, 8 Oct 2018 15:28:01 +0000
Subject: [PATCH 0569/1890] ARM: dts: imx6sx-sdb: Fix enet phy regulator

Bindings for "fixed-regulator" only explicitly support "gpio" property,
not "gpios". Fix by correcting the property name.

The enet PHYs on imx6sx-sdb needs to be explicitly reset after a power
cycle, this can be handled by the phy-reset-gpios property. Sadly this
is not handled on suspend: the fec driver turns phy-supply off but
doesn't assert phy-reset-gpios again on resume.

Since additional phy-level work is required to support powering off the
phy in suspend fix the problem by just marking the regulator as
"boot-on" "always-on" so that it's never turned off. This behavior is
equivalent to older releases.

Keep the phy-reset-gpios property on fec anyway because it is a correct
description of board design.

This issue was exposed by commit efdfeb079cc3 ("regulator: fixed:
Convert to use GPIO descriptor only") which causes the "gpios" property
to also be parsed. Before that commit the "gpios" property had no
effect, PHY reset was only handled in the the bootloader.

This fixes linux-next boot failures previously reported here:
 https://lore.kernel.org/patchwork/patch/982437/#1177900
 https://lore.kernel.org/patchwork/patch/994091/#1178304

Signed-off-by: Leonard Crestez <leonard.crestez@nxp.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm/boot/dts/imx6sx-sdb.dtsi | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/imx6sx-sdb.dtsi b/arch/arm/boot/dts/imx6sx-sdb.dtsi
index 53b3408b5fab1..7d7d679945d28 100644
--- a/arch/arm/boot/dts/imx6sx-sdb.dtsi
+++ b/arch/arm/boot/dts/imx6sx-sdb.dtsi
@@ -117,7 +117,9 @@ reg_enet_3v3: regulator-enet-3v3 {
 		regulator-name = "enet_3v3";
 		regulator-min-microvolt = <3300000>;
 		regulator-max-microvolt = <3300000>;
-		gpios = <&gpio2 6 GPIO_ACTIVE_LOW>;
+		gpio = <&gpio2 6 GPIO_ACTIVE_LOW>;
+		regulator-boot-on;
+		regulator-always-on;
 	};
 
 	reg_pcie_gpio: regulator-pcie-gpio {
@@ -180,6 +182,7 @@ &fec1 {
 	phy-supply = <&reg_enet_3v3>;
 	phy-mode = "rgmii";
 	phy-handle = <&ethphy1>;
+	phy-reset-gpios = <&gpio2 7 GPIO_ACTIVE_LOW>;
 	status = "okay";
 
 	mdio {
@@ -373,6 +376,8 @@ MX6SX_PAD_RGMII1_RD2__ENET1_RX_DATA_2	0x3081
 				MX6SX_PAD_RGMII1_RD3__ENET1_RX_DATA_3	0x3081
 				MX6SX_PAD_RGMII1_RX_CTL__ENET1_RX_EN	0x3081
 				MX6SX_PAD_ENET2_RX_CLK__ENET2_REF_CLK_25M	0x91
+				/* phy reset */
+				MX6SX_PAD_ENET2_CRS__GPIO2_IO_7		0x10b0
 			>;
 		};
 
-- 
GitLab


From 3182215dd0b2120fb942ed88430cfb7c12d583e0 Mon Sep 17 00:00:00 2001
From: Alistair Popple <alistair@popple.id.au>
Date: Tue, 30 Oct 2018 22:02:03 +1100
Subject: [PATCH 0570/1890] powerpc/powernv/npu: Remove NPU DMA ops

The NPU IOMMU is setup to mirror the parent PCIe device IOMMU
setup. Therefore it does not make sense to call dma operations such as
dma_map_page(), etc. directly on these devices. The existing dma_ops
simply print a warning if they are ever called, however this is
unnecessary and the warnings are likely to go unnoticed.

It is instead simpler to remove these operations and let the generic
DMA code print warnings (eg. via a NULL pointer deref) in cases of
buggy drivers attempting dma operations on NVLink devices.

Signed-off-by: Alistair Popple <alistair@popple.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/powernv/npu-dma.c | 64 ++----------------------
 1 file changed, 4 insertions(+), 60 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index 6f60e09319223..75b9352529818 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -102,63 +102,6 @@ struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index)
 }
 EXPORT_SYMBOL(pnv_pci_get_npu_dev);
 
-#define NPU_DMA_OP_UNSUPPORTED()					\
-	dev_err_once(dev, "%s operation unsupported for NVLink devices\n", \
-		__func__)
-
-static void *dma_npu_alloc(struct device *dev, size_t size,
-			   dma_addr_t *dma_handle, gfp_t flag,
-			   unsigned long attrs)
-{
-	NPU_DMA_OP_UNSUPPORTED();
-	return NULL;
-}
-
-static void dma_npu_free(struct device *dev, size_t size,
-			 void *vaddr, dma_addr_t dma_handle,
-			 unsigned long attrs)
-{
-	NPU_DMA_OP_UNSUPPORTED();
-}
-
-static dma_addr_t dma_npu_map_page(struct device *dev, struct page *page,
-				   unsigned long offset, size_t size,
-				   enum dma_data_direction direction,
-				   unsigned long attrs)
-{
-	NPU_DMA_OP_UNSUPPORTED();
-	return 0;
-}
-
-static int dma_npu_map_sg(struct device *dev, struct scatterlist *sglist,
-			  int nelems, enum dma_data_direction direction,
-			  unsigned long attrs)
-{
-	NPU_DMA_OP_UNSUPPORTED();
-	return 0;
-}
-
-static int dma_npu_dma_supported(struct device *dev, u64 mask)
-{
-	NPU_DMA_OP_UNSUPPORTED();
-	return 0;
-}
-
-static u64 dma_npu_get_required_mask(struct device *dev)
-{
-	NPU_DMA_OP_UNSUPPORTED();
-	return 0;
-}
-
-static const struct dma_map_ops dma_npu_ops = {
-	.map_page		= dma_npu_map_page,
-	.map_sg			= dma_npu_map_sg,
-	.alloc			= dma_npu_alloc,
-	.free			= dma_npu_free,
-	.dma_supported		= dma_npu_dma_supported,
-	.get_required_mask	= dma_npu_get_required_mask,
-};
-
 /*
  * Returns the PE assoicated with the PCI device of the given
  * NPU. Returns the linked pci device if pci_dev != NULL.
@@ -270,10 +213,11 @@ static void pnv_npu_dma_set_32(struct pnv_ioda_pe *npe)
 	rc = pnv_npu_set_window(npe, 0, gpe->table_group.tables[0]);
 
 	/*
-	 * We don't initialise npu_pe->tce32_table as we always use
-	 * dma_npu_ops which are nops.
+	 * NVLink devices use the same TCE table configuration as
+	 * their parent device so drivers shouldn't be doing DMA
+	 * operations directly on these devices.
 	 */
-	set_dma_ops(&npe->pdev->dev, &dma_npu_ops);
+	set_dma_ops(&npe->pdev->dev, NULL);
 }
 
 /*
-- 
GitLab


From e1ff516a56ad56c476b47795d3811eef79d25fbe Mon Sep 17 00:00:00 2001
From: Yi Wang <wang.yi59@zte.com.cn>
Date: Mon, 5 Nov 2018 08:50:13 +0800
Subject: [PATCH 0571/1890] sched/fair: Fix a comment in task_numa_fault()

Duplicated 'case it'.

Signed-off-by: Yi Wang <wang.yi59@zte.com.cn>
Reviewed-by: Xi Xu <xu.xi8@zte.com.cn>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: zhong.weidong@zte.com.cn
Link: http://lkml.kernel.org/r/1541379013-11352-1-git-send-email-wang.yi59@zte.com.cn
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/fair.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ee271bb661cc9..3648d0300fdf9 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2400,8 +2400,8 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags)
 		local = 1;
 
 	/*
-	 * Retry task to preferred node migration periodically, in case it
-	 * case it previously failed, or the scheduler moved us.
+	 * Retry to migrate task to preferred node periodically, in case it
+	 * previously failed, or the scheduler moved us.
 	 */
 	if (time_after(jiffies, p->numa_migrate_retry)) {
 		task_numa_placement(p);
-- 
GitLab


From f75d651587f719a813ebbbfeee570e6570731d55 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sun, 4 Nov 2018 18:40:14 -0800
Subject: [PATCH 0572/1890] resource/docs: Fix new kernel-doc warnings

The first group of warnings is caused by a "/**" kernel-doc notation
marker but the function comments are not in kernel-doc format.
Also add another error return value here.

  ../kernel/resource.c:337: warning: Function parameter or member 'start' not described in 'find_next_iomem_res'
  ../kernel/resource.c:337: warning: Function parameter or member 'end' not described in 'find_next_iomem_res'
  ../kernel/resource.c:337: warning: Function parameter or member 'flags' not described in 'find_next_iomem_res'
  ../kernel/resource.c:337: warning: Function parameter or member 'desc' not described in 'find_next_iomem_res'
  ../kernel/resource.c:337: warning: Function parameter or member 'first_lvl' not described in 'find_next_iomem_res'
  ../kernel/resource.c:337: warning: Function parameter or member 'res' not described in 'find_next_iomem_res'

Add the missing function parameter documentation for the other warnings:

  ../kernel/resource.c:409: warning: Function parameter or member 'arg' not described in 'walk_iomem_res_desc'
  ../kernel/resource.c:409: warning: Function parameter or member 'func' not described in 'walk_iomem_res_desc'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Borislav Petkov <bp@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: b69c2e20f6e4 ("resource: Clean it up a bit")
Link: http://lkml.kernel.org/r/dda2e4d8-bedd-3167-20fe-8c7d2d35b354@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/resource.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/kernel/resource.c b/kernel/resource.c
index b3a3a1fc499ea..17bcb189d5306 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -318,14 +318,14 @@ int release_resource(struct resource *old)
 
 EXPORT_SYMBOL(release_resource);
 
-/**
+/*
  * Finds the lowest iomem resource that covers part of [start..end].  The
  * caller must specify start, end, flags, and desc (which may be
  * IORES_DESC_NONE).
  *
  * If a resource is found, returns 0 and *res is overwritten with the part
  * of the resource that's within [start..end]; if none is found, returns
- * -1.
+ * -1. Returns -EINVAL for other invalid parameters.
  *
  * This function walks the whole tree and not just first level children
  * unless @first_lvl is true.
@@ -390,7 +390,9 @@ static int __walk_iomem_res_desc(resource_size_t start, resource_size_t end,
 }
 
 /**
- * Walks through iomem resources and calls func() with matching resource
+ * walk_iomem_res_desc - walk through iomem resources
+ *
+ * Walks through iomem resources and calls @func() with matching resource
  * ranges. This walks through whole tree and not just first level children.
  * All the memory ranges which overlap start,end and also match flags and
  * desc are valid candidates.
@@ -399,6 +401,8 @@ static int __walk_iomem_res_desc(resource_size_t start, resource_size_t end,
  * @flags: I/O resource flags
  * @start: start addr
  * @end: end addr
+ * @arg: function argument for the callback @func
+ * @func: callback function that is called for each qualifying resource area
  *
  * NOTE: For a new descriptor search, define a new IORES_DESC in
  * <linux/ioport.h> and set it in 'desc' of a target resource entry.
-- 
GitLab


From b068621a53f92f42400581d69ad0e84c56620b0a Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sun, 4 Nov 2018 14:03:56 -0800
Subject: [PATCH 0573/1890] Documentation/x86: Fix typo in zero-page.txt

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-doc@vger.kernel.org
Link: http://lkml.kernel.org/r/f259b21b-1f2b-f215-00d2-23388bed2530@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 Documentation/x86/zero-page.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/x86/zero-page.txt b/Documentation/x86/zero-page.txt
index 97b7adbceda48..68aed077f7b62 100644
--- a/Documentation/x86/zero-page.txt
+++ b/Documentation/x86/zero-page.txt
@@ -25,7 +25,7 @@ Offset	Proto	Name		Meaning
 0C8/004	ALL	ext_cmd_line_ptr  cmd_line_ptr high 32bits
 140/080	ALL	edid_info	Video mode setup (struct edid_info)
 1C0/020	ALL	efi_info	EFI 32 information (struct efi_info)
-1E0/004	ALL	alk_mem_k	Alternative mem check, in KB
+1E0/004	ALL	alt_mem_k	Alternative mem check, in KB
 1E4/004	ALL	scratch		Scratch field for the kernel setup code
 1E8/001	ALL	e820_entries	Number of entries in e820_table (below)
 1E9/001	ALL	eddbuf_entries	Number of entries in eddbuf (below)
-- 
GitLab


From 13682e524167cbd7e2a26c5e91bec765f0f96273 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Wed, 17 Oct 2018 11:18:30 +0200
Subject: [PATCH 0574/1890] arm64: dts: rockchip: remove vdd_log from rock960
 to fix a stability issues

When the performance governor is set as default, the rock960 hangs
around one minute after booting, whatever the activity is (idle, key
pressed, loaded, ...).

Based on the commit log found at https://patchwork.kernel.org/patch/10092377/

"vdd_log has no consumer and therefore will not be set to a specific
voltage. Still the PWM output pin gets configured and thence the vdd_log
output voltage will changed from it's default. Depending on the idle
state of the PWM this will slightly over or undervoltage the logic supply
of the RK3399 and cause instability with GbE (undervoltage) and PCIe
(overvoltage). Since the default value set by a voltage divider is the
correct supply voltage and we don't need to change it during runtime we
remove the rail from the devicetree completely so the PWM pin will not
be configured."

After removing the vdd-log from the rock960's specific DT, the board
does no longer hang and shows a stable behavior.

Apply the same change for the rock960 by removing the vdd-log from the
DT.

Fixes: 874846f1fccd ("arm64: dts: rockchip: add 96boards RK3399 Ficus board")
Cc: stable@vger.kernel.org
Tested-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
---
 arch/arm64/boot/dts/rockchip/rk3399-rock960.dtsi | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock960.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-rock960.dtsi
index 6c8c4ab044aaf..56abbb08c133b 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-rock960.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399-rock960.dtsi
@@ -57,18 +57,6 @@ vcc5v0_host: vcc5v0-host-regulator {
 		regulator-always-on;
 		vin-supply = <&vcc_sys>;
 	};
-
-	vdd_log: vdd-log {
-		compatible = "pwm-regulator";
-		pwms = <&pwm2 0 25000 0>;
-		regulator-name = "vdd_log";
-		regulator-min-microvolt = <800000>;
-		regulator-max-microvolt = <1400000>;
-		regulator-always-on;
-		regulator-boot-on;
-		vin-supply = <&vcc_sys>;
-	};
-
 };
 
 &cpu_l0 {
-- 
GitLab


From 8727b230f665cadb9349a915c60e6abb18fb083c Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 13 Oct 2018 13:21:40 +0300
Subject: [PATCH 0575/1890] drm/exynos: checking for NULL instead of IS_ERR()

The of_drm_find_panel() function returns error pointers and never NULL
but we the driver assumes that ->panel is NULL when it's not present.

Fixes: 6afb7721e2a0 ("drm/exynos: move connector creation to attach callback")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos_drm_dsi.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
index 07af7758066db..32f2567497890 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
@@ -1527,7 +1527,9 @@ static int exynos_dsi_host_attach(struct mipi_dsi_host *host,
 		}
 
 		dsi->panel = of_drm_find_panel(device->dev.of_node);
-		if (dsi->panel) {
+		if (IS_ERR(dsi->panel)) {
+			dsi->panel = NULL;
+		} else {
 			drm_panel_attach(dsi->panel, &dsi->connector);
 			dsi->connector.status = connector_status_connected;
 		}
-- 
GitLab


From 6ca469e22a30992b4478d2ab88737c70667c1e00 Mon Sep 17 00:00:00 2001
From: Inki Dae <inki.dae@samsung.com>
Date: Fri, 5 Oct 2018 11:50:20 +0900
Subject: [PATCH 0576/1890] Revert "drm/exynos/decon5433: implement frame
 counter"

This reverts commit 0586feba322e1de05075700eb4b835c8b683e62b

This patch makes it to need get_vblank_counter callback in crtc
to get frame counter from decon driver.

However, drm_dev->max_vblank_count is a member unique to
vendor's DRM driver but in case of ARM DRM, some CRTC devices
don't provide the frame counter value. As a result, this patch
made extension and clone mode not working.

Instead of this patch, we may need separated max_vblank_count
which belongs to each CRTC device, or need to implement frame
counter emulation for them who don't support HW frame counter.

Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos5433_drm_decon.c |  9 ---------
 drivers/gpu/drm/exynos/exynos_drm_crtc.c      | 11 -----------
 drivers/gpu/drm/exynos/exynos_drm_drv.h       |  1 -
 3 files changed, 21 deletions(-)

diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
index 94529aa823392..aef487dd87315 100644
--- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
@@ -164,13 +164,6 @@ static u32 decon_get_frame_count(struct decon_context *ctx, bool end)
 	return frm;
 }
 
-static u32 decon_get_vblank_counter(struct exynos_drm_crtc *crtc)
-{
-	struct decon_context *ctx = crtc->ctx;
-
-	return decon_get_frame_count(ctx, false);
-}
-
 static void decon_setup_trigger(struct decon_context *ctx)
 {
 	if (!ctx->crtc->i80_mode && !(ctx->out_type & I80_HW_TRG))
@@ -536,7 +529,6 @@ static const struct exynos_drm_crtc_ops decon_crtc_ops = {
 	.disable		= decon_disable,
 	.enable_vblank		= decon_enable_vblank,
 	.disable_vblank		= decon_disable_vblank,
-	.get_vblank_counter	= decon_get_vblank_counter,
 	.atomic_begin		= decon_atomic_begin,
 	.update_plane		= decon_update_plane,
 	.disable_plane		= decon_disable_plane,
@@ -554,7 +546,6 @@ static int decon_bind(struct device *dev, struct device *master, void *data)
 	int ret;
 
 	ctx->drm_dev = drm_dev;
-	drm_dev->max_vblank_count = 0xffffffff;
 
 	for (win = ctx->first_win; win < WINDOWS_NR; win++) {
 		ctx->configs[win].pixel_formats = decon_formats;
diff --git a/drivers/gpu/drm/exynos/exynos_drm_crtc.c b/drivers/gpu/drm/exynos/exynos_drm_crtc.c
index eea90251808fa..2696289ecc78f 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_crtc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_crtc.c
@@ -162,16 +162,6 @@ static void exynos_drm_crtc_disable_vblank(struct drm_crtc *crtc)
 		exynos_crtc->ops->disable_vblank(exynos_crtc);
 }
 
-static u32 exynos_drm_crtc_get_vblank_counter(struct drm_crtc *crtc)
-{
-	struct exynos_drm_crtc *exynos_crtc = to_exynos_crtc(crtc);
-
-	if (exynos_crtc->ops->get_vblank_counter)
-		return exynos_crtc->ops->get_vblank_counter(exynos_crtc);
-
-	return 0;
-}
-
 static const struct drm_crtc_funcs exynos_crtc_funcs = {
 	.set_config	= drm_atomic_helper_set_config,
 	.page_flip	= drm_atomic_helper_page_flip,
@@ -181,7 +171,6 @@ static const struct drm_crtc_funcs exynos_crtc_funcs = {
 	.atomic_destroy_state = drm_atomic_helper_crtc_destroy_state,
 	.enable_vblank = exynos_drm_crtc_enable_vblank,
 	.disable_vblank = exynos_drm_crtc_disable_vblank,
-	.get_vblank_counter = exynos_drm_crtc_get_vblank_counter,
 };
 
 struct exynos_drm_crtc *exynos_drm_crtc_create(struct drm_device *drm_dev,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h
index ec9604f1272b5..5e61e707f9555 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h
@@ -135,7 +135,6 @@ struct exynos_drm_crtc_ops {
 	void (*disable)(struct exynos_drm_crtc *crtc);
 	int (*enable_vblank)(struct exynos_drm_crtc *crtc);
 	void (*disable_vblank)(struct exynos_drm_crtc *crtc);
-	u32 (*get_vblank_counter)(struct exynos_drm_crtc *crtc);
 	enum drm_mode_status (*mode_valid)(struct exynos_drm_crtc *crtc,
 		const struct drm_display_mode *mode);
 	bool (*mode_fixup)(struct exynos_drm_crtc *crtc,
-- 
GitLab


From deee3284cba3145a4ee03cbe8541d7a3bfc499e2 Mon Sep 17 00:00:00 2001
From: Andrzej Hajda <a.hajda@samsung.com>
Date: Fri, 26 Oct 2018 12:40:03 +0200
Subject: [PATCH 0577/1890] drm/exynos/dsi: register connector if it is created
 after drm bind

DSI device can be attached after DRM device is registered. In such
case newly created connector must be registered by exynos_dsi.
The patch fixes exynos_drm on rinato and trats boards.

Fixes: 6afb7721e2a0 ("drm/exynos: move connector creation to attach callback")
Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Andrzej Hajda <a.hajda@samsung.com>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos_drm_dsi.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
index 32f2567497890..d81e62ae286ae 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
@@ -14,6 +14,7 @@
 
 #include <drm/drmP.h>
 #include <drm/drm_crtc_helper.h>
+#include <drm/drm_fb_helper.h>
 #include <drm/drm_mipi_dsi.h>
 #include <drm/drm_panel.h>
 #include <drm/drm_atomic_helper.h>
@@ -1474,12 +1475,12 @@ static int exynos_dsi_create_connector(struct drm_encoder *encoder)
 {
 	struct exynos_dsi *dsi = encoder_to_dsi(encoder);
 	struct drm_connector *connector = &dsi->connector;
+	struct drm_device *drm = encoder->dev;
 	int ret;
 
 	connector->polled = DRM_CONNECTOR_POLL_HPD;
 
-	ret = drm_connector_init(encoder->dev, connector,
-				 &exynos_dsi_connector_funcs,
+	ret = drm_connector_init(drm, connector, &exynos_dsi_connector_funcs,
 				 DRM_MODE_CONNECTOR_DSI);
 	if (ret) {
 		DRM_ERROR("Failed to initialize connector with drm\n");
@@ -1489,7 +1490,12 @@ static int exynos_dsi_create_connector(struct drm_encoder *encoder)
 	connector->status = connector_status_disconnected;
 	drm_connector_helper_add(connector, &exynos_dsi_connector_helper_funcs);
 	drm_connector_attach_encoder(connector, encoder);
+	if (!drm->registered)
+		return 0;
 
+	connector->funcs->reset(connector);
+	drm_fb_helper_add_one_connector(drm->fb_helper, connector);
+	drm_connector_register(connector);
 	return 0;
 }
 
-- 
GitLab


From 989534cfcac89f927fa46b1e0861d92ffbd2c7e4 Mon Sep 17 00:00:00 2001
From: Andrzej Hajda <a.hajda@samsung.com>
Date: Fri, 26 Oct 2018 12:13:28 +0200
Subject: [PATCH 0578/1890] drm/exynos/fbdev: do not skip fbdev init if there
 are no connectors

Since connectors can be created dynamically, fbdev should be initialized
even if there are no connectors at the moment. Otherwise fbdev will
not be created even after connector's appearance.
The patch fixes lack of fbdev on rinato and trats boards.

Fixes: 6afb7721e2a0 ("drm/exynos: move connector creation to attach callback")
Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Andrzej Hajda <a.hajda@samsung.com>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Inki Dae <inki.dae@samsung.com>
---
 drivers/gpu/drm/exynos/exynos_drm_fbdev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
index 918dd2c822098..01d182289efa3 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
@@ -192,7 +192,7 @@ int exynos_drm_fbdev_init(struct drm_device *dev)
 	struct drm_fb_helper *helper;
 	int ret;
 
-	if (!dev->mode_config.num_crtc || !dev->mode_config.num_connector)
+	if (!dev->mode_config.num_crtc)
 		return 0;
 
 	fbdev = kzalloc(sizeof(*fbdev), GFP_KERNEL);
-- 
GitLab


From defeea5ee23d9cdd296801a0a9ea885c037ff9c9 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Wed, 24 Oct 2018 17:38:43 +0200
Subject: [PATCH 0579/1890] MAINTAINERS: add myself as co-maintainer of gpiolib
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As discussed with Linus Walleij - I'm adding myself as
the co-maintainer.

Cc: BenoÃ®t Cousson <bcousson@baylibre.com>
Cc: Kevin Hilman <khilman@baylibre.com>
Cc: Michael Turquette <mturquette@baylibre.com>
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Acked-by: Michael Turquette <mturquette@baylibre.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index f4855974f3250..e1bacf0f8b085 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6299,6 +6299,7 @@ F:	tools/testing/selftests/gpio/
 
 GPIO SUBSYSTEM
 M:	Linus Walleij <linus.walleij@linaro.org>
+M:	Bartosz Golaszewski <bgolaszewski@baylibre.com>
 L:	linux-gpio@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio.git
 S:	Maintained
-- 
GitLab


From be2e1c9dcf76886a83fb1c433a316e26d4ca2550 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 11 Oct 2018 13:06:16 +0200
Subject: [PATCH 0580/1890] mtd: docg3: don't set conflicting BCH_CONST_PARAMS
 option

I noticed during the creation of another bugfix that the BCH_CONST_PARAMS
option that is set by DOCG3 breaks setting variable parameters for any
other users of the BCH library code.

The only other user we have today is the MTD_NAND software BCH
implementation (most flash controllers use hardware BCH these days
and are not affected). I considered removing BCH_CONST_PARAMS entirely
because of the inherent conflict, but according to the description in
lib/bch.c there is a significant performance benefit in keeping it.

To avoid the immediate problem of the conflict between MTD_NAND_BCH
and DOCG3, this only sets the constant parameters if MTD_NAND_BCH
is disabled, which should fix the problem for all cases that
are affected. This should also work for all stable kernels.

Note that there is only one machine that actually seems to use the
DOCG3 driver (arch/arm/mach-pxa/mioa701.c), so most users should have
the driver disabled, but it almost certainly shows up if we wanted
to test random kernels on machines that use software BCH in MTD.

Fixes: d13d19ece39f ("mtd: docg3: add ECC correction code")
Cc: stable@vger.kernel.org
Cc: Robert Jarzmik <robert.jarzmik@free.fr>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
---
 drivers/mtd/devices/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/devices/Kconfig b/drivers/mtd/devices/Kconfig
index e514d57a0419d..aa983422aa970 100644
--- a/drivers/mtd/devices/Kconfig
+++ b/drivers/mtd/devices/Kconfig
@@ -207,7 +207,7 @@ comment "Disk-On-Chip Device Drivers"
 config MTD_DOCG3
 	tristate "M-Systems Disk-On-Chip G3"
 	select BCH
-	select BCH_CONST_PARAMS
+	select BCH_CONST_PARAMS if !MTD_NAND_BCH
 	select BITREVERSE
 	help
 	  This provides an MTD device driver for the M-Systems DiskOnChip
-- 
GitLab


From ce97e2bb6687d3af675ecf2e836a9122def53578 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sun, 4 Nov 2018 18:38:35 -0800
Subject: [PATCH 0581/1890] mtd: nand: drop kernel-doc notation for a deleted
 function parameter

Remove kernel-doc notation for a deleted function parameter to prevent
a kernel-doc warning:

../drivers/mtd/nand/raw/nand_base.c:603: warning: Excess function parameter 'mtd' description in 'panic_nand_wait'

Fixes: f1d46942e823 ("mtd: rawnand: Pass a nand_chip object to chip->waitfunc()")

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Boris Brezillon <boris.brezillon@bootlin.com>
Cc: Miquel Raynal <miquel.raynal@bootlin.com>
Cc: Richard Weinberger <richard@nod.at>
Reviewed-by: Miquel Raynal <miquel.raynal@bootlin.com>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
---
 drivers/mtd/nand/raw/nand_base.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 05bd0779fe9bf..71050a0b31dfe 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -590,7 +590,6 @@ nand_get_device(struct mtd_info *mtd, int new_state)
 
 /**
  * panic_nand_wait - [GENERIC] wait until the command is done
- * @mtd: MTD device structure
  * @chip: NAND chip structure
  * @timeo: timeout
  *
-- 
GitLab


From d098093ba06eb032057d1aca1c2e45889e099d00 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Sun, 28 Oct 2018 12:29:55 +0100
Subject: [PATCH 0582/1890] mtd: nand: Fix nanddev_neraseblocks()

nanddev_neraseblocks() currently returns the number pages per LUN
instead of the total number of eraseblocks.

Fixes: 9c3736a3de21 ("mtd: nand: Add core infrastructure to deal with NAND devices")
Cc: <stable@vger.kernel.org>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Reviewed-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 include/linux/mtd/nand.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index abe975c87b900..78b86dea2f294 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -324,9 +324,8 @@ static inline unsigned int nanddev_ntargets(const struct nand_device *nand)
  */
 static inline unsigned int nanddev_neraseblocks(const struct nand_device *nand)
 {
-	return (u64)nand->memorg.luns_per_target *
-	       nand->memorg.eraseblocks_per_lun *
-	       nand->memorg.pages_per_eraseblock;
+	return nand->memorg.ntargets * nand->memorg.luns_per_target *
+	       nand->memorg.eraseblocks_per_lun;
 }
 
 /**
-- 
GitLab


From e39f9dd8206ad66992ac0e6218ef1ba746f2cce9 Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Tue, 23 Oct 2018 18:03:19 +0200
Subject: [PATCH 0583/1890] pinctrl: meson: fix pinconf bias disable

If a bias is enabled on a pin of an Amlogic SoC, calling .pin_config_set()
with PIN_CONFIG_BIAS_DISABLE will not disable the bias. Instead it will
force a pull-down bias on the pin.

Instead of the pull type register bank, the driver should access the pull
enable register bank.

Fixes: 6ac730951104 ("pinctrl: add driver for Amlogic Meson SoCs")
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Acked-by: Neil Armstrong <narmstrong@baylibre.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/meson/pinctrl-meson.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pinctrl/meson/pinctrl-meson.c b/drivers/pinctrl/meson/pinctrl-meson.c
index f8b778a7d4717..53d449076dee3 100644
--- a/drivers/pinctrl/meson/pinctrl-meson.c
+++ b/drivers/pinctrl/meson/pinctrl-meson.c
@@ -192,7 +192,7 @@ static int meson_pinconf_set(struct pinctrl_dev *pcdev, unsigned int pin,
 			dev_dbg(pc->dev, "pin %u: disable bias\n", pin);
 
 			meson_calc_reg_and_bit(bank, pin, REG_PULL, &reg, &bit);
-			ret = regmap_update_bits(pc->reg_pull, reg,
+			ret = regmap_update_bits(pc->reg_pullen, reg,
 						 BIT(bit), 0);
 			if (ret)
 				return ret;
-- 
GitLab


From 4bc51e1e350cd4707ce6e551a93eae26d40b9889 Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Mon, 29 Oct 2018 16:13:37 +0100
Subject: [PATCH 0584/1890] pinctrl: meson: fix gxbb ao pull register bits

AO pull register definition is inverted between pull (up/down) and
pull enable. Fixing this allows to properly apply bias setting
through pinconf

Fixes: 468c234f9ed7 ("pinctrl: amlogic: Add support for Amlogic Meson GXBB SoC")
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/meson/pinctrl-meson-gxbb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pinctrl/meson/pinctrl-meson-gxbb.c b/drivers/pinctrl/meson/pinctrl-meson-gxbb.c
index 4ceb06f8a33c9..4edeb4cae72aa 100644
--- a/drivers/pinctrl/meson/pinctrl-meson-gxbb.c
+++ b/drivers/pinctrl/meson/pinctrl-meson-gxbb.c
@@ -830,7 +830,7 @@ static struct meson_bank meson_gxbb_periphs_banks[] = {
 
 static struct meson_bank meson_gxbb_aobus_banks[] = {
 	/*   name    first      last       irq    pullen  pull    dir     out     in  */
-	BANK("AO",   GPIOAO_0,  GPIOAO_13, 0, 13, 0,  0,  0, 16,  0,  0,  0, 16,  1,  0),
+	BANK("AO",   GPIOAO_0,  GPIOAO_13, 0, 13, 0,  16, 0, 0,   0,  0,  0, 16,  1,  0),
 };
 
 static struct meson_pinctrl_data meson_gxbb_periphs_pinctrl_data = {
-- 
GitLab


From ed3a2b74f3eb34c84c8377353f4730f05acdfd05 Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Mon, 29 Oct 2018 16:13:38 +0100
Subject: [PATCH 0585/1890] pinctrl: meson: fix gxl ao pull register bits

AO pull register definition is inverted between pull (up/down) and
pull enable. Fixing this allows to properly apply bias setting
through pinconf

Fixes: 0f15f500ff2c ("pinctrl: meson: Add GXL pinctrl definitions")
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/meson/pinctrl-meson-gxl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pinctrl/meson/pinctrl-meson-gxl.c b/drivers/pinctrl/meson/pinctrl-meson-gxl.c
index 7dae1d7bf6b0a..158f618f16957 100644
--- a/drivers/pinctrl/meson/pinctrl-meson-gxl.c
+++ b/drivers/pinctrl/meson/pinctrl-meson-gxl.c
@@ -807,7 +807,7 @@ static struct meson_bank meson_gxl_periphs_banks[] = {
 
 static struct meson_bank meson_gxl_aobus_banks[] = {
 	/*   name    first      last      irq	pullen  pull    dir     out     in  */
-	BANK("AO",   GPIOAO_0,  GPIOAO_9, 0, 9, 0,  0,  0, 16,  0,  0,  0, 16,  1,  0),
+	BANK("AO",   GPIOAO_0,  GPIOAO_9, 0, 9, 0,  16, 0, 0,   0,  0,  0, 16,  1,  0),
 };
 
 static struct meson_pinctrl_data meson_gxl_periphs_pinctrl_data = {
-- 
GitLab


From e91b162d2868672d06010f34aa83d408db13d3c6 Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Mon, 29 Oct 2018 16:13:39 +0100
Subject: [PATCH 0586/1890] pinctrl: meson: fix meson8 ao pull register bits

AO pull register definition is inverted between pull (up/down) and
pull enable. Fixing this allows to properly apply bias setting
through pinconf

Fixes: 6ac730951104 ("pinctrl: add driver for Amlogic Meson SoCs")
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/meson/pinctrl-meson8.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pinctrl/meson/pinctrl-meson8.c b/drivers/pinctrl/meson/pinctrl-meson8.c
index c6d79315218fa..86466173114da 100644
--- a/drivers/pinctrl/meson/pinctrl-meson8.c
+++ b/drivers/pinctrl/meson/pinctrl-meson8.c
@@ -1053,7 +1053,7 @@ static struct meson_bank meson8_cbus_banks[] = {
 
 static struct meson_bank meson8_aobus_banks[] = {
 	/*   name    first     last         irq    pullen  pull    dir     out     in  */
-	BANK("AO",   GPIOAO_0, GPIO_TEST_N, 0, 13, 0,  0,  0, 16,  0,  0,  0, 16,  1,  0),
+	BANK("AO",   GPIOAO_0, GPIO_TEST_N, 0, 13, 0, 16,  0,  0,  0,  0,  0, 16,  1,  0),
 };
 
 static struct meson_pinctrl_data meson8_cbus_pinctrl_data = {
-- 
GitLab


From a1705f02704cd8a24d434bfd0141ee8142ad277a Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Mon, 29 Oct 2018 16:13:40 +0100
Subject: [PATCH 0587/1890] pinctrl: meson: fix meson8b ao pull register bits

AO pull register definition is inverted between pull (up/down) and
pull enable. Fixing this allows to properly apply bias setting
through pinconf

Fixes: 0fefcb6876d0 ("pinctrl: Add support for Meson8b")
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/meson/pinctrl-meson8b.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pinctrl/meson/pinctrl-meson8b.c b/drivers/pinctrl/meson/pinctrl-meson8b.c
index bb2a30964fc69..647ad15d5c3c4 100644
--- a/drivers/pinctrl/meson/pinctrl-meson8b.c
+++ b/drivers/pinctrl/meson/pinctrl-meson8b.c
@@ -906,7 +906,7 @@ static struct meson_bank meson8b_cbus_banks[] = {
 
 static struct meson_bank meson8b_aobus_banks[] = {
 	/*   name    first     lastc        irq    pullen  pull    dir     out     in  */
-	BANK("AO",   GPIOAO_0, GPIO_TEST_N, 0, 13, 0,  0,  0, 16,  0,  0,  0, 16,  1,  0),
+	BANK("AO",   GPIOAO_0, GPIO_TEST_N, 0, 13, 0,  16, 0, 0,  0,  0,  0, 16,  1,  0),
 };
 
 static struct meson_pinctrl_data meson8b_cbus_pinctrl_data = {
-- 
GitLab


From 4920b1f7676d55dcebdf55f2b0431a328acb4abe Mon Sep 17 00:00:00 2001
From: Punit Agrawal <punit.agrawal@arm.com>
Date: Fri, 2 Nov 2018 16:57:52 +0000
Subject: [PATCH 0588/1890] mailmap: Update email for Punit Agrawal

As I'll no longer be working with Arm, add a mailmap entry so any mail
directed towards me reaches the appropriate mailbox.

Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Punit Agrawal <punit.agrawal@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 .mailmap | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.mailmap b/.mailmap
index a76be45fef6ca..28fecafa65069 100644
--- a/.mailmap
+++ b/.mailmap
@@ -159,6 +159,7 @@ Peter Oruba <peter@oruba.de>
 Peter Oruba <peter.oruba@amd.com>
 Pratyush Anand <pratyush.anand@gmail.com> <pratyush.anand@st.com>
 Praveen BP <praveenbp@ti.com>
+Punit Agrawal <punitagrawal@gmail.com> <punit.agrawal@arm.com>
 Qais Yousef <qsyousef@gmail.com> <qais.yousef@imgtec.com>
 Oleksij Rempel <linux@rempel-privat.de> <bug-track@fisher-privat.net>
 Oleksij Rempel <linux@rempel-privat.de> <external.Oleksij.Rempel@de.bosch.com>
-- 
GitLab


From 2f5d94123ce3c12a1e15802e6d5ee98f304ccb71 Mon Sep 17 00:00:00 2001
From: Vasily Khoruzhick <anarsoul@gmail.com>
Date: Fri, 2 Nov 2018 21:26:51 -0700
Subject: [PATCH 0589/1890] ASoC: sunxi: rename SND_SUNXI_ADDA_PR_REGMAP to
 SND_SUN8I_ADDA_PR_REGMAP

SND_SUN50I_CODEC_ANALOG selects SND_SUNXI_ADDA_PR_REGMAP which is leftover
of renaming SND_SUNXI_ADDA_PR_REGMAP to SND_SUN8I_ADDA_PR_REGMAP. Replace
it with SND_SUN8I_ADDA_PR_REGMAP to fix possible link errors for some
configurations:

sound/soc/sunxi/sun50i-codec-analog.o: In function `sun50i_codec_analog_probe':
sun50i-codec-analog.c:(.text+0x62): undefined reference to `sun8i_adda_pr_regmap_init'

Fixes: 42371f327df0 ("ASoC: sunxi: Add new driver for Allwinner A64 codec's analog path controls")
Signed-off-by: Vasily Khoruzhick <anarsoul@gmail.com>
Reviewed-by: Andre Przywara <andre.przywara@arm.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/sunxi/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/soc/sunxi/Kconfig b/sound/soc/sunxi/Kconfig
index 66aad0d3f9c7f..8134c3c942292 100644
--- a/sound/soc/sunxi/Kconfig
+++ b/sound/soc/sunxi/Kconfig
@@ -31,7 +31,7 @@ config SND_SUN8I_CODEC_ANALOG
 config SND_SUN50I_CODEC_ANALOG
 	tristate "Allwinner sun50i Codec Analog Controls Support"
 	depends on (ARM64 && ARCH_SUNXI) || COMPILE_TEST
-	select SND_SUNXI_ADDA_PR_REGMAP
+	select SND_SUN8I_ADDA_PR_REGMAP
 	help
 	  Say Y or M if you want to add support for the analog controls for
 	  the codec embedded in Allwinner A64 SoC.
-- 
GitLab


From a182ecd3809c8d5a2da80c520f3602e301c5317e Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 31 Oct 2018 15:22:25 +0100
Subject: [PATCH 0590/1890] ASoC: intel: cht_bsw_max98090_ti: Add quirk for
 boards using pmc_plt_clk_0

Some boards such as the Swanky model Chromebooks use pmc_plt_clk_0 for the
mclk instead of pmc_plt_clk_3.

This commit adds a DMI based quirk for this.

This fixing audio no longer working on these devices after
commit 648e921888ad ("clk: x86: Stop marking clocks as CLK_IS_CRITICAL")
that commit fixes us unnecessary keeping unused clocks on, but in case
of the Swanky that was breaking audio support since we were not using
the right clock in the cht_bsw_max98090_ti machine driver.

Cc: stable@vger.kernel.org
Fixes: 648e921888ad ("clk: x86: Stop marking clocks as CLK_IS_CRITICAL")
Reported-and-tested-by: Dean Wallace <duffydack73@gmail.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/intel/boards/cht_bsw_max98090_ti.c | 32 ++++++++++++++++++--
 1 file changed, 29 insertions(+), 3 deletions(-)

diff --git a/sound/soc/intel/boards/cht_bsw_max98090_ti.c b/sound/soc/intel/boards/cht_bsw_max98090_ti.c
index db6976f4ddaa2..9d9f6e41d81c0 100644
--- a/sound/soc/intel/boards/cht_bsw_max98090_ti.c
+++ b/sound/soc/intel/boards/cht_bsw_max98090_ti.c
@@ -19,6 +19,7 @@
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  */
 
+#include <linux/dmi.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
@@ -35,6 +36,8 @@
 #define CHT_PLAT_CLK_3_HZ	19200000
 #define CHT_CODEC_DAI	"HiFi"
 
+#define QUIRK_PMC_PLT_CLK_0				0x01
+
 struct cht_mc_private {
 	struct clk *mclk;
 	struct snd_soc_jack jack;
@@ -385,11 +388,29 @@ static struct snd_soc_card snd_soc_card_cht = {
 	.num_controls = ARRAY_SIZE(cht_mc_controls),
 };
 
+static const struct dmi_system_id cht_max98090_quirk_table[] = {
+	{
+		/* Swanky model Chromebook (Toshiba Chromebook 2) */
+		.matches = {
+			DMI_MATCH(DMI_PRODUCT_NAME, "Swanky"),
+		},
+		.driver_data = (void *)QUIRK_PMC_PLT_CLK_0,
+	},
+	{}
+};
+
 static int snd_cht_mc_probe(struct platform_device *pdev)
 {
+	const struct dmi_system_id *dmi_id;
 	struct device *dev = &pdev->dev;
 	int ret_val = 0;
 	struct cht_mc_private *drv;
+	const char *mclk_name;
+	int quirks = 0;
+
+	dmi_id = dmi_first_match(cht_max98090_quirk_table);
+	if (dmi_id)
+		quirks = (unsigned long)dmi_id->driver_data;
 
 	drv = devm_kzalloc(&pdev->dev, sizeof(*drv), GFP_KERNEL);
 	if (!drv)
@@ -411,11 +432,16 @@ static int snd_cht_mc_probe(struct platform_device *pdev)
 	snd_soc_card_cht.dev = &pdev->dev;
 	snd_soc_card_set_drvdata(&snd_soc_card_cht, drv);
 
-	drv->mclk = devm_clk_get(&pdev->dev, "pmc_plt_clk_3");
+	if (quirks & QUIRK_PMC_PLT_CLK_0)
+		mclk_name = "pmc_plt_clk_0";
+	else
+		mclk_name = "pmc_plt_clk_3";
+
+	drv->mclk = devm_clk_get(&pdev->dev, mclk_name);
 	if (IS_ERR(drv->mclk)) {
 		dev_err(&pdev->dev,
-			"Failed to get MCLK from pmc_plt_clk_3: %ld\n",
-			PTR_ERR(drv->mclk));
+			"Failed to get MCLK from %s: %ld\n",
+			mclk_name, PTR_ERR(drv->mclk));
 		return PTR_ERR(drv->mclk);
 	}
 
-- 
GitLab


From 112b57fa737445b2361be332ce8cc0fd3e2b994e Mon Sep 17 00:00:00 2001
From: Rohit kumar <rohitkr@codeaurora.org>
Date: Thu, 1 Nov 2018 17:21:07 +0530
Subject: [PATCH 0591/1890] ASoC: qdsp6: q6afe: Fix wrong MI2S SD line mask

SD line mask for MI2S starts from BIT 0 instead of BIT 1.
Fix all bit mask for MI2S SD lines.

Signed-off-by: Rohit kumar <rohitkr@codeaurora.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/qcom/qdsp6/q6afe.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/sound/soc/qcom/qdsp6/q6afe.c b/sound/soc/qcom/qdsp6/q6afe.c
index 000775b4bba83..829b5e987b2aa 100644
--- a/sound/soc/qcom/qdsp6/q6afe.c
+++ b/sound/soc/qcom/qdsp6/q6afe.c
@@ -49,14 +49,14 @@
 #define AFE_PORT_I2S_SD1		0x2
 #define AFE_PORT_I2S_SD2		0x3
 #define AFE_PORT_I2S_SD3		0x4
-#define AFE_PORT_I2S_SD0_MASK		BIT(0x1)
-#define AFE_PORT_I2S_SD1_MASK		BIT(0x2)
-#define AFE_PORT_I2S_SD2_MASK		BIT(0x3)
-#define AFE_PORT_I2S_SD3_MASK		BIT(0x4)
-#define AFE_PORT_I2S_SD0_1_MASK		GENMASK(2, 1)
-#define AFE_PORT_I2S_SD2_3_MASK		GENMASK(4, 3)
-#define AFE_PORT_I2S_SD0_1_2_MASK	GENMASK(3, 1)
-#define AFE_PORT_I2S_SD0_1_2_3_MASK	GENMASK(4, 1)
+#define AFE_PORT_I2S_SD0_MASK		BIT(0x0)
+#define AFE_PORT_I2S_SD1_MASK		BIT(0x1)
+#define AFE_PORT_I2S_SD2_MASK		BIT(0x2)
+#define AFE_PORT_I2S_SD3_MASK		BIT(0x3)
+#define AFE_PORT_I2S_SD0_1_MASK		GENMASK(1, 0)
+#define AFE_PORT_I2S_SD2_3_MASK		GENMASK(3, 2)
+#define AFE_PORT_I2S_SD0_1_2_MASK	GENMASK(2, 0)
+#define AFE_PORT_I2S_SD0_1_2_3_MASK	GENMASK(3, 0)
 #define AFE_PORT_I2S_QUAD01		0x5
 #define AFE_PORT_I2S_QUAD23		0x6
 #define AFE_PORT_I2S_6CHS		0x7
-- 
GitLab


From fc09ab7a767394f9ecdad84ea6e85d68b83c8e21 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 5 Nov 2018 11:52:50 +0100
Subject: [PATCH 0592/1890] vga_switcheroo: Fix missing gpu_bound call at audio
 client registration

The commit 37a3a98ef601 ("ALSA: hda - Enable runtime PM only for
discrete GPU") added a new ops gpu_bound to be called when GPU gets
bound.  The patch overlooked, however, that vga_switcheroo_enable() is
called only once at GPU is bound.  When an audio client is registered
after that point, it would miss the gpu_bound call.  This leads to the
unexpected lack of runtime PM in HD-audio side.

For addressing that regression, just call gpu_bound callback manually
at vga_switcheroo_register_audio_client() when the GPU was already
bound.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=201615
Fixes: 37a3a98ef601 ("ALSA: hda - Enable runtime PM only for discrete GPU")
Cc: <stable@vger.kernel.org>
Reviewed-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 drivers/gpu/vga/vga_switcheroo.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c
index cf2a18571d484..a132c37d73349 100644
--- a/drivers/gpu/vga/vga_switcheroo.c
+++ b/drivers/gpu/vga/vga_switcheroo.c
@@ -380,6 +380,9 @@ int vga_switcheroo_register_audio_client(struct pci_dev *pdev,
 			mutex_unlock(&vgasr_mutex);
 			return -EINVAL;
 		}
+		/* notify if GPU has been already bound */
+		if (ops->gpu_bound)
+			ops->gpu_bound(pdev, id);
 	}
 	mutex_unlock(&vgasr_mutex);
 
-- 
GitLab


From aab7a2414ba0d5c3d0571a90031b535adba7146a Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Fri, 28 Sep 2018 02:38:36 +0000
Subject: [PATCH 0593/1890] arm64: dts: renesas: r8a7795: add missing dma-names
 on hscif2

hscif2 has 4 dmas, but has only 2 dma-names.
This patch add missing dma-names.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Fixes: e0f0bda79337701a ("arm64: dts: renesas: r8a7795: sort subnodes
of the soc node")
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
---
 arch/arm64/boot/dts/renesas/r8a7795.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/boot/dts/renesas/r8a7795.dtsi b/arch/arm64/boot/dts/renesas/r8a7795.dtsi
index b5f2273caca4d..a79c8d369e0b4 100644
--- a/arch/arm64/boot/dts/renesas/r8a7795.dtsi
+++ b/arch/arm64/boot/dts/renesas/r8a7795.dtsi
@@ -652,7 +652,7 @@ hscif2: serial@e6560000 {
 			clock-names = "fck", "brg_int", "scif_clk";
 			dmas = <&dmac1 0x35>, <&dmac1 0x34>,
 			       <&dmac2 0x35>, <&dmac2 0x34>;
-			dma-names = "tx", "rx";
+			dma-names = "tx", "rx", "tx", "rx";
 			power-domains = <&sysc R8A7795_PD_ALWAYS_ON>;
 			resets = <&cpg 518>;
 			status = "disabled";
-- 
GitLab


From 058ad7b6aa5204d3af878415c7b946748ab34f7a Mon Sep 17 00:00:00 2001
From: Fabrizio Castro <fabrizio.castro@bp.renesas.com>
Date: Thu, 4 Oct 2018 09:53:10 +0100
Subject: [PATCH 0594/1890] dt-bindings: arm: Fix RZ/G2E part number

Fix RZ/G2E part number from its description.

Signed-off-by: Fabrizio Castro <fabrizio.castro@bp.renesas.com>
Reviewed-by: Biju Das <biju.das@bp.renesas.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
---
 Documentation/devicetree/bindings/arm/shmobile.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/arm/shmobile.txt b/Documentation/devicetree/bindings/arm/shmobile.txt
index f5e0f82fd5031..58c4256d37a39 100644
--- a/Documentation/devicetree/bindings/arm/shmobile.txt
+++ b/Documentation/devicetree/bindings/arm/shmobile.txt
@@ -27,7 +27,7 @@ SoCs:
     compatible = "renesas,r8a77470"
   - RZ/G2M (R8A774A1)
     compatible = "renesas,r8a774a1"
-  - RZ/G2E (RA8774C0)
+  - RZ/G2E (R8A774C0)
     compatible = "renesas,r8a774c0"
   - R-Car M1A (R8A77781)
     compatible = "renesas,r8a7778"
-- 
GitLab


From eab53fdfd60a84b0cc514d4f1f5d79226c76df01 Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Date: Thu, 18 Oct 2018 19:48:53 +0300
Subject: [PATCH 0595/1890] arm64: dts: renesas: condor: switch from EtherAVB
 to GEther

The "official" Condor boards have always been wired to mount NFS via
GEther, not EtherAVB -- the boards resoldered for EtherAVB were local
to Cogent Embedded, so we've been having an unpleasant situation where
a "normal" Condor board still can't mount NFS (unless an EtherAVB PHY
extension board is plugged in). Switch from EtherAVB to GEther at last!

Fixes: 8091788f3d38 ("arm64: dts: renesas: condor: add EtherAVB support")
Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
---
 .../boot/dts/renesas/r8a77980-condor.dts      | 47 ++++++++++---------
 1 file changed, 24 insertions(+), 23 deletions(-)

diff --git a/arch/arm64/boot/dts/renesas/r8a77980-condor.dts b/arch/arm64/boot/dts/renesas/r8a77980-condor.dts
index fe2e2c051cc93..5a7012be0d6ad 100644
--- a/arch/arm64/boot/dts/renesas/r8a77980-condor.dts
+++ b/arch/arm64/boot/dts/renesas/r8a77980-condor.dts
@@ -15,7 +15,7 @@ / {
 
 	aliases {
 		serial0 = &scif0;
-		ethernet0 = &avb;
+		ethernet0 = &gether;
 	};
 
 	chosen {
@@ -97,23 +97,6 @@ x1_clk: x1-clock {
 	};
 };
 
-&avb {
-	pinctrl-0 = <&avb_pins>;
-	pinctrl-names = "default";
-
-	phy-mode = "rgmii-id";
-	phy-handle = <&phy0>;
-	renesas,no-ether-link;
-	status = "okay";
-
-	phy0: ethernet-phy@0 {
-		rxc-skew-ps = <1500>;
-		reg = <0>;
-		interrupt-parent = <&gpio1>;
-		interrupts = <17 IRQ_TYPE_LEVEL_LOW>;
-	};
-};
-
 &canfd {
 	pinctrl-0 = <&canfd0_pins>;
 	pinctrl-names = "default";
@@ -139,6 +122,23 @@ &extalr_clk {
 	clock-frequency = <32768>;
 };
 
+&gether {
+	pinctrl-0 = <&gether_pins>;
+	pinctrl-names = "default";
+
+	phy-mode = "rgmii-id";
+	phy-handle = <&phy0>;
+	renesas,no-ether-link;
+	status = "okay";
+
+	phy0: ethernet-phy@0 {
+		rxc-skew-ps = <1500>;
+		reg = <0>;
+		interrupt-parent = <&gpio4>;
+		interrupts = <23 IRQ_TYPE_LEVEL_LOW>;
+	};
+};
+
 &i2c0 {
 	pinctrl-0 = <&i2c0_pins>;
 	pinctrl-names = "default";
@@ -236,16 +236,17 @@ &pcie_phy {
 };
 
 &pfc {
-	avb_pins: avb {
-		groups = "avb_mdio", "avb_rgmii";
-		function = "avb";
-	};
-
 	canfd0_pins: canfd0 {
 		groups = "canfd0_data_a";
 		function = "canfd0";
 	};
 
+	gether_pins: gether {
+		groups = "gether_mdio_a", "gether_rgmii",
+			 "gether_txcrefclk", "gether_txcrefclk_mega";
+		function = "gether";
+	};
+
 	i2c0_pins: i2c0 {
 		groups = "i2c0";
 		function = "i2c0";
-- 
GitLab


From 02522ad77fb7619615720147dc5da18024cad577 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 2 Nov 2018 13:09:08 +0100
Subject: [PATCH 0596/1890] s390: update defconfigs

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/configs/debug_defconfig       | 14 ++++-
 arch/s390/configs/performance_defconfig | 13 +++-
 arch/s390/defconfig                     | 79 +++++++++++++------------
 3 files changed, 63 insertions(+), 43 deletions(-)

diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 259d1698ac50a..c69cb04b7a594 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -64,6 +64,8 @@ CONFIG_NUMA=y
 CONFIG_PREEMPT=y
 CONFIG_HZ_100=y
 CONFIG_KEXEC_FILE=y
+CONFIG_EXPOLINE=y
+CONFIG_EXPOLINE_AUTO=y
 CONFIG_MEMORY_HOTPLUG=y
 CONFIG_MEMORY_HOTREMOVE=y
 CONFIG_KSM=y
@@ -84,9 +86,11 @@ CONFIG_PCI_DEBUG=y
 CONFIG_HOTPLUG_PCI=y
 CONFIG_HOTPLUG_PCI_S390=y
 CONFIG_CHSC_SCH=y
+CONFIG_VFIO_AP=m
 CONFIG_CRASH_DUMP=y
 CONFIG_BINFMT_MISC=m
 CONFIG_HIBERNATION=y
+CONFIG_PM_DEBUG=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_PACKET_DIAG=m
@@ -161,8 +165,6 @@ CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_CT_NETLINK=m
 CONFIG_NF_CT_NETLINK_TIMEOUT=m
 CONFIG_NF_TABLES=m
-CONFIG_NFT_EXTHDR=m
-CONFIG_NFT_META=m
 CONFIG_NFT_CT=m
 CONFIG_NFT_COUNTER=m
 CONFIG_NFT_LOG=m
@@ -365,6 +367,8 @@ CONFIG_NET_ACT_SKBEDIT=m
 CONFIG_NET_ACT_CSUM=m
 CONFIG_DNS_RESOLVER=y
 CONFIG_OPENVSWITCH=m
+CONFIG_VSOCKETS=m
+CONFIG_VIRTIO_VSOCKETS=m
 CONFIG_NETLINK_DIAG=m
 CONFIG_CGROUP_NET_PRIO=y
 CONFIG_BPF_JIT=y
@@ -461,6 +465,7 @@ CONFIG_PPTP=m
 CONFIG_PPPOL2TP=m
 CONFIG_PPP_ASYNC=m
 CONFIG_PPP_SYNC_TTY=m
+CONFIG_ISM=m
 CONFIG_INPUT_EVDEV=y
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
@@ -486,9 +491,12 @@ CONFIG_MLX4_INFINIBAND=m
 CONFIG_MLX5_INFINIBAND=m
 CONFIG_VFIO=m
 CONFIG_VFIO_PCI=m
+CONFIG_VFIO_MDEV=m
+CONFIG_VFIO_MDEV_DEVICE=m
 CONFIG_VIRTIO_PCI=m
 CONFIG_VIRTIO_BALLOON=m
 CONFIG_VIRTIO_INPUT=y
+CONFIG_S390_AP_IOMMU=y
 CONFIG_EXT4_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
 CONFIG_EXT4_FS_SECURITY=y
@@ -615,7 +623,6 @@ CONFIG_DEBUG_CREDENTIALS=y
 CONFIG_RCU_TORTURE_TEST=m
 CONFIG_RCU_CPU_STALL_TIMEOUT=300
 CONFIG_NOTIFIER_ERROR_INJECTION=m
-CONFIG_PM_NOTIFIER_ERROR_INJECT=m
 CONFIG_NETDEV_NOTIFIER_ERROR_INJECT=m
 CONFIG_FAULT_INJECTION=y
 CONFIG_FAILSLAB=y
@@ -727,3 +734,4 @@ CONFIG_APPLDATA_BASE=y
 CONFIG_KVM=m
 CONFIG_KVM_S390_UCONTROL=y
 CONFIG_VHOST_NET=m
+CONFIG_VHOST_VSOCK=m
diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig
index 37fd60c20e22d..32f539dc9c192 100644
--- a/arch/s390/configs/performance_defconfig
+++ b/arch/s390/configs/performance_defconfig
@@ -65,6 +65,8 @@ CONFIG_NR_CPUS=512
 CONFIG_NUMA=y
 CONFIG_HZ_100=y
 CONFIG_KEXEC_FILE=y
+CONFIG_EXPOLINE=y
+CONFIG_EXPOLINE_AUTO=y
 CONFIG_MEMORY_HOTPLUG=y
 CONFIG_MEMORY_HOTREMOVE=y
 CONFIG_KSM=y
@@ -82,9 +84,11 @@ CONFIG_PCI=y
 CONFIG_HOTPLUG_PCI=y
 CONFIG_HOTPLUG_PCI_S390=y
 CONFIG_CHSC_SCH=y
+CONFIG_VFIO_AP=m
 CONFIG_CRASH_DUMP=y
 CONFIG_BINFMT_MISC=m
 CONFIG_HIBERNATION=y
+CONFIG_PM_DEBUG=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_PACKET_DIAG=m
@@ -159,8 +163,6 @@ CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_CT_NETLINK=m
 CONFIG_NF_CT_NETLINK_TIMEOUT=m
 CONFIG_NF_TABLES=m
-CONFIG_NFT_EXTHDR=m
-CONFIG_NFT_META=m
 CONFIG_NFT_CT=m
 CONFIG_NFT_COUNTER=m
 CONFIG_NFT_LOG=m
@@ -362,6 +364,8 @@ CONFIG_NET_ACT_SKBEDIT=m
 CONFIG_NET_ACT_CSUM=m
 CONFIG_DNS_RESOLVER=y
 CONFIG_OPENVSWITCH=m
+CONFIG_VSOCKETS=m
+CONFIG_VIRTIO_VSOCKETS=m
 CONFIG_NETLINK_DIAG=m
 CONFIG_CGROUP_NET_PRIO=y
 CONFIG_BPF_JIT=y
@@ -458,6 +462,7 @@ CONFIG_PPTP=m
 CONFIG_PPPOL2TP=m
 CONFIG_PPP_ASYNC=m
 CONFIG_PPP_SYNC_TTY=m
+CONFIG_ISM=m
 CONFIG_INPUT_EVDEV=y
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
@@ -483,9 +488,12 @@ CONFIG_MLX4_INFINIBAND=m
 CONFIG_MLX5_INFINIBAND=m
 CONFIG_VFIO=m
 CONFIG_VFIO_PCI=m
+CONFIG_VFIO_MDEV=m
+CONFIG_VFIO_MDEV_DEVICE=m
 CONFIG_VIRTIO_PCI=m
 CONFIG_VIRTIO_BALLOON=m
 CONFIG_VIRTIO_INPUT=y
+CONFIG_S390_AP_IOMMU=y
 CONFIG_EXT4_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
 CONFIG_EXT4_FS_SECURITY=y
@@ -666,3 +674,4 @@ CONFIG_APPLDATA_BASE=y
 CONFIG_KVM=m
 CONFIG_KVM_S390_UCONTROL=y
 CONFIG_VHOST_NET=m
+CONFIG_VHOST_VSOCK=m
diff --git a/arch/s390/defconfig b/arch/s390/defconfig
index 7cb6a52f727da..4d58a92b5d979 100644
--- a/arch/s390/defconfig
+++ b/arch/s390/defconfig
@@ -26,14 +26,23 @@ CONFIG_CGROUP_CPUACCT=y
 CONFIG_CGROUP_PERF=y
 CONFIG_NAMESPACES=y
 CONFIG_USER_NS=y
+CONFIG_CHECKPOINT_RESTORE=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_EXPERT=y
 # CONFIG_SYSFS_SYSCALL is not set
-CONFIG_CHECKPOINT_RESTORE=y
 CONFIG_BPF_SYSCALL=y
 CONFIG_USERFAULTFD=y
 # CONFIG_COMPAT_BRK is not set
 CONFIG_PROFILING=y
+CONFIG_LIVEPATCH=y
+CONFIG_NR_CPUS=256
+CONFIG_NUMA=y
+CONFIG_HZ_100=y
+CONFIG_KEXEC_FILE=y
+CONFIG_CRASH_DUMP=y
+CONFIG_HIBERNATION=y
+CONFIG_PM_DEBUG=y
+CONFIG_CMM=m
 CONFIG_OPROFILE=y
 CONFIG_KPROBES=y
 CONFIG_JUMP_LABEL=y
@@ -44,11 +53,7 @@ CONFIG_BLK_DEV_INTEGRITY=y
 CONFIG_PARTITION_ADVANCED=y
 CONFIG_IBM_PARTITION=y
 CONFIG_DEFAULT_DEADLINE=y
-CONFIG_LIVEPATCH=y
-CONFIG_NR_CPUS=256
-CONFIG_NUMA=y
-CONFIG_HZ_100=y
-CONFIG_KEXEC_FILE=y
+CONFIG_BINFMT_MISC=m
 CONFIG_MEMORY_HOTPLUG=y
 CONFIG_MEMORY_HOTREMOVE=y
 CONFIG_KSM=y
@@ -60,9 +65,6 @@ CONFIG_ZBUD=m
 CONFIG_ZSMALLOC=m
 CONFIG_ZSMALLOC_STAT=y
 CONFIG_IDLE_PAGE_TRACKING=y
-CONFIG_CRASH_DUMP=y
-CONFIG_BINFMT_MISC=m
-CONFIG_HIBERNATION=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
@@ -98,6 +100,7 @@ CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_VIRTIO_BLK=y
 CONFIG_SCSI=y
+# CONFIG_SCSI_MQ_DEFAULT is not set
 CONFIG_BLK_DEV_SD=y
 CONFIG_CHR_DEV_ST=y
 CONFIG_BLK_DEV_SR=y
@@ -131,6 +134,7 @@ CONFIG_EQUALIZER=m
 CONFIG_TUN=m
 CONFIG_VIRTIO_NET=y
 # CONFIG_NET_VENDOR_ALACRITECH is not set
+# CONFIG_NET_VENDOR_AURORA is not set
 # CONFIG_NET_VENDOR_CORTINA is not set
 # CONFIG_NET_VENDOR_SOLARFLARE is not set
 # CONFIG_NET_VENDOR_SOCIONEXT is not set
@@ -157,33 +161,6 @@ CONFIG_TMPFS=y
 CONFIG_TMPFS_POSIX_ACL=y
 CONFIG_HUGETLBFS=y
 # CONFIG_NETWORK_FILESYSTEMS is not set
-CONFIG_DEBUG_INFO=y
-CONFIG_DEBUG_INFO_DWARF4=y
-CONFIG_GDB_SCRIPTS=y
-CONFIG_UNUSED_SYMBOLS=y
-CONFIG_DEBUG_SECTION_MISMATCH=y
-CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_PAGEALLOC=y
-CONFIG_DETECT_HUNG_TASK=y
-CONFIG_PANIC_ON_OOPS=y
-CONFIG_PROVE_LOCKING=y
-CONFIG_LOCK_STAT=y
-CONFIG_DEBUG_LOCKDEP=y
-CONFIG_DEBUG_ATOMIC_SLEEP=y
-CONFIG_DEBUG_LIST=y
-CONFIG_DEBUG_SG=y
-CONFIG_DEBUG_NOTIFIERS=y
-CONFIG_RCU_CPU_STALL_TIMEOUT=60
-CONFIG_LATENCYTOP=y
-CONFIG_SCHED_TRACER=y
-CONFIG_FTRACE_SYSCALLS=y
-CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y
-CONFIG_STACK_TRACER=y
-CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_FUNCTION_PROFILER=y
-# CONFIG_RUNTIME_TESTING_MENU is not set
-CONFIG_S390_PTDUMP=y
 CONFIG_CRYPTO_CRYPTD=m
 CONFIG_CRYPTO_AUTHENC=m
 CONFIG_CRYPTO_TEST=m
@@ -193,6 +170,7 @@ CONFIG_CRYPTO_CBC=y
 CONFIG_CRYPTO_CFB=m
 CONFIG_CRYPTO_CTS=m
 CONFIG_CRYPTO_LRW=m
+CONFIG_CRYPTO_OFB=m
 CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_XTS=m
 CONFIG_CRYPTO_CMAC=m
@@ -231,7 +209,6 @@ CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_ZCRYPT=m
-CONFIG_ZCRYPT_MULTIDEVNODES=y
 CONFIG_PKEY=m
 CONFIG_CRYPTO_PAES_S390=m
 CONFIG_CRYPTO_SHA1_S390=m
@@ -247,4 +224,30 @@ CONFIG_CRC7=m
 # CONFIG_XZ_DEC_ARM is not set
 # CONFIG_XZ_DEC_ARMTHUMB is not set
 # CONFIG_XZ_DEC_SPARC is not set
-CONFIG_CMM=m
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF4=y
+CONFIG_GDB_SCRIPTS=y
+CONFIG_UNUSED_SYMBOLS=y
+CONFIG_DEBUG_SECTION_MISMATCH=y
+CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_PAGEALLOC=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_PANIC_ON_OOPS=y
+CONFIG_PROVE_LOCKING=y
+CONFIG_LOCK_STAT=y
+CONFIG_DEBUG_LOCKDEP=y
+CONFIG_DEBUG_ATOMIC_SLEEP=y
+CONFIG_DEBUG_LIST=y
+CONFIG_DEBUG_SG=y
+CONFIG_DEBUG_NOTIFIERS=y
+CONFIG_RCU_CPU_STALL_TIMEOUT=60
+CONFIG_LATENCYTOP=y
+CONFIG_SCHED_TRACER=y
+CONFIG_FTRACE_SYSCALLS=y
+CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y
+CONFIG_STACK_TRACER=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_FUNCTION_PROFILER=y
+# CONFIG_RUNTIME_TESTING_MENU is not set
+CONFIG_S390_PTDUMP=y
-- 
GitLab


From f55275bcc72948056387041abe9fce1c54300adc Mon Sep 17 00:00:00 2001
From: Olof Johansson <olof@lixom.net>
Date: Mon, 5 Nov 2018 06:34:09 -0800
Subject: [PATCH 0597/1890] ARM: defconfig: Disable PREEMPT again on  multi_v7

I should have let this soak for a while in linux-next, since we have at
least one board that hit a regression from it. Revert from 4.20-rc, and
we'll queue it for next merge window once regression is fixed.

This reverts commit 513eb98595522bc0cb83831a9daee1d5738e66f1.

Signed-off-by: Olof Johansson <olof@lixom.net>
---
 arch/arm/configs/multi_v7_defconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index 1c7616815a86a..63af6234c1b69 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -1,7 +1,6 @@
 CONFIG_SYSVIPC=y
 CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
-CONFIG_PREEMPT=y
 CONFIG_CGROUPS=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_EMBEDDED=y
-- 
GitLab


From ee474b81fe5aa5dc0faae920bf66240fbf55f891 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Thu, 1 Nov 2018 23:29:28 +0900
Subject: [PATCH 0598/1890] tracing/kprobes: Fix strpbrk() argument order

Fix strpbrk()'s argument order, it must pass acceptable string
in 2nd argument. Note that this can cause a kernel panic where
it recovers backup character to code->data.

Link: http://lkml.kernel.org/r/154108256792.2604.1816052586385217811.stgit@devbox

Fixes: a6682814f371 ("tracing/kprobes: Allow kprobe-events to record module symbol")
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace_probe.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 3ef15a6683c00..bd30e9398d2a8 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -535,7 +535,7 @@ int traceprobe_update_arg(struct probe_arg *arg)
 			if (code[1].op != FETCH_OP_IMM)
 				return -EINVAL;
 
-			tmp = strpbrk("+-", code->data);
+			tmp = strpbrk(code->data, "+-");
 			if (tmp)
 				c = *tmp;
 			ret = traceprobe_split_symbol_offset(code->data,
-- 
GitLab


From 79ef68c7e1f665578e005b454480b6eca60edabe Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 5 Nov 2018 12:23:40 -0300
Subject: [PATCH 0599/1890] perf augmented_syscalls: Start collecting pathnames
 in the BPF program

This is the start of having the raw_syscalls:sys_enter BPF handler
collecting pointer arguments, namely pathnames, and with two syscalls
that have that pointer in different arguments, "open" as it as its first
argument, "openat" as the second.

With this in place the existing beautifiers in 'perf trace' works, those
args are shown instead of just the pointer that comes with the syscalls
tracepoints.

This also serves to show and document pitfalls in the process of using
just that place in the kernel (raw_syscalls:sys_enter) plus tables
provided by userspace to collect syscall pointer arguments.

One is the need to use a barrier, as suggested by Edward, to avoid clang
optimizations that make the kernel BPF verifier to refuse loading our
pointer contents collector.

The end result should be a generic eBPF program that works in all
architectures, with the differences amongst archs resolved by the
userspace component, 'perf trace', that should get all its tables
created automatically from the kernel components where they are defined,
via string table constructors for things not expressed in BTF/DWARF
(enums, structs, etc), and otherwise using those observability files
(BTF).

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: David Ahern <dsahern@gmail.com>
Cc: Edward Cree <ecree@solarflare.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Martin KaFai Lau <kafai@fb.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Cc: Yonghong Song <yhs@fb.com>
Link: https://lkml.kernel.org/n/tip-37dz54pmotgpnwg9tb6zuk9j@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../examples/bpf/augmented_raw_syscalls.c     | 72 +++++++++++++++++++
 1 file changed, 72 insertions(+)

diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c
index cde91c34b1016..90a19336310b0 100644
--- a/tools/perf/examples/bpf/augmented_raw_syscalls.c
+++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c
@@ -37,15 +37,87 @@ struct syscall_exit_args {
 	long		   ret;
 };
 
+struct augmented_filename {
+	unsigned int	size;
+	int		reserved;
+	char		value[256];
+};
+
+#define SYS_OPEN 2
+#define SYS_OPENAT 257
+
 SEC("raw_syscalls:sys_enter")
 int sys_enter(struct syscall_enter_args *args)
 {
 	struct {
 		struct syscall_enter_args args;
+		struct augmented_filename filename;
 	} augmented_args;
 	unsigned int len = sizeof(augmented_args);
+	const void *filename_arg = NULL;
 
 	probe_read(&augmented_args.args, sizeof(augmented_args.args), args);
+	/*
+	 * Yonghong and Edward Cree sayz:
+	 *
+	 * https://www.spinics.net/lists/netdev/msg531645.html
+	 *
+	 * >>   R0=inv(id=0) R1=inv2 R6=ctx(id=0,off=0,imm=0) R7=inv64 R10=fp0,call_-1
+	 * >> 10: (bf) r1 = r6
+	 * >> 11: (07) r1 += 16
+	 * >> 12: (05) goto pc+2
+	 * >> 15: (79) r3 = *(u64 *)(r1 +0)
+	 * >> dereference of modified ctx ptr R1 off=16 disallowed
+	 * > Aha, we at least got a different error message this time.
+	 * > And indeed llvm has done that optimisation, rather than the more obvious
+	 * > 11: r3 = *(u64 *)(r1 +16)
+	 * > because it wants to have lots of reads share a single insn.  You may be able
+	 * > to defeat that optimisation by adding compiler barriers, idk.  Maybe someone
+	 * > with llvm knowledge can figure out how to stop it (ideally, llvm would know
+	 * > when it's generating for bpf backend and not do that).  -O0?  Â¯\_(ãƒ„)_/Â¯
+	 *
+	 * The optimization mostly likes below:
+	 *
+	 *	br1:
+	 * 	...
+	 *	r1 += 16
+	 *	goto merge
+	 *	br2:
+	 *	...
+	 *	r1 += 20
+	 *	goto merge
+	 *	merge:
+	 *	*(u64 *)(r1 + 0)
+	 *
+	 * The compiler tries to merge common loads. There is no easy way to
+	 * stop this compiler optimization without turning off a lot of other
+	 * optimizations. The easiest way is to add barriers:
+	 *
+	 * 	 __asm__ __volatile__("": : :"memory")
+	 *
+	 * 	 after the ctx memory access to prevent their down stream merging.
+	 */
+	switch (augmented_args.args.syscall_nr) {
+	case SYS_OPEN:	 filename_arg = (const void *)args->args[0];
+			__asm__ __volatile__("": : :"memory");
+			 break;
+	case SYS_OPENAT: filename_arg = (const void *)args->args[1];
+			 break;
+	}
+
+	if (filename_arg != NULL) {
+		augmented_args.filename.reserved = 0;
+		augmented_args.filename.size = probe_read_str(&augmented_args.filename.value,
+							      sizeof(augmented_args.filename.value),
+							      filename_arg);
+		if (augmented_args.filename.size < sizeof(augmented_args.filename.value)) {
+			len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size;
+			len &= sizeof(augmented_args.filename.value) - 1;
+		}
+	} else {
+		len = sizeof(augmented_args.args);
+	}
+
 	perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len);
 	return 0;
 }
-- 
GitLab


From b42967dcac1d4f5b059ec25568136462bcb051fe Mon Sep 17 00:00:00 2001
From: Yi Wang <wang.yi59@zte.com.cn>
Date: Mon, 29 Oct 2018 15:17:31 +0800
Subject: [PATCH 0600/1890] x86/hyper-v: Fix indentation in
 hv_do_fast_hypercall16()

Remove the surplus TAB in hv_do_fast_hypercall16().

Signed-off-by: Yi Wang <wang.yi59@zte.com.cn>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: kys@microsoft.com
Cc: haiyangz@microsoft.com
Cc: sthemmin@microsoft.com
Cc: bp@alien8.de
Cc: hpa@zytor.com
Cc: devel@linuxdriverproject.org
Cc: zhong.weidong@zte.com.cn
Link: https://lkml.kernel.org/r/1540797451-2792-1-git-send-email-wang.yi59@zte.com.cn
---
 arch/x86/include/asm/mshyperv.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 0d6271cce198d..1d0a7778e1631 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -232,7 +232,7 @@ static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
 				      : "cc");
 	}
 #endif
-		return hv_status;
+	return hv_status;
 }
 
 /*
-- 
GitLab


From 437e88ab8f9e2ad90576ab74c4cf8f527bbf51cd Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <natechancellor@gmail.com>
Date: Tue, 23 Oct 2018 16:11:25 -0700
Subject: [PATCH 0601/1890] x86/build: Remove -pipe from KBUILD_CFLAGS

Commit 77b0bf55bc67 ("kbuild/Makefile: Prepare for using macros in
inline assembly code to work around asm() related GCC inlining bugs")
added -Wa,- to KBUILD_CFLAGS, which breaks compiling with Clang (hangs
indefinitely at compiling init/main.o). This happens because while Clang
accepts -pipe (and has it documented in its list of supported flags), it
silently ignores it after this 2010 commit (thanks to Nick Desaulniers
for tracking this down), meaning that gas just infinitely waits for
stdin and never receives it.

https://github.com/llvm-mirror/clang/commit/c19a12dc3d441bec62eed55e312b76c12d6d9022

Initially, I had suggested just add -Wa,- to KBUILD_CFLAGS when GCC was
being used but that was before realizing it is because Clang doesn't do
anything with -pipe. H. Peter Anvin suggested checking to see if -pipe
gives us any gains out of GCC. Turns out it might actually be hurting:

With -pipe:

real    3m40.813s
real    3m44.449s
real    3m39.648s

Without -pipe:

real    3m38.492s
real    3m38.335s
real    3m38.975s

The issue of -Wa,- being passed along to gas without -pipe being
supported should still probably be fixed on the LLVM side (open issue:
https://bugs.llvm.org/show_bug.cgi?id=39410) but this is not as much of
a workaround anymore since it helps both GCC and Clang.

Suggested-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Nadav Amit <namit@vmware.com>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Kees Cook <keescook@chromium.org>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Link: https://github.com/ClangBuiltLinux/linux/issues/213
Link: https://lkml.kernel.org/r/20181023231125.27976-1-natechancellor@gmail.com
---
 arch/x86/Makefile | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 5b562e4640099..88398fdf81291 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -213,8 +213,6 @@ ifdef CONFIG_X86_64
 KBUILD_LDFLAGS += $(call ld-option, -z max-page-size=0x200000)
 endif
 
-# Speed up the build
-KBUILD_CFLAGS += -pipe
 # Workaround for a gcc prelease that unfortunately was shipped in a suse release
 KBUILD_CFLAGS += -Wno-sign-compare
 #
@@ -239,7 +237,7 @@ archheaders:
 archmacros:
 	$(Q)$(MAKE) $(build)=arch/x86/kernel arch/x86/kernel/macros.s
 
-ASM_MACRO_FLAGS = -Wa,arch/x86/kernel/macros.s -Wa,-
+ASM_MACRO_FLAGS = -Wa,arch/x86/kernel/macros.s
 export ASM_MACRO_FLAGS
 KBUILD_CFLAGS += $(ASM_MACRO_FLAGS)
 
-- 
GitLab


From 163c8d54a997153ee1a1e07fcac087492ad85b37 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Mon, 5 Nov 2018 07:36:28 +0100
Subject: [PATCH 0602/1890] compiler: remove __no_sanitize_address_or_inline
 again

The __no_sanitize_address_or_inline and __no_kasan_or_inline defines
are almost identical. The only difference is that __no_kasan_or_inline
does not have the 'notrace' attribute.

To be able to replace __no_sanitize_address_or_inline with the older
definition, add 'notrace' to __no_kasan_or_inline and change to two
users of __no_sanitize_address_or_inline in the s390 code.

The 'notrace' option is necessary for e.g. the __load_psw_mask function
in arch/s390/include/asm/processor.h. Without the option it is possible
to trace __load_psw_mask which leads to kernel stack overflow.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Pointed-out-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/s390/include/asm/processor.h |  4 ++--
 include/linux/compiler-gcc.h      | 12 ------------
 include/linux/compiler.h          |  2 +-
 3 files changed, 3 insertions(+), 15 deletions(-)

diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 302795c47c06c..81038ab357ce9 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -236,7 +236,7 @@ static inline unsigned long current_stack_pointer(void)
 	return sp;
 }
 
-static __no_sanitize_address_or_inline unsigned short stap(void)
+static __no_kasan_or_inline unsigned short stap(void)
 {
 	unsigned short cpu_address;
 
@@ -330,7 +330,7 @@ static inline void __load_psw(psw_t psw)
  * Set PSW mask to specified value, while leaving the
  * PSW addr pointing to the next instruction.
  */
-static __no_sanitize_address_or_inline void __load_psw_mask(unsigned long mask)
+static __no_kasan_or_inline void __load_psw_mask(unsigned long mask)
 {
 	unsigned long addr;
 	psw_t psw;
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index c0f5db3a96217..2010493e10408 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -143,18 +143,6 @@
 #define KASAN_ABI_VERSION 3
 #endif
 
-/*
- * Because __no_sanitize_address conflicts with inlining:
- *   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67368
- * we do one or the other. 
- */
-#ifdef CONFIG_KASAN
-#define __no_sanitize_address_or_inline					\
-	__no_sanitize_address __maybe_unused notrace
-#else
-#define __no_sanitize_address_or_inline inline
-#endif
-
 #if GCC_VERSION >= 50100
 #define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1
 #endif
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 18c80cfa4fc48..06396c1cf127f 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -189,7 +189,7 @@ void __read_once_size(const volatile void *p, void *res, int size)
  * 	https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67368
  * '__maybe_unused' allows us to avoid defined-but-not-used warnings.
  */
-# define __no_kasan_or_inline __no_sanitize_address __maybe_unused
+# define __no_kasan_or_inline __no_sanitize_address notrace __maybe_unused
 #else
 # define __no_kasan_or_inline __always_inline
 #endif
-- 
GitLab


From 21b42eb46834f6245a55ac77bdf3e14c034e2864 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 5 Nov 2018 16:51:49 +0900
Subject: [PATCH 0603/1890] kbuild: rpm-pkg: fix binrpm-pkg breakage when O= is
 used

Zhenzhong Duan reported that running 'make O=/build/kernel binrpm-pkg'
failed with the following errors:

  Running 'make O=/build/kernel binrpm-pkg' failed with below two errors.

  Makefile:600: include/config/auto.conf: No such file or directory

  + cp make -C /mnt/root/kernel O=/build/kernel image_name make -f
  /mnt/root/kernel/Makefile ...
  cp: invalid option -- 'C'
  Try 'cp --help' for more information.

Prior to commit 80463f1b7bf9 ("kbuild: add --include-dir flag only
for out-of-tree build"), both srctree and objtree were added to
--include-dir redundantly, and the wrong code 'make image_name'
was working by relying on that. Now, the potential issue that had
previously been hidden just showed up.

'make image_name' recurses to the generated $(objtree)/Makefile and
ends up with running in srctree, which is incorrect. It should be
invoked with '-f $srctree/Makefile' (or KBUILD_SRC=) to be executed
in objtree.

Fixes: 80463f1b7bf9 ("kbuild: add --include-dir flag only for out-of-tree build")
Reported-by: Zhenzhong Duan <zhenzhong.duan@oracle.com>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 scripts/package/mkspec | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/scripts/package/mkspec b/scripts/package/mkspec
index e05646dc24dcf..009147d4718ee 100755
--- a/scripts/package/mkspec
+++ b/scripts/package/mkspec
@@ -12,6 +12,7 @@
 # how we were called determines which rpms we build and how we build them
 if [ "$1" = prebuilt ]; then
 	S=DEL
+	MAKE="$MAKE -f $srctree/Makefile"
 else
 	S=
 fi
@@ -78,19 +79,19 @@ $S	%prep
 $S	%setup -q
 $S
 $S	%build
-$S	make %{?_smp_mflags} KBUILD_BUILD_VERSION=%{release}
+$S	$MAKE %{?_smp_mflags} KBUILD_BUILD_VERSION=%{release}
 $S
 	%install
 	mkdir -p %{buildroot}/boot
 	%ifarch ia64
 	mkdir -p %{buildroot}/boot/efi
-	cp \$(make image_name) %{buildroot}/boot/efi/vmlinuz-$KERNELRELEASE
+	cp \$($MAKE image_name) %{buildroot}/boot/efi/vmlinuz-$KERNELRELEASE
 	ln -s efi/vmlinuz-$KERNELRELEASE %{buildroot}/boot/
 	%else
-	cp \$(make image_name) %{buildroot}/boot/vmlinuz-$KERNELRELEASE
+	cp \$($MAKE image_name) %{buildroot}/boot/vmlinuz-$KERNELRELEASE
 	%endif
-$M	make %{?_smp_mflags} INSTALL_MOD_PATH=%{buildroot} KBUILD_SRC= modules_install
-	make %{?_smp_mflags} INSTALL_HDR_PATH=%{buildroot}/usr KBUILD_SRC= headers_install
+$M	$MAKE %{?_smp_mflags} INSTALL_MOD_PATH=%{buildroot} modules_install
+	$MAKE %{?_smp_mflags} INSTALL_HDR_PATH=%{buildroot}/usr headers_install
 	cp System.map %{buildroot}/boot/System.map-$KERNELRELEASE
 	cp .config %{buildroot}/boot/config-$KERNELRELEASE
 	bzip2 -9 --keep vmlinux
-- 
GitLab


From 02826a6ba301b72461c3706e1cc66d5571cd327e Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 5 Nov 2018 16:52:34 +0900
Subject: [PATCH 0604/1890] kbuild: deb-pkg: fix bindeb-pkg breakage when O= is
 used

Ard Biesheuvel reports bindeb-pkg with O= option is broken in the
following way:

  ...
    LD [M]  sound/soc/rockchip/snd-soc-rk3399-gru-sound.ko
    LD [M]  sound/soc/rockchip/snd-soc-rockchip-pcm.ko
    LD [M]  sound/soc/rockchip/snd-soc-rockchip-rt5645.ko
    LD [M]  sound/soc/rockchip/snd-soc-rockchip-spdif.ko
    LD [M]  sound/soc/sh/rcar/snd-soc-rcar.ko
   fakeroot -u debian/rules binary
  make KERNELRELEASE=4.19.0-12677-g19beffaf7a99-dirty ARCH=arm64 KBUILD_SRC= intdeb-pkg
  /bin/bash /home/ard/linux/scripts/package/builddeb
  Makefile:600: include/config/auto.conf: No such file or directory
  ***
  *** Configuration file ".config" not found!
  ***
  *** Please run some configurator (e.g. "make oldconfig" or
  *** "make menuconfig" or "make xconfig").
  ***
  make[12]: *** [syncconfig] Error 1
  make[11]: *** [syncconfig] Error 2
  make[10]: *** [include/config/auto.conf] Error 2
  make[9]: *** [__sub-make] Error 2
  ...

Prior to commit 80463f1b7bf9 ("kbuild: add --include-dir flag only
for out-of-tree build"), both srctree and objtree were added to
--include-dir redundantly, and the wrong code '$MAKE image_name'
was working by relying on that. Now, the potential issue that had
previously been hidden just showed up.

'$MAKE image_name' recurses to the generated $(objtree)/Makefile and
ends up with running in srctree, which is incorrect. It should be
invoked with '-f $srctree/Makefile' (or KBUILD_SRC=) to be executed
in objtree.

Fixes: 80463f1b7bf9 ("kbuild: add --include-dir flag only for out-of-tree build")
Reported-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 scripts/package/builddeb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/package/builddeb b/scripts/package/builddeb
index 90c9a8ac7adb8..0b31f4f1f92cf 100755
--- a/scripts/package/builddeb
+++ b/scripts/package/builddeb
@@ -81,7 +81,7 @@ else
 	cp System.map "$tmpdir/boot/System.map-$version"
 	cp $KCONFIG_CONFIG "$tmpdir/boot/config-$version"
 fi
-cp "$($MAKE -s image_name)" "$tmpdir/$installed_image_path"
+cp "$($MAKE -s -f $srctree/Makefile image_name)" "$tmpdir/$installed_image_path"
 
 if grep -q "^CONFIG_OF=y" $KCONFIG_CONFIG ; then
 	# Only some architectures with OF support have this target
-- 
GitLab


From 5d7a5bcb67c70cbc904057ef52d3fcfeb24420bb Mon Sep 17 00:00:00 2001
From: Frank Sorenson <sorenson@redhat.com>
Date: Tue, 30 Oct 2018 15:10:40 -0500
Subject: [PATCH 0605/1890] sunrpc: correct the computation for page_ptr when
 truncating

When truncating the encode buffer, the page_ptr is getting
advanced, causing the next page to be skipped while encoding.
The page is still included in the response, so the response
contains a page of bogus data.

We need to adjust the page_ptr backwards to ensure we encode
the next page into the correct place.

We saw this triggered when concurrent directory modifications caused
nfsd4_encode_direct_fattr() to return nfserr_noent, and the resulting
call to xdr_truncate_encode() corrupted the READDIR reply.

Signed-off-by: Frank Sorenson <sorenson@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/xdr.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 2bbb8d38d2bf5..5cfb9e0a18dcb 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -673,11 +673,10 @@ void xdr_truncate_encode(struct xdr_stream *xdr, size_t len)
 		WARN_ON_ONCE(xdr->iov);
 		return;
 	}
-	if (fraglen) {
+	if (fraglen)
 		xdr->end = head->iov_base + head->iov_len;
-		xdr->page_ptr--;
-	}
 	/* (otherwise assume xdr->end is already set) */
+	xdr->page_ptr--;
 	head->iov_len = len;
 	buf->len = len;
 	xdr->p = head->iov_base + head->iov_len;
-- 
GitLab


From 6fce3a406108ee6c8a61e2a33e52e9198a626ea0 Mon Sep 17 00:00:00 2001
From: Lucas Stach <l.stach@pengutronix.de>
Date: Thu, 4 Oct 2018 11:37:00 +0200
Subject: [PATCH 0606/1890] drm/etnaviv: fix bogus fence complete check in
 timeout handler

The GPU hardware fences and the job out-fences are on different timelines
so it's wrong to compare them. Fix this by only looking at the out-fence.

Cc: <stable@vger.kernel.org>
Fixes: 2c83a726d6fb (drm/etnaviv: bring back progress check in job
                     timeout handler)
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
---
 drivers/gpu/drm/etnaviv/etnaviv_sched.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
index 69e9b431bf1f0..e5a9fae31ab7b 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
@@ -93,7 +93,7 @@ static void etnaviv_sched_timedout_job(struct drm_sched_job *sched_job)
 	 * If the GPU managed to complete this jobs fence, the timout is
 	 * spurious. Bail out.
 	 */
-	if (fence_completed(gpu, submit->out_fence->seqno))
+	if (dma_fence_is_signaled(submit->out_fence))
 		return;
 
 	/*
-- 
GitLab


From c3537fc251503af18085b8f84126d13743663970 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Mon, 1 Oct 2018 12:59:26 -0700
Subject: [PATCH 0607/1890] perf evlist: Move perf_evsel__reset_weak_group into
 evlist

- Move the function from builtin-stat to evlist for reuse
- Rename to evlist to match purpose better
- Pass the evlist as first argument.
- No functional changes

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: http://lkml.kernel.org/r/20181001195927.14211-1-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c | 28 +---------------------------
 tools/perf/util/evlist.c  | 27 +++++++++++++++++++++++++++
 tools/perf/util/evlist.h  |  3 +++
 3 files changed, 31 insertions(+), 27 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index d1028d7755bbc..a635abfa77b6a 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -383,32 +383,6 @@ static bool perf_evsel__should_store_id(struct perf_evsel *counter)
 	return STAT_RECORD || counter->attr.read_format & PERF_FORMAT_ID;
 }
 
-static struct perf_evsel *perf_evsel__reset_weak_group(struct perf_evsel *evsel)
-{
-	struct perf_evsel *c2, *leader;
-	bool is_open = true;
-
-	leader = evsel->leader;
-	pr_debug("Weak group for %s/%d failed\n",
-			leader->name, leader->nr_members);
-
-	/*
-	 * for_each_group_member doesn't work here because it doesn't
-	 * include the first entry.
-	 */
-	evlist__for_each_entry(evsel_list, c2) {
-		if (c2 == evsel)
-			is_open = false;
-		if (c2->leader == leader) {
-			if (is_open)
-				perf_evsel__close(c2);
-			c2->leader = c2;
-			c2->nr_members = 0;
-		}
-	}
-	return leader;
-}
-
 static bool is_target_alive(struct target *_target,
 			    struct thread_map *threads)
 {
@@ -477,7 +451,7 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
 			if ((errno == EINVAL || errno == EBADF) &&
 			    counter->leader != counter &&
 			    counter->weak_group) {
-				counter = perf_evsel__reset_weak_group(counter);
+				counter = perf_evlist__reset_weak_group(evsel_list, counter);
 				goto try_again;
 			}
 
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index e88e6f9b1463f..668d2a9ef0f4b 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1810,3 +1810,30 @@ void perf_evlist__force_leader(struct perf_evlist *evlist)
 		leader->forced_leader = true;
 	}
 }
+
+struct perf_evsel *perf_evlist__reset_weak_group(struct perf_evlist *evsel_list,
+						 struct perf_evsel *evsel)
+{
+	struct perf_evsel *c2, *leader;
+	bool is_open = true;
+
+	leader = evsel->leader;
+	pr_debug("Weak group for %s/%d failed\n",
+			leader->name, leader->nr_members);
+
+	/*
+	 * for_each_group_member doesn't work here because it doesn't
+	 * include the first entry.
+	 */
+	evlist__for_each_entry(evsel_list, c2) {
+		if (c2 == evsel)
+			is_open = false;
+		if (c2->leader == leader) {
+			if (is_open)
+				perf_evsel__close(c2);
+			c2->leader = c2;
+			c2->nr_members = 0;
+		}
+	}
+	return leader;
+}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index dc66436add98a..9919eed6d15bc 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -312,4 +312,7 @@ bool perf_evlist__exclude_kernel(struct perf_evlist *evlist);
 
 void perf_evlist__force_leader(struct perf_evlist *evlist);
 
+struct perf_evsel *perf_evlist__reset_weak_group(struct perf_evlist *evlist,
+						 struct perf_evsel *evsel);
+
 #endif /* __PERF_EVLIST_H */
-- 
GitLab


From cf99ad1424c54fc84b84d3a3deb57a48c340c30a Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Mon, 1 Oct 2018 12:59:27 -0700
Subject: [PATCH 0608/1890] perf record: Support weak groups

Implement a weak group fallback for 'perf record', similar to the
existing 'perf stat' support.  This allows to use groups that might be
longer than the available counters without failing.

Before:

  $ perf record  -e '{cycles,cache-misses,cache-references,cpu_clk_unhalted.thread,cycles,cycles,cycles}' -a sleep 1
  Error:
  The sys_perf_event_open() syscall returned with 22 (Invalid argument) for event (cycles).
  /bin/dmesg | grep -i perf may provide additional information.

After:

  $ ./perf record  -e '{cycles,cache-misses,cache-references,cpu_clk_unhalted.thread,cycles,cycles,cycles}:W' -a sleep 1
  WARNING: No sample_id_all support, falling back to unordered processing
  [ perf record: Woken up 3 times to write data ]
  [ perf record: Captured and wrote 8.136 MB perf.data (134069 samples) ]

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: http://lkml.kernel.org/r/20181001195927.14211-2-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-list.txt | 1 -
 tools/perf/builtin-record.c            | 7 ++++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index 236b9b97dfdb1..667c14e56031b 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -55,7 +55,6 @@ counted. The following modifiers exist:
  S - read sample value (PERF_SAMPLE_READ)
  D - pin the event to the PMU
  W - group is weak and will fallback to non-group if not schedulable,
-     only supported in 'perf stat' for now.
 
 The 'p' modifier can be used for specifying how precise the instruction
 address should be. The 'p' modifier can be specified multiple times:
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 10cf889c6d75d..488779bc4c8d2 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -391,7 +391,12 @@ static int record__open(struct record *rec)
 					ui__warning("%s\n", msg);
 				goto try_again;
 			}
-
+			if ((errno == EINVAL || errno == EBADF) &&
+			    pos->leader != pos &&
+			    pos->weak_group) {
+			        pos = perf_evlist__reset_weak_group(evlist, pos);
+				goto try_again;
+			}
 			rc = -errno;
 			perf_evsel__open_strerror(pos, &opts->target,
 						  errno, msg, sizeof(msg));
-- 
GitLab


From ea1fa48c055f833eb25f0c33188feecb7002ada5 Mon Sep 17 00:00:00 2001
From: Thomas Richter <tmricht@linux.ibm.com>
Date: Tue, 23 Oct 2018 17:16:16 +0200
Subject: [PATCH 0609/1890] perf stat: Handle different PMU names with common
 prefix

On s390 the CPU Measurement Facility for counters now supports
2 PMUs named cpum_cf (CPU Measurement Facility for counters) and
cpum_cf_diag (CPU Measurement Facility for diagnostic counters)
for one and the same CPU.

Running command

 [root@s35lp76 perf]# ./perf stat -e tx_c_tend \
	 -- ~/mytests/cf-tx-events 1

 Measuring transactions
 TX_C_TABORT_NO_SPECIAL: 0 expected:0
 TX_C_TABORT_SPECIAL: 0 expected:0
 TX_C_TEND: 1 expected:1
 TX_NC_TABORT: 11 expected:11
 TX_NC_TEND: 1 expected:1

 Performance counter stats for '/root/mytests/cf-tx-events 1':

  2      tx_c_tend

      0.002120091 seconds time elapsed

      0.000121000 seconds user
      0.002127000 seconds sys

 [root@s35lp76 perf]#

displays output which is unexpected (and wrong):

  2      tx_c_tend

The test program definitely triggers only one transaction, as shown
in line 'TX_C_TEND: 1 expected:1'.

This is caused by the following call sequence:

pmu_lookup() scans and installs a PMU.
+--> pmu_aliases() parses all aliases in directory
		.../<pmu-name>/events/* which are file names.
     +--> pmu_aliases_parse() Read each file in directory and create
                      an new alias entry. This is done with
          +--> perf_pmu__new_alias() and
	       +--> __perf_pmu__new_alias() which also check for
	                   identical alias names.

After pmu_aliases() returns, a complete list of event names
for this pmu has been created. Now function

pmu_add_cpu_aliases()   is called to add the events listed in the json
|                       files to the alias list of the cpu.
+--> perf_pmu__find_map()  Returns a pointer to the json events.

Now function pmu_add_cpu_aliases() scans through all events listed
in the JSON files for this CPU.
Each json event pmu name is compared with the current PMU being
built up and if they mismatch, the json event is added to the
current PMUs alias list.
To avoid duplicate entries the following comparison is done:

	if (!is_arm_pmu_core(name)) {
	     pname = pe->pmu ? pe->pmu : "cpu";
	     if (strncmp(pname, name, strlen(pname)))
		     continue;
     }

The culprit is the strncmp() function.

Using current s390 PMU naming, the first PMU is 'cpum_cf'
and a long list of events is added, among them 'tx_c_tend'

When the second PMU named 'cpum_cf_diag' is added, only one event
named 'CF_DIAG' is added by the pmu_aliases()  function.

Now function pmu_add_cpu_aliases() is invoked for PMU 'cpum_cf_diag'.
Since the CPUID string is the same for both PMUs, json file events
for PMU named 'cpum_cf' are added to the PMU 'cpm_cf_diag'

This happens because the strncmp() actually compares:

     strncmp("cpum_cf", "cpum_cf_diag", 6);

The first parameter is the pmu name taken from the event in
the json file. The second parameter is the pmu name of the PMU
currently being built.
They are different, but the length of the compare only tests the
common prefix and this returns 0(true) when it should return false.

Now all events for PMU cpum_cf are added to the alias list for pmu
cpum_cf_diag.

Later on in function parse_events_add_pmu() the event 'tx_c_end' is
searched in all available PMUs and found twice, adding it two
times to the evsel_list global variable which is the root
of all events. This results in a counter value of 2 instead
of 1.

Output with this patch:

 [root@s35lp76 perf]# ./perf stat -e tx_c_tend \
			-- ~/mytests/cf-tx-events 1
 Measuring transactions
 TX_C_TABORT_NO_SPECIAL: 0 expected:0
 TX_C_TABORT_SPECIAL: 0 expected:0
 TX_C_TEND: 1 expected:1
 TX_NC_TABORT: 11 expected:11
 TX_NC_TEND: 1 expected:1

 Performance counter stats for '/root/mytests/cf-tx-events 1':

                  1      tx_c_tend

      0.001815365 seconds time elapsed

      0.000123000 seconds user
      0.001756000 seconds sys

 [root@s35lp76 perf]#

Signed-off-by: Thomas Richter <tmricht@linux.ibm.com>
Reviewed-by: Hendrik Brueckner <brueckner@linux.ibm.com>
Reviewed-by: Sebastien Boisvert <sboisvert@gydle.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: stable@vger.kernel.org
Fixes: 292c34c10249 ("perf pmu: Fix core PMU alias list for X86 platform")
Link: http://lkml.kernel.org/r/20181023151616.78193-1-tmricht@linux.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/pmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 7799788f662fd..7e49baad304d7 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -773,7 +773,7 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu)
 
 		if (!is_arm_pmu_core(name)) {
 			pname = pe->pmu ? pe->pmu : "cpu";
-			if (strncmp(pname, name, strlen(pname)))
+			if (strcmp(pname, name))
 				continue;
 		}
 
-- 
GitLab


From 590ac60d8aa929bd21e35cd95a7d8720d00eb4f3 Mon Sep 17 00:00:00 2001
From: Jin Yao <yao.jin@linux.intel.com>
Date: Wed, 31 Oct 2018 19:06:35 +0800
Subject: [PATCH 0610/1890] perf top: Display the LBR stats in callchain entry

'perf report' has supported the displaying of LBR stats (such as cycles,
predicted%) in callchain entry.

For example:

  $ perf report --branch-history --stdio

  --1.01%--intel_idle mwait.h:29
            intel_idle cpufeature.h:164 (cycles:5)
            intel_idle cpufeature.h:164 (predicted:76.4%)
            intel_idle mwait.h:102 (cycles:41)
            intel_idle current.h:15

While 'perf top' doesn't support that.

For example:

  $ perf top -a -b --call-graph branch

  -   13.86%     0.23%  [kernel]		[k] __x86_indirect_thunk_rax
     - 13.65% __x86_indirect_thunk_rax
        + 1.69% do_syscall_64
        + 1.68% do_select
        + 1.41% ktime_get
        + 0.70% __schedule
        + 0.62% do_sys_poll
          0.58% __x86_indirect_thunk_rax

Actually it's very easy to enable this feature in 'perf top'.

With this patch, the result is:

  $ perf top -a -b --call-graph branch

  $ -   13.58%     0.00%  [kernel]		[k] __x86_indirect_thunk_rax
     $ - 13.57% __x86_indirect_thunk_rax (predicted:93.9%)
        $ + 1.78% do_select (cycles:2)
        $ + 1.68% perf_pmu_disable.part.99 (cycles:1)
        $ + 1.45% ___sys_recvmsg (cycles:25)
        $ + 0.81% unix_stream_sendmsg (cycles:18)
        $ + 0.80% ktime_get (cycles:400)
          $ 0.58% pick_next_task_fair (cycles:47)
        $ + 0.56% i915_request_retire (cycles:2)
        $ + 0.52% do_sys_poll (cycles:4)

Signed-off-by: Jin Yao <yao.jin@linux.intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1540983995-20462-1-git-send-email-yao.jin@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-top.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index b2838de13de02..aa0c73e579240 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1429,6 +1429,9 @@ int cmd_top(int argc, const char **argv)
 		}
 	}
 
+	if (opts->branch_stack && callchain_param.enabled)
+		symbol_conf.show_branchflag_count = true;
+
 	sort__mode = SORT_MODE__TOP;
 	/* display thread wants entries to be collapsed in a different tree */
 	perf_hpp_list.need_collapse = 1;
-- 
GitLab


From 5ed4419d47f8ba6bbccd8e3203276b3c39a792b7 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Sun, 4 Nov 2018 17:12:35 +0200
Subject: [PATCH 0611/1890] perf scripts python: exported-sql-viewer.py: Fall
 back to /usr/local/lib/libxed.so

Fall back to /usr/local/lib/libxed.so to cater for distributions that do
not have /usr/local/lib in the library path by default.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/20181104151238.15947-2-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/scripts/python/exported-sql-viewer.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py
index 24cb0bd56afa5..20cc8e7879b97 100755
--- a/tools/perf/scripts/python/exported-sql-viewer.py
+++ b/tools/perf/scripts/python/exported-sql-viewer.py
@@ -1929,7 +1929,12 @@ class XEDInstruction():
 class LibXED():
 
 	def __init__(self):
-		self.libxed = CDLL("libxed.so")
+		try:
+			self.libxed = CDLL("libxed.so")
+		except:
+			self.libxed = None
+		if not self.libxed:
+			self.libxed = CDLL("/usr/local/lib/libxed.so")
 
 		self.xed_tables_init = self.libxed.xed_tables_init
 		self.xed_tables_init.restype = None
-- 
GitLab


From 210cf1f96185f0c6383df8b6030e3d2945e1b41a Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Sun, 4 Nov 2018 17:12:36 +0200
Subject: [PATCH 0612/1890] perf scripts python: exported-sql-viewer.py: Add
 Selected branches report

Fetching data from the database can be slow. Add a report that provides
the ability to select a subset of branches.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/20181104151238.15947-3-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../scripts/python/exported-sql-viewer.py     | 327 ++++++++++++++++++
 1 file changed, 327 insertions(+)

diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py
index 20cc8e7879b97..a9d2b3170141f 100755
--- a/tools/perf/scripts/python/exported-sql-viewer.py
+++ b/tools/perf/scripts/python/exported-sql-viewer.py
@@ -119,6 +119,14 @@ def dsoname(name):
 		return "[kernel]"
 	return name
 
+def findnth(s, sub, n, offs=0):
+	pos = s.find(sub)
+	if pos < 0:
+		return pos
+	if n <= 1:
+		return offs + pos
+	return findnth(s[pos + 1:], sub, n - 1, offs + pos + 1)
+
 # Percent to one decimal place
 
 def PercentToOneDP(n, d):
@@ -1464,6 +1472,317 @@ class BranchWindow(QMdiSubWindow):
 		else:
 			self.find_bar.NotFound()
 
+# Dialog data item converted and validated using a SQL table
+
+class SQLTableDialogDataItem():
+
+	def __init__(self, glb, label, placeholder_text, table_name, match_column, column_name1, column_name2, parent):
+		self.glb = glb
+		self.label = label
+		self.placeholder_text = placeholder_text
+		self.table_name = table_name
+		self.match_column = match_column
+		self.column_name1 = column_name1
+		self.column_name2 = column_name2
+		self.parent = parent
+
+		self.value = ""
+
+		self.widget = QLineEdit()
+		self.widget.editingFinished.connect(self.Validate)
+		self.widget.textChanged.connect(self.Invalidate)
+		self.red = False
+		self.error = ""
+		self.validated = True
+
+		self.last_id = 0
+		self.first_time = 0
+		self.last_time = 2 ** 64
+		if self.table_name == "<timeranges>":
+			query = QSqlQuery(self.glb.db)
+			QueryExec(query, "SELECT id, time FROM samples ORDER BY id DESC LIMIT 1")
+			if query.next():
+				self.last_id = int(query.value(0))
+				self.last_time = int(query.value(1))
+			QueryExec(query, "SELECT time FROM samples WHERE time != 0 ORDER BY id LIMIT 1")
+			if query.next():
+				self.first_time = int(query.value(0))
+			if placeholder_text:
+				placeholder_text += ", between " + str(self.first_time) + " and " + str(self.last_time)
+
+		if placeholder_text:
+			self.widget.setPlaceholderText(placeholder_text)
+
+	def ValueToIds(self, value):
+		ids = []
+		query = QSqlQuery(self.glb.db)
+		stmt = "SELECT id FROM " + self.table_name + " WHERE " + self.match_column + " = '" + value + "'"
+		ret = query.exec_(stmt)
+		if ret:
+			while query.next():
+				ids.append(str(query.value(0)))
+		return ids
+
+	def IdBetween(self, query, lower_id, higher_id, order):
+		QueryExec(query, "SELECT id FROM samples WHERE id > " + str(lower_id) + " AND id < " + str(higher_id) + " ORDER BY id " + order + " LIMIT 1")
+		if query.next():
+			return True, int(query.value(0))
+		else:
+			return False, 0
+
+	def BinarySearchTime(self, lower_id, higher_id, target_time, get_floor):
+		query = QSqlQuery(self.glb.db)
+		while True:
+			next_id = int((lower_id + higher_id) / 2)
+			QueryExec(query, "SELECT time FROM samples WHERE id = " + str(next_id))
+			if not query.next():
+				ok, dbid = self.IdBetween(query, lower_id, next_id, "DESC")
+				if not ok:
+					ok, dbid = self.IdBetween(query, next_id, higher_id, "")
+					if not ok:
+						return str(higher_id)
+				next_id = dbid
+				QueryExec(query, "SELECT time FROM samples WHERE id = " + str(next_id))
+			next_time = int(query.value(0))
+			if get_floor:
+				if target_time > next_time:
+					lower_id = next_id
+				else:
+					higher_id = next_id
+				if higher_id <= lower_id + 1:
+					return str(higher_id)
+			else:
+				if target_time >= next_time:
+					lower_id = next_id
+				else:
+					higher_id = next_id
+				if higher_id <= lower_id + 1:
+					return str(lower_id)
+
+	def ConvertRelativeTime(self, val):
+		print "val ", val
+		mult = 1
+		suffix = val[-2:]
+		if suffix == "ms":
+			mult = 1000000
+		elif suffix == "us":
+			mult = 1000
+		elif suffix == "ns":
+			mult = 1
+		else:
+			return val
+		val = val[:-2].strip()
+		if not self.IsNumber(val):
+			return val
+		val = int(val) * mult
+		if val >= 0:
+			val += self.first_time
+		else:
+			val += self.last_time
+		return str(val)
+
+	def ConvertTimeRange(self, vrange):
+		print "vrange ", vrange
+		if vrange[0] == "":
+			vrange[0] = str(self.first_time)
+		if vrange[1] == "":
+			vrange[1] = str(self.last_time)
+		vrange[0] = self.ConvertRelativeTime(vrange[0])
+		vrange[1] = self.ConvertRelativeTime(vrange[1])
+		print "vrange2 ", vrange
+		if not self.IsNumber(vrange[0]) or not self.IsNumber(vrange[1]):
+			return False
+		print "ok1"
+		beg_range = max(int(vrange[0]), self.first_time)
+		end_range = min(int(vrange[1]), self.last_time)
+		if beg_range > self.last_time or end_range < self.first_time:
+			return False
+		print "ok2"
+		vrange[0] = self.BinarySearchTime(0, self.last_id, beg_range, True)
+		vrange[1] = self.BinarySearchTime(1, self.last_id + 1, end_range, False)
+		print "vrange3 ", vrange
+		return True
+
+	def AddTimeRange(self, value, ranges):
+		print "value ", value
+		n = value.count("-")
+		if n == 1:
+			pass
+		elif n == 2:
+			if value.split("-")[1].strip() == "":
+				n = 1
+		elif n == 3:
+			n = 2
+		else:
+			return False
+		pos = findnth(value, "-", n)
+		vrange = [value[:pos].strip() ,value[pos+1:].strip()]
+		if self.ConvertTimeRange(vrange):
+			ranges.append(vrange)
+			return True
+		return False
+
+	def InvalidValue(self, value):
+		self.value = ""
+		palette = QPalette()
+		palette.setColor(QPalette.Text,Qt.red)
+		self.widget.setPalette(palette)
+		self.red = True
+		self.error = self.label + " invalid value '" + value + "'"
+		self.parent.ShowMessage(self.error)
+
+	def IsNumber(self, value):
+		try:
+			x = int(value)
+		except:
+			x = 0
+		return str(x) == value
+
+	def Invalidate(self):
+		self.validated = False
+
+	def Validate(self):
+		input_string = self.widget.text()
+		self.validated = True
+		if self.red:
+			palette = QPalette()
+			self.widget.setPalette(palette)
+			self.red = False
+		if not len(input_string.strip()):
+			self.error = ""
+			self.value = ""
+			return
+		if self.table_name == "<timeranges>":
+			ranges = []
+			for value in [x.strip() for x in input_string.split(",")]:
+				if not self.AddTimeRange(value, ranges):
+					return self.InvalidValue(value)
+			ranges = [("(" + self.column_name1 + " >= " + r[0] + " AND " + self.column_name1 + " <= " + r[1] + ")") for r in ranges]
+			self.value = " OR ".join(ranges)
+		elif self.table_name == "<ranges>":
+			singles = []
+			ranges = []
+			for value in [x.strip() for x in input_string.split(",")]:
+				if "-" in value:
+					vrange = value.split("-")
+					if len(vrange) != 2 or not self.IsNumber(vrange[0]) or not self.IsNumber(vrange[1]):
+						return self.InvalidValue(value)
+					ranges.append(vrange)
+				else:
+					if not self.IsNumber(value):
+						return self.InvalidValue(value)
+					singles.append(value)
+			ranges = [("(" + self.column_name1 + " >= " + r[0] + " AND " + self.column_name1 + " <= " + r[1] + ")") for r in ranges]
+			if len(singles):
+				ranges.append(self.column_name1 + " IN (" + ",".join(singles) + ")")
+			self.value = " OR ".join(ranges)
+		elif self.table_name:
+			all_ids = []
+			for value in [x.strip() for x in input_string.split(",")]:
+				ids = self.ValueToIds(value)
+				if len(ids):
+					all_ids.extend(ids)
+				else:
+					return self.InvalidValue(value)
+			self.value = self.column_name1 + " IN (" + ",".join(all_ids) + ")"
+			if self.column_name2:
+				self.value = "( " + self.value + " OR " + self.column_name2 + " IN (" + ",".join(all_ids) + ") )"
+		else:
+			self.value = input_string.strip()
+		self.error = ""
+		self.parent.ClearMessage()
+
+	def IsValid(self):
+		if not self.validated:
+			self.Validate()
+		if len(self.error):
+			self.parent.ShowMessage(self.error)
+			return False
+		return True
+
+# Selected branch report creation dialog
+
+class SelectedBranchDialog(QDialog):
+
+	def __init__(self, glb, parent=None):
+		super(SelectedBranchDialog, self).__init__(parent)
+
+		self.glb = glb
+
+		self.name = ""
+		self.where_clause = ""
+
+		self.setWindowTitle("Selected Branches")
+		self.setMinimumWidth(600)
+
+		items = (
+			("Report name:", "Enter a name to appear in the window title bar", "", "", "", ""),
+			("Time ranges:", "Enter time ranges", "<timeranges>", "", "samples.id", ""),
+			("CPUs:", "Enter CPUs or ranges e.g. 0,5-6", "<ranges>", "", "cpu", ""),
+			("Commands:", "Only branches with these commands will be included", "comms", "comm", "comm_id", ""),
+			("PIDs:", "Only branches with these process IDs will be included", "threads", "pid", "thread_id", ""),
+			("TIDs:", "Only branches with these thread IDs will be included", "threads", "tid", "thread_id", ""),
+			("DSOs:", "Only branches with these DSOs will be included", "dsos", "short_name", "samples.dso_id", "to_dso_id"),
+			("Symbols:", "Only branches with these symbols will be included", "symbols", "name", "symbol_id", "to_symbol_id"),
+			("Raw SQL clause: ", "Enter a raw SQL WHERE clause", "", "", "", ""),
+			)
+		self.data_items = [SQLTableDialogDataItem(glb, *x, parent=self) for x in items]
+
+		self.grid = QGridLayout()
+
+		for row in xrange(len(self.data_items)):
+			self.grid.addWidget(QLabel(self.data_items[row].label), row, 0)
+			self.grid.addWidget(self.data_items[row].widget, row, 1)
+
+		self.status = QLabel()
+
+		self.ok_button = QPushButton("Ok", self)
+		self.ok_button.setDefault(True)
+		self.ok_button.released.connect(self.Ok)
+		self.ok_button.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed)
+
+		self.cancel_button = QPushButton("Cancel", self)
+		self.cancel_button.released.connect(self.reject)
+		self.cancel_button.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed)
+
+		self.hbox = QHBoxLayout()
+		#self.hbox.addStretch()
+		self.hbox.addWidget(self.status)
+		self.hbox.addWidget(self.ok_button)
+		self.hbox.addWidget(self.cancel_button)
+
+		self.vbox = QVBoxLayout()
+		self.vbox.addLayout(self.grid)
+		self.vbox.addLayout(self.hbox)
+
+		self.setLayout(self.vbox);
+
+	def Ok(self):
+		self.name = self.data_items[0].value
+		if not self.name:
+			self.ShowMessage("Report name is required")
+			return
+		for d in self.data_items:
+			if not d.IsValid():
+				return
+		for d in self.data_items[1:]:
+			if len(d.value):
+				if len(self.where_clause):
+					self.where_clause += " AND "
+				self.where_clause += d.value
+		if len(self.where_clause):
+			self.where_clause = " AND ( " + self.where_clause + " ) "
+		else:
+			self.ShowMessage("No selection")
+			return
+		self.accept()
+
+	def ShowMessage(self, msg):
+		self.status.setText("<font color=#FF0000>" + msg)
+
+	def ClearMessage(self):
+		self.status.setText("")
+
 # Event list
 
 def GetEventList(db):
@@ -1888,6 +2207,8 @@ class MainWindow(QMainWindow):
 			if event == "branches":
 				label = "All branches" if branches_events == 1 else "All branches " + "(id=" + dbid + ")"
 				reports_menu.addAction(CreateAction(label, "Create a new window displaying branch events", lambda x=dbid: self.NewBranchView(x), self))
+				label = "Selected branches" if branches_events == 1 else "Selected branches " + "(id=" + dbid + ")"
+				reports_menu.addAction(CreateAction(label, "Create a new window displaying branch events", lambda x=dbid: self.NewSelectedBranchView(x), self))
 
 	def TableMenu(self, tables, menu):
 		table_menu = menu.addMenu("&Tables")
@@ -1900,6 +2221,12 @@ class MainWindow(QMainWindow):
 	def NewBranchView(self, event_id):
 		BranchWindow(self.glb, event_id, "", "", self)
 
+	def NewSelectedBranchView(self, event_id):
+		dialog = SelectedBranchDialog(self.glb, self)
+		ret = dialog.exec_()
+		if ret:
+			BranchWindow(self.glb, event_id, dialog.name, dialog.where_clause, self)
+
 	def NewTableView(self, table_name):
 		TableWindow(self.glb, table_name, self)
 
-- 
GitLab


From 65b24292e8f34df22a16bf7c47823325ac247572 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Sun, 4 Nov 2018 17:12:37 +0200
Subject: [PATCH 0613/1890] perf scripts python: exported-sql-viewer.py: Add
 help window

Add a window to display help. It is also possible to display the help
only, by using the option "--help-only" instead of a database name.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/20181104151238.15947-4-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../scripts/python/exported-sql-viewer.py     | 155 +++++++++++++++++-
 1 file changed, 154 insertions(+), 1 deletion(-)

diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py
index a9d2b3170141f..c2fcf6c5237ab 100755
--- a/tools/perf/scripts/python/exported-sql-viewer.py
+++ b/tools/perf/scripts/python/exported-sql-viewer.py
@@ -2084,6 +2084,147 @@ class WindowMenu():
 	def setActiveSubWindow(self, nr):
 		self.mdi_area.setActiveSubWindow(self.mdi_area.subWindowList()[nr - 1])
 
+# Help text
+
+glb_help_text = """
+<h1>Contents</h1>
+<style>
+p.c1 {
+    text-indent: 40px;
+}
+p.c2 {
+    text-indent: 80px;
+}
+}
+</style>
+<p class=c1><a href=#reports>1. Reports</a></p>
+<p class=c2><a href=#callgraph>1.1 Context-Sensitive Call Graph</a></p>
+<p class=c2><a href=#allbranches>1.2 All branches</a></p>
+<p class=c2><a href=#selectedbranches>1.3 Selected branches</a></p>
+<p class=c1><a href=#tables>2. Tables</a></p>
+<h1 id=reports>1. Reports</h1>
+<h2 id=callgraph>1.1 Context-Sensitive Call Graph</h2>
+The result is a GUI window with a tree representing a context-sensitive
+call-graph. Expanding a couple of levels of the tree and adjusting column
+widths to suit will display something like:
+<pre>
+                                         Call Graph: pt_example
+Call Path                          Object      Count   Time(ns)  Time(%)  Branch Count   Branch Count(%)
+v- ls
+    v- 2638:2638
+        v- _start                  ld-2.19.so    1     10074071   100.0         211135            100.0
+          |- unknown               unknown       1        13198     0.1              1              0.0
+          >- _dl_start             ld-2.19.so    1      1400980    13.9          19637              9.3
+          >- _d_linit_internal     ld-2.19.so    1       448152     4.4          11094              5.3
+          v-__libc_start_main@plt  ls            1      8211741    81.5         180397             85.4
+             >- _dl_fixup          ld-2.19.so    1         7607     0.1            108              0.1
+             >- __cxa_atexit       libc-2.19.so  1        11737     0.1             10              0.0
+             >- __libc_csu_init    ls            1        10354     0.1             10              0.0
+             |- _setjmp            libc-2.19.so  1            0     0.0              4              0.0
+             v- main               ls            1      8182043    99.6         180254             99.9
+</pre>
+<h3>Points to note:</h3>
+<ul>
+<li>The top level is a command name (comm)</li>
+<li>The next level is a thread (pid:tid)</li>
+<li>Subsequent levels are functions</li>
+<li>'Count' is the number of calls</li>
+<li>'Time' is the elapsed time until the function returns</li>
+<li>Percentages are relative to the level above</li>
+<li>'Branch Count' is the total number of branches for that function and all functions that it calls
+</ul>
+<h3>Find</h3>
+Ctrl-F displays a Find bar which finds function names by either an exact match or a pattern match.
+The pattern matching symbols are ? for any character and * for zero or more characters.
+<h2 id=allbranches>1.2 All branches</h2>
+The All branches report displays all branches in chronological order.
+Not all data is fetched immediately. More records can be fetched using the Fetch bar provided.
+<h3>Disassembly</h3>
+Open a branch to display disassembly. This only works if:
+<ol>
+<li>The disassembler is available. Currently, only Intel XED is supported - see <a href=#xed>Intel XED Setup</a></li>
+<li>The object code is available. Currently, only the perf build ID cache is searched for object code.
+The default directory ~/.debug can be overridden by setting environment variable PERF_BUILDID_DIR.
+One exception is kcore where the DSO long name is used (refer dsos_view on the Tables menu),
+or alternatively, set environment variable PERF_KCORE to the kcore file name.</li>
+</ol>
+<h4 id=xed>Intel XED Setup</h4>
+To use Intel XED, libxed.so must be present.  To build and install libxed.so:
+<pre>
+git clone https://github.com/intelxed/mbuild.git mbuild
+git clone https://github.com/intelxed/xed
+cd xed
+./mfile.py --share
+sudo ./mfile.py --prefix=/usr/local install
+sudo ldconfig
+</pre>
+<h3>Find</h3>
+Ctrl-F displays a Find bar which finds substrings by either an exact match or a regular expression match.
+Refer to Python documentation for the regular expression syntax.
+All columns are searched, but only currently fetched rows are searched.
+<h2 id=selectedbranches>1.3 Selected branches</h2>
+This is the same as the <a href=#allbranches>All branches</a> report but with the data reduced
+by various selection criteria. A dialog box displays available criteria which are AND'ed together.
+<h3>1.3.1 Time ranges</h3>
+The time ranges hint text shows the total time range. Relative time ranges can also be entered in
+ms, us or ns. Also, negative values are relative to the end of trace.  Examples:
+<pre>
+	81073085947329-81073085958238	From 81073085947329 to 81073085958238
+	100us-200us		From 100us to 200us
+	10ms-			From 10ms to the end
+	-100ns			The first 100ns
+	-10ms-			The last 10ms
+</pre>
+N.B. Due to the granularity of timestamps, there could be no branches in any given time range.
+<h1 id=tables>2. Tables</h1>
+The Tables menu shows all tables and views in the database. Most tables have an associated view
+which displays the information in a more friendly way. Not all data for large tables is fetched
+immediately. More records can be fetched using the Fetch bar provided. Columns can be sorted,
+but that can be slow for large tables.
+<p>There are also tables of database meta-information.
+For SQLite3 databases, the sqlite_master table is included.
+For PostgreSQL databases, information_schema.tables/views/columns are included.
+<h3>Find</h3>
+Ctrl-F displays a Find bar which finds substrings by either an exact match or a regular expression match.
+Refer to Python documentation for the regular expression syntax.
+All columns are searched, but only currently fetched rows are searched.
+"""
+
+# Help window
+
+class HelpWindow(QMdiSubWindow):
+
+	def __init__(self, glb, parent=None):
+		super(HelpWindow, self).__init__(parent)
+
+		self.text = QTextBrowser()
+		self.text.setHtml(glb_help_text)
+		self.text.setReadOnly(True)
+		self.text.setOpenExternalLinks(True)
+
+		self.setWidget(self.text)
+
+		AddSubWindow(glb.mainwindow.mdi_area, self, "Exported SQL Viewer Help")
+
+# Main window that only displays the help text
+
+class HelpOnlyWindow(QMainWindow):
+
+	def __init__(self, parent=None):
+		super(HelpOnlyWindow, self).__init__(parent)
+
+		self.setMinimumSize(200, 100)
+		self.resize(800, 600)
+		self.setWindowTitle("Exported SQL Viewer Help")
+		self.setWindowIcon(self.style().standardIcon(QStyle.SP_MessageBoxInformation))
+
+		self.text = QTextBrowser()
+		self.text.setHtml(glb_help_text)
+		self.text.setReadOnly(True)
+		self.text.setOpenExternalLinks(True)
+
+		self.setCentralWidget(self.text)
+
 # Font resize
 
 def ResizeFont(widget, diff):
@@ -2170,6 +2311,9 @@ class MainWindow(QMainWindow):
 
 		self.window_menu = WindowMenu(self.mdi_area, menu)
 
+		help_menu = menu.addMenu("&Help")
+		help_menu.addAction(CreateAction("&Exported SQL Viewer Help", "Helpful information", self.Help, self, QKeySequence.HelpContents))
+
 	def Find(self):
 		win = self.mdi_area.activeSubWindow()
 		if win:
@@ -2230,6 +2374,9 @@ class MainWindow(QMainWindow):
 	def NewTableView(self, table_name):
 		TableWindow(self.glb, table_name, self)
 
+	def Help(self):
+		HelpWindow(self.glb, self)
+
 # XED Disassembler
 
 class xed_state_t(Structure):
@@ -2429,10 +2576,16 @@ class DBRef():
 
 def Main():
 	if (len(sys.argv) < 2):
-		print >> sys.stderr, "Usage is: exported-sql-viewer.py <database name>"
+		print >> sys.stderr, "Usage is: exported-sql-viewer.py {<database name> | --help-only}"
 		raise Exception("Too few arguments")
 
 	dbname = sys.argv[1]
+	if dbname == "--help-only":
+		app = QApplication(sys.argv)
+		mainwindow = HelpOnlyWindow()
+		mainwindow.show()
+		err = app.exec_()
+		sys.exit(err)
 
 	is_sqlite3 = False
 	try:
-- 
GitLab


From 35fa1cee21e34f43db928d022610707d5a234faf Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Sun, 4 Nov 2018 17:12:38 +0200
Subject: [PATCH 0614/1890] perf scripts python: exported-sql-viewer.py: Fix
 table find when table re-ordered

Table rows can be re-ordered by selecting a column to sort by. After
re-ordering, the "find" operation was highlighting the wrong row, fix
it.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/20181104151238.15947-5-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/scripts/python/exported-sql-viewer.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py
index c2fcf6c5237ab..f278ce5ebab76 100755
--- a/tools/perf/scripts/python/exported-sql-viewer.py
+++ b/tools/perf/scripts/python/exported-sql-viewer.py
@@ -1975,7 +1975,7 @@ class TableWindow(QMdiSubWindow, ResizeColumnsToContentsBase):
 	def FindDone(self, row):
 		self.find_bar.Idle()
 		if row >= 0:
-			self.view.setCurrentIndex(self.model.index(row, 0, QModelIndex()))
+			self.view.setCurrentIndex(self.model.mapFromSource(self.data_model.index(row, 0, QModelIndex())))
 		else:
 			self.find_bar.NotFound()
 
@@ -2188,6 +2188,8 @@ For PostgreSQL databases, information_schema.tables/views/columns are included.
 Ctrl-F displays a Find bar which finds substrings by either an exact match or a regular expression match.
 Refer to Python documentation for the regular expression syntax.
 All columns are searched, but only currently fetched rows are searched.
+<p>N.B. Results are found in id order, so if the table is re-ordered, find-next and find-previous
+will go to the next/previous result in id order, instead of display order.
 """
 
 # Help window
-- 
GitLab


From 93f8be2799515e01647c5a9b0d17a90a00ebcf82 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Mon, 5 Nov 2018 09:35:04 +0200
Subject: [PATCH 0615/1890] perf intel-pt: Add more event information to debug
 log

More event information is useful for debugging, especially MMAP events.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Link: http://lkml.kernel.org/r/20181105073505.8129-2-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/intel-pt-decoder/intel-pt-log.c |  5 +++++
 tools/perf/util/intel-pt-decoder/intel-pt-log.h |  1 +
 tools/perf/util/intel-pt.c                      | 16 +++++++++++++---
 3 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.c b/tools/perf/util/intel-pt-decoder/intel-pt-log.c
index e02bc7b166a0e..5e64da270f976 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-log.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.c
@@ -31,6 +31,11 @@ static FILE *f;
 static char log_name[MAX_LOG_NAME];
 bool intel_pt_enable_logging;
 
+void *intel_pt_log_fp(void)
+{
+	return f;
+}
+
 void intel_pt_log_enable(void)
 {
 	intel_pt_enable_logging = true;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.h b/tools/perf/util/intel-pt-decoder/intel-pt-log.h
index 45b64f93f3588..cc084937f701a 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-log.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.h
@@ -22,6 +22,7 @@
 
 struct intel_pt_pkt;
 
+void *intel_pt_log_fp(void);
 void intel_pt_log_enable(void);
 void intel_pt_log_disable(void);
 void intel_pt_log_set_name(const char *name);
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 86cc9a64e9827..149ff361ca789 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -206,6 +206,16 @@ static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf,
 	intel_pt_dump(pt, buf, len);
 }
 
+static void intel_pt_log_event(union perf_event *event)
+{
+	FILE *f = intel_pt_log_fp();
+
+	if (!intel_pt_enable_logging || !f)
+		return;
+
+	perf_event__fprintf(event, f);
+}
+
 static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a,
 				   struct auxtrace_buffer *b)
 {
@@ -2010,9 +2020,9 @@ static int intel_pt_process_event(struct perf_session *session,
 		 event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
 		err = intel_pt_context_switch(pt, event, sample);
 
-	intel_pt_log("event %s (%u): cpu %d time %"PRIu64" tsc %#"PRIx64"\n",
-		     perf_event__name(event->header.type), event->header.type,
-		     sample->cpu, sample->time, timestamp);
+	intel_pt_log("event %u: cpu %d time %"PRIu64" tsc %#"PRIx64" ",
+		     event->header.type, sample->cpu, sample->time, timestamp);
+	intel_pt_log_event(event);
 
 	return err;
 }
-- 
GitLab


From f6c23e3b55cb93f32a724f41af8d38888bc2ab6b Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Mon, 5 Nov 2018 09:35:05 +0200
Subject: [PATCH 0616/1890] perf intel-pt: Add MTC and CYC timestamps to debug
 log

One cause of decoding errors is un-synchronized side-band data.
Timestamps are needed to debug such cases. TSC packet timestamps are
logged. Log also MTC and CYC timestamps.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Link: http://lkml.kernel.org/r/20181105073505.8129-3-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index 58f6a9ceb5909..4503f3ca45ab4 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -1474,6 +1474,8 @@ static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder)
 		decoder->have_calc_cyc_to_tsc = false;
 		intel_pt_calc_cyc_to_tsc(decoder, true);
 	}
+
+	intel_pt_log_to("Setting timestamp", decoder->timestamp);
 }
 
 static void intel_pt_calc_cbr(struct intel_pt_decoder *decoder)
@@ -1514,6 +1516,8 @@ static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder)
 		decoder->timestamp = timestamp;
 
 	decoder->timestamp_insn_cnt = 0;
+
+	intel_pt_log_to("Setting timestamp", decoder->timestamp);
 }
 
 /* Walk PSB+ packets when already in sync. */
-- 
GitLab


From c0fae7e2452b90c31edd2d25eb3baf0c76b400ca Mon Sep 17 00:00:00 2001
From: Aaro Koskinen <aaro.koskinen@iki.fi>
Date: Sat, 27 Oct 2018 01:46:34 +0300
Subject: [PATCH 0617/1890] MIPS: OCTEON: fix out of bounds array access on
 CN68XX

The maximum number of interfaces is returned by
cvmx_helper_get_number_of_interfaces(), and the value is used to access
interface_port_count[]. When CN68XX support was added, we forgot
to increase the array size. Fix that.

Fixes: 2c8c3f0201333 ("MIPS: Octeon: Support additional interfaces on CN68XX")
Signed-off-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Signed-off-by: Paul Burton <paul.burton@mips.com>
Patchwork: https://patchwork.linux-mips.org/patch/20949/
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Cc: stable@vger.kernel.org # v4.3+
---
 arch/mips/cavium-octeon/executive/cvmx-helper.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/cavium-octeon/executive/cvmx-helper.c b/arch/mips/cavium-octeon/executive/cvmx-helper.c
index 75108ec669ebc..6c79e8a16a268 100644
--- a/arch/mips/cavium-octeon/executive/cvmx-helper.c
+++ b/arch/mips/cavium-octeon/executive/cvmx-helper.c
@@ -67,7 +67,7 @@ void (*cvmx_override_pko_queue_priority) (int pko_port,
 void (*cvmx_override_ipd_port_setup) (int ipd_port);
 
 /* Port count per interface */
-static int interface_port_count[5];
+static int interface_port_count[9];
 
 /**
  * Return the number of interfaces the chip has. Each interface
-- 
GitLab


From d01501f85249848a2497968d46dd46d5c6fe32e6 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@linux-mips.org>
Date: Thu, 1 Nov 2018 07:54:24 +0000
Subject: [PATCH 0618/1890] MIPS: Fix `dma_alloc_coherent' returning a
 non-coherent allocation

Fix a MIPS `dma_alloc_coherent' regression from commit bc3ec75de545
("dma-mapping: merge direct and noncoherent ops") that causes a cached
allocation to be returned on noncoherent cache systems.

This is due to an inverted check now used in the MIPS implementation of
`arch_dma_alloc' on the result from `dma_direct_alloc_pages' before
doing the cached-to-uncached mapping of the allocation address obtained.
The mapping has to be done for a non-NULL rather than NULL result,
because a NULL result means the allocation has failed.

Invert the check for correct operation then.

Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
Signed-off-by: Paul Burton <paul.burton@mips.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Fixes: bc3ec75de545 ("dma-mapping: merge direct and noncoherent ops")
Patchwork: https://patchwork.linux-mips.org/patch/20965/
---
 arch/mips/mm/dma-noncoherent.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c
index e6c9485cadcff..cb38461391cb7 100644
--- a/arch/mips/mm/dma-noncoherent.c
+++ b/arch/mips/mm/dma-noncoherent.c
@@ -50,7 +50,7 @@ void *arch_dma_alloc(struct device *dev, size_t size,
 	void *ret;
 
 	ret = dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
-	if (!ret && !(attrs & DMA_ATTR_NON_CONSISTENT)) {
+	if (ret && !(attrs & DMA_ATTR_NON_CONSISTENT)) {
 		dma_cache_wback_inv((unsigned long) ret, size);
 		ret = (void *)UNCAC_ADDR(ret);
 	}
-- 
GitLab


From 7900f06c3feedbe74d79f28a0d062dbf11fd10bf Mon Sep 17 00:00:00 2001
From: Jon Mason <jon.mason@broadcom.com>
Date: Wed, 19 Sep 2018 11:31:40 -0400
Subject: [PATCH 0619/1890] MAINTAINERS: Remove self from Broadcom SoCs

I'm leaving Broadcom, and will no longer have access to hardware and
documentation necessary to be effective in a maintainership role.

Signed-off-by: Jon Mason <jon.mason@broadcom.com>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
---
 MAINTAINERS | 2 --
 1 file changed, 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index f4855974f3250..f3ea3287195d6 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2869,7 +2869,6 @@ F:	arch/mips/include/asm/mach-bcm47xx/*
 BROADCOM BCM5301X ARM ARCHITECTURE
 M:	Hauke Mehrtens <hauke@hauke-m.de>
 M:	RafaÅ‚ MiÅ‚ecki <zajec5@gmail.com>
-M:	Jon Mason <jonmason@broadcom.com>
 M:	bcm-kernel-feedback-list@broadcom.com
 L:	linux-arm-kernel@lists.infradead.org
 S:	Maintained
@@ -3015,7 +3014,6 @@ F:	drivers/net/ethernet/broadcom/genet/
 BROADCOM IPROC ARM ARCHITECTURE
 M:	Ray Jui <rjui@broadcom.com>
 M:	Scott Branden <sbranden@broadcom.com>
-M:	Jon Mason <jonmason@broadcom.com>
 M:	bcm-kernel-feedback-list@broadcom.com
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 T:	git git://github.com/broadcom/cygnus-linux.git
-- 
GitLab


From e2c39f36c354a06c6e9d32d4fdf8660b41803d82 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 5 Nov 2018 15:46:51 -0300
Subject: [PATCH 0620/1890] perf beauty: Use SRCARCH, ARCH=x86_64 must map to
 "x86" to find the headers

Guenter reported that using ARCH=x86_64 to build perf has regressed:

  $ make -C tools/perf O=/tmp/build/perf ARCH=x86_64
  make: Entering directory '/home/acme/git/perf/tools/perf'
    BUILD:   Doing 'make -j4' parallel build
    HOSTCC   /tmp/build/perf/fixdep.o
    HOSTLD   /tmp/build/perf/fixdep-in.o
    LINK     /tmp/build/perf/fixdep

  Auto-detecting system features:
  ...                         dwarf: [ on  ]
  <SNIP>
  ...                           bpf: [ on  ]

    GEN      /tmp/build/perf/common-cmds.h
  make[2]: *** No rule to make target '/home/acme/git/perf/tools/arch/x86_64/include/uapi/asm//mman.h', needed by '/tmp/build/perf/trace/beauty/generated/mmap_flags_array.c'.  Stop.
  make[2]: *** Waiting for unfinished jobs....
    PERF_VERSION = 4.19.gf6c23e3
  make[1]: *** [Makefile.perf:207: sub-make] Error 2
  make: *** [Makefile:70: all] Error 2
  make: Leaving directory '/home/acme/git/perf/tools/perf'
  $

This is because we must use $(SRCARCH) where we were using $(ARCH), so
that, just like the top level Makefile, we get this done:

  # Additional ARCH settings for x86
  ifeq ($(ARCH),i386)
          SRCARCH := x86
  endif
  ifeq ($(ARCH),x86_64)
          SRCARCH := x86
  endif

Which is done in tools/scripts/Makefile.arch, so switch to use
$(SRCARCH).

Reported-by: Guenter Roeck <linux@roeck-us.net>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Clark Williams <williams@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Fixes: fbd7458db757 ("perf beauty: Wire up the mmap flags table generator to the Makefile")
Link: https://lkml.kernel.org/r/20181105184612.GD7077@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Makefile.perf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 3ccb4f0bf0883..d95655489f7e1 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -387,7 +387,7 @@ SHELL = $(SHELL_PATH)
 
 linux_uapi_dir := $(srctree)/tools/include/uapi/linux
 asm_generic_uapi_dir := $(srctree)/tools/include/uapi/asm-generic
-arch_asm_uapi_dir := $(srctree)/tools/arch/$(ARCH)/include/uapi/asm/
+arch_asm_uapi_dir := $(srctree)/tools/arch/$(SRCARCH)/include/uapi/asm/
 
 beauty_outdir := $(OUTPUT)trace/beauty/generated
 beauty_ioctl_outdir := $(beauty_outdir)/ioctl
-- 
GitLab


From fd5ba6ee3187617287fb9cb187e3d6b3631210a3 Mon Sep 17 00:00:00 2001
From: Aaro Koskinen <aaro.koskinen@nokia.com>
Date: Fri, 2 Nov 2018 21:10:48 +0200
Subject: [PATCH 0621/1890] arm64: dts: stratix10: fix multicast filtering

On Stratix 10, the EMAC has 256 hash buckets for multicast filtering. This
needs to be specified in DTS, otherwise the stmmac driver defaults to 64
buckets and initializes the filter incorrectly. As a result, e.g. valid
IPv6 multicast traffic ends up being dropped.

Fixes: 78cd6a9d8e15 ("arm64: dts: Add base stratix 10 dtsi")
Cc: stable@vger.kernel.org
Signed-off-by: Aaro Koskinen <aaro.koskinen@nokia.com>
Signed-off-by: Dinh Nguyen <dinguyen@kernel.org>
---
 arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
index 8253a1a9e9857..fef7351e9f677 100644
--- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
+++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
@@ -139,6 +139,7 @@ gmac0: ethernet@ff800000 {
 			clock-names = "stmmaceth";
 			tx-fifo-depth = <16384>;
 			rx-fifo-depth = <16384>;
+			snps,multicast-filter-bins = <256>;
 			status = "disabled";
 		};
 
@@ -154,6 +155,7 @@ gmac1: ethernet@ff802000 {
 			clock-names = "stmmaceth";
 			tx-fifo-depth = <16384>;
 			rx-fifo-depth = <16384>;
+			snps,multicast-filter-bins = <256>;
 			status = "disabled";
 		};
 
@@ -169,6 +171,7 @@ gmac2: ethernet@ff804000 {
 			clock-names = "stmmaceth";
 			tx-fifo-depth = <16384>;
 			rx-fifo-depth = <16384>;
+			snps,multicast-filter-bins = <256>;
 			status = "disabled";
 		};
 
-- 
GitLab


From 6ac2226229d931153331a93d90655a3de05b9290 Mon Sep 17 00:00:00 2001
From: Gustavo Romero <gromero@linux.vnet.ibm.com>
Date: Thu, 1 Nov 2018 20:13:21 -0400
Subject: [PATCH 0622/1890] perf tools: Fix undefined symbol scnprintf in
 libperf-jvmti.so

Currently jvmti agent can not be used because function scnprintf is not
present in the agent libperf-jvmti.so. As a result the JVM when using
such agent to record JITed code profiling information will fail on
looking up scnprintf:

  java: symbol lookup error: lib/libperf-jvmti.so: undefined symbol: scnprintf

This commit fixes that by reverting to the use of snprintf, that can be
looked up, instead of scnprintf, adding a proper check for the returned
value in order to print a better error message when the jitdump file
pathname is too long. Checking the returned value also helps to comply
with some recent gcc versions, like gcc8, which will fail due to
truncated writing checks related to the -Werror=format-truncation= flag.

Signed-off-by: Gustavo Romero <gromero@linux.vnet.ibm.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
LPU-Reference: 1541117601-18937-2-git-send-email-gromero@linux.vnet.ibm.com
Link: https://lkml.kernel.org/n/tip-mvpxxxy7wnzaj74cq75muw3f@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/jvmti/jvmti_agent.c | 49 ++++++++++++++++++++++++++--------
 1 file changed, 38 insertions(+), 11 deletions(-)

diff --git a/tools/perf/jvmti/jvmti_agent.c b/tools/perf/jvmti/jvmti_agent.c
index ac1bcdc17dae7..f7eb63cbbc655 100644
--- a/tools/perf/jvmti/jvmti_agent.c
+++ b/tools/perf/jvmti/jvmti_agent.c
@@ -125,7 +125,7 @@ perf_get_timestamp(void)
 }
 
 static int
-debug_cache_init(void)
+create_jit_cache_dir(void)
 {
 	char str[32];
 	char *base, *p;
@@ -144,8 +144,13 @@ debug_cache_init(void)
 
 	strftime(str, sizeof(str), JIT_LANG"-jit-%Y%m%d", &tm);
 
-	snprintf(jit_path, PATH_MAX - 1, "%s/.debug/", base);
-
+	ret = snprintf(jit_path, PATH_MAX, "%s/.debug/", base);
+	if (ret >= PATH_MAX) {
+		warnx("jvmti: cannot generate jit cache dir because %s/.debug/"
+			" is too long, please check the cwd, JITDUMPDIR, and"
+			" HOME variables", base);
+		return -1;
+	}
 	ret = mkdir(jit_path, 0755);
 	if (ret == -1) {
 		if (errno != EEXIST) {
@@ -154,20 +159,32 @@ debug_cache_init(void)
 		}
 	}
 
-	snprintf(jit_path, PATH_MAX - 1, "%s/.debug/jit", base);
+	ret = snprintf(jit_path, PATH_MAX, "%s/.debug/jit", base);
+	if (ret >= PATH_MAX) {
+		warnx("jvmti: cannot generate jit cache dir because"
+			" %s/.debug/jit is too long, please check the cwd,"
+			" JITDUMPDIR, and HOME variables", base);
+		return -1;
+	}
 	ret = mkdir(jit_path, 0755);
 	if (ret == -1) {
 		if (errno != EEXIST) {
-			warn("cannot create jit cache dir %s", jit_path);
+			warn("jvmti: cannot create jit cache dir %s", jit_path);
 			return -1;
 		}
 	}
 
-	snprintf(jit_path, PATH_MAX - 1, "%s/.debug/jit/%s.XXXXXXXX", base, str);
-
+	ret = snprintf(jit_path, PATH_MAX, "%s/.debug/jit/%s.XXXXXXXX", base, str);
+	if (ret >= PATH_MAX) {
+		warnx("jvmti: cannot generate jit cache dir because"
+			" %s/.debug/jit/%s.XXXXXXXX is too long, please check"
+			" the cwd, JITDUMPDIR, and HOME variables",
+			base, str);
+		return -1;
+	}
 	p = mkdtemp(jit_path);
 	if (p != jit_path) {
-		warn("cannot create jit cache dir %s", jit_path);
+		warn("jvmti: cannot create jit cache dir %s", jit_path);
 		return -1;
 	}
 
@@ -228,7 +245,7 @@ void *jvmti_open(void)
 {
 	char dump_path[PATH_MAX];
 	struct jitheader header;
-	int fd;
+	int fd, ret;
 	FILE *fp;
 
 	init_arch_timestamp();
@@ -245,12 +262,22 @@ void *jvmti_open(void)
 
 	memset(&header, 0, sizeof(header));
 
-	debug_cache_init();
+	/*
+	 * jitdump file dir
+	 */
+	if (create_jit_cache_dir() < 0)
+		return NULL;
 
 	/*
 	 * jitdump file name
 	 */
-	scnprintf(dump_path, PATH_MAX, "%s/jit-%i.dump", jit_path, getpid());
+	ret = snprintf(dump_path, PATH_MAX, "%s/jit-%i.dump", jit_path, getpid());
+	if (ret >= PATH_MAX) {
+		warnx("jvmti: cannot generate jitdump file full path because"
+			" %s/jit-%i.dump is too long, please check the cwd,"
+			" JITDUMPDIR, and HOME variables", jit_path, getpid());
+		return NULL;
+	}
 
 	fd = open(dump_path, O_CREAT|O_TRUNC|O_RDWR, 0666);
 	if (fd == -1)
-- 
GitLab


From af31b04b67f4fd7f639fd465a507c154c46fc9fb Mon Sep 17 00:00:00 2001
From: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
Date: Tue, 30 Oct 2018 21:50:25 -0400
Subject: [PATCH 0623/1890] tools/testing/nvdimm: Fix the array size for dimm
 devices.

KASAN reports following global out of bounds access while
nfit_test is being loaded. The out of bound access happens
the following reference to dimm_fail_cmd_flags[dimm]. 'dimm' is
over than the index value, NUM_DCR (==5).

  static int override_return_code(int dimm, unsigned int func, int rc)
  {
          if ((1 << func) & dimm_fail_cmd_flags[dimm]) {

dimm_fail_cmd_flags[] definition:
  static unsigned long dimm_fail_cmd_flags[NUM_DCR];

'dimm' is the return value of get_dimm(), and get_dimm() returns
the index of handle[] array. The handle[] has 7 index. Let's use
ARRAY_SIZE(handle) as the array size.

KASAN report:

==================================================================
BUG: KASAN: global-out-of-bounds in nfit_test_ctl+0x47bb/0x55b0 [nfit_test]
Read of size 8 at addr ffffffffc10cbbe8 by task kworker/u41:0/8
...
Call Trace:
 dump_stack+0xea/0x1b0
 ? dump_stack_print_info.cold.0+0x1b/0x1b
 ? kmsg_dump_rewind_nolock+0xd9/0xd9
 print_address_description+0x65/0x22e
 ? nfit_test_ctl+0x47bb/0x55b0 [nfit_test]
 kasan_report.cold.6+0x92/0x1a6
 nfit_test_ctl+0x47bb/0x55b0 [nfit_test]
...
The buggy address belongs to the variable:
 dimm_fail_cmd_flags+0x28/0xffffffffffffa440 [nfit_test]
==================================================================

Fixes: 39611e83a28c ("tools/testing/nvdimm: Make DSM failure code injection...")
Signed-off-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 tools/testing/nvdimm/test/nfit.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index 9527d47a1070e..01ec04bf91b59 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -140,8 +140,8 @@ static u32 handle[] = {
 	[6] = NFIT_DIMM_HANDLE(1, 0, 0, 0, 1),
 };
 
-static unsigned long dimm_fail_cmd_flags[NUM_DCR];
-static int dimm_fail_cmd_code[NUM_DCR];
+static unsigned long dimm_fail_cmd_flags[ARRAY_SIZE(handle)];
+static int dimm_fail_cmd_code[ARRAY_SIZE(handle)];
 
 static const struct nd_intel_smart smart_def = {
 	.flags = ND_INTEL_SMART_HEALTH_VALID
@@ -205,7 +205,7 @@ struct nfit_test {
 		unsigned long deadline;
 		spinlock_t lock;
 	} ars_state;
-	struct device *dimm_dev[NUM_DCR];
+	struct device *dimm_dev[ARRAY_SIZE(handle)];
 	struct nd_intel_smart *smart;
 	struct nd_intel_smart_threshold *smart_threshold;
 	struct badrange badrange;
@@ -2680,7 +2680,7 @@ static int nfit_test_probe(struct platform_device *pdev)
 		u32 nfit_handle = __to_nfit_memdev(nfit_mem)->device_handle;
 		int i;
 
-		for (i = 0; i < NUM_DCR; i++)
+		for (i = 0; i < ARRAY_SIZE(handle); i++)
 			if (nfit_handle == handle[i])
 				dev_set_drvdata(nfit_test->dimm_dev[i],
 						nfit_mem);
-- 
GitLab


From 8229706e03e4147f3e22d1de0d30630cde6d18a9 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Thu, 1 Nov 2018 16:55:19 -0400
Subject: [PATCH 0624/1890] XArray: Fix xa_for_each with a single element at 0

The following sequence of calls would result in an infinite loop in
xa_find_after():

	xa_store(xa, 0, x, GFP_KERNEL);
	index = 0;
	xa_for_each(xa, entry, index, ULONG_MAX, XA_PRESENT) { }

xa_find_after() was confusing the situation where we found no entry in
the tree with finding a multiorder entry, so it would look for the
successor entry forever.  Just check for this case explicitly.  Includes
a few new checks in the test suite to be sure this doesn't reappear.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 lib/test_xarray.c | 30 +++++++++++++++++++++++++++++-
 lib/xarray.c      |  2 ++
 2 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index aa47754150cee..126127658b494 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -702,7 +702,7 @@ static noinline void check_multi_find_2(struct xarray *xa)
 	}
 }
 
-static noinline void check_find(struct xarray *xa)
+static noinline void check_find_1(struct xarray *xa)
 {
 	unsigned long i, j, k;
 
@@ -748,6 +748,34 @@ static noinline void check_find(struct xarray *xa)
 		XA_BUG_ON(xa, xa_get_mark(xa, i, XA_MARK_0));
 	}
 	XA_BUG_ON(xa, !xa_empty(xa));
+}
+
+static noinline void check_find_2(struct xarray *xa)
+{
+	void *entry;
+	unsigned long i, j, index = 0;
+
+	xa_for_each(xa, entry, index, ULONG_MAX, XA_PRESENT) {
+		XA_BUG_ON(xa, true);
+	}
+
+	for (i = 0; i < 1024; i++) {
+		xa_store_index(xa, index, GFP_KERNEL);
+		j = 0;
+		index = 0;
+		xa_for_each(xa, entry, index, ULONG_MAX, XA_PRESENT) {
+			XA_BUG_ON(xa, xa_mk_value(index) != entry);
+			XA_BUG_ON(xa, index != j++);
+		}
+	}
+
+	xa_destroy(xa);
+}
+
+static noinline void check_find(struct xarray *xa)
+{
+	check_find_1(xa);
+	check_find_2(xa);
 	check_multi_find(xa);
 	check_multi_find_2(xa);
 }
diff --git a/lib/xarray.c b/lib/xarray.c
index 8b176f009c087..c991ff4523ef1 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -1829,6 +1829,8 @@ void *xa_find_after(struct xarray *xa, unsigned long *indexp,
 			entry = xas_find_marked(&xas, max, filter);
 		else
 			entry = xas_find(&xas, max);
+		if (xas.xa_node == XAS_BOUNDS)
+			break;
 		if (xas.xa_shift) {
 			if (xas.xa_index & ((1UL << xas.xa_shift) - 1))
 				continue;
-- 
GitLab


From 9ee5a3b7eeb190eb413e0fac3246022bd1baa05d Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Thu, 1 Nov 2018 22:52:06 -0400
Subject: [PATCH 0625/1890] XArray: Export __xa_foo to non-GPL modules

Without this, it's not possible to use static inlines like xa_store_bh()
and xa_erase_irq().

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 lib/xarray.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/xarray.c b/lib/xarray.c
index c991ff4523ef1..e7be4e47c6a91 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -1334,7 +1334,7 @@ void *__xa_erase(struct xarray *xa, unsigned long index)
 	XA_STATE(xas, xa, index);
 	return xas_result(&xas, xas_store(&xas, NULL));
 }
-EXPORT_SYMBOL_GPL(__xa_erase);
+EXPORT_SYMBOL(__xa_erase);
 
 /**
  * xa_store() - Store this entry in the XArray.
@@ -1674,7 +1674,7 @@ void __xa_set_mark(struct xarray *xa, unsigned long index, xa_mark_t mark)
 	if (entry)
 		xas_set_mark(&xas, mark);
 }
-EXPORT_SYMBOL_GPL(__xa_set_mark);
+EXPORT_SYMBOL(__xa_set_mark);
 
 /**
  * __xa_clear_mark() - Clear this mark on this entry while locked.
@@ -1692,7 +1692,7 @@ void __xa_clear_mark(struct xarray *xa, unsigned long index, xa_mark_t mark)
 	if (entry)
 		xas_clear_mark(&xas, mark);
 }
-EXPORT_SYMBOL_GPL(__xa_clear_mark);
+EXPORT_SYMBOL(__xa_clear_mark);
 
 /**
  * xa_get_mark() - Inquire whether this mark is set on this entry.
-- 
GitLab


From fe2b51145c9ffd5a49013fe180e42e92ef0e6df9 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Mon, 4 Dec 2017 19:33:30 -0500
Subject: [PATCH 0626/1890] nilfs2: Use xa_erase_irq

This code simply opencoded xa_erase_irq().

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 fs/nilfs2/btnode.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index de99db518571b..f2129a5d9f237 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -266,9 +266,7 @@ void nilfs_btnode_abort_change_key(struct address_space *btnc,
 		return;
 
 	if (nbh == NULL) {	/* blocksize == pagesize */
-		xa_lock_irq(&btnc->i_pages);
-		__xa_erase(&btnc->i_pages, newkey);
-		xa_unlock_irq(&btnc->i_pages);
+		xa_erase_irq(&btnc->i_pages, newkey);
 		unlock_page(ctxt->bh->b_page);
 	} else
 		brelse(nbh);
-- 
GitLab


From aba118389a6fb2ad7958de0f37b5869852bd38cf Mon Sep 17 00:00:00 2001
From: "Dmitry V. Levin" <ldv@altlinux.org>
Date: Thu, 1 Nov 2018 14:03:08 +0300
Subject: [PATCH 0627/1890] uapi: fix linux/kfd_ioctl.h userspace compilation
 errors

Consistently use types provided by <linux/types.h> via <drm/drm.h>
to fix the following linux/kfd_ioctl.h userspace compilation errors:

/usr/include/linux/kfd_ioctl.h:250:2: error: unknown type name 'uint32_t'
  uint32_t reset_type;
/usr/include/linux/kfd_ioctl.h:251:2: error: unknown type name 'uint32_t'
  uint32_t reset_cause;
/usr/include/linux/kfd_ioctl.h:252:2: error: unknown type name 'uint32_t'
  uint32_t memory_lost;
/usr/include/linux/kfd_ioctl.h:253:2: error: unknown type name 'uint32_t'
  uint32_t gpu_id;

Fixes: 0c119abad7f0d ("drm/amd: Add kfd ioctl defines for hw_exception event")
Cc: <stable@vger.kernel.org> # v4.19
Signed-off-by: Dmitry V. Levin <ldv@altlinux.org>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/linux/kfd_ioctl.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index f5ff8a76e208f..dae897f38e593 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -255,10 +255,10 @@ struct kfd_hsa_memory_exception_data {
 
 /* hw exception data */
 struct kfd_hsa_hw_exception_data {
-	uint32_t reset_type;
-	uint32_t reset_cause;
-	uint32_t memory_lost;
-	uint32_t gpu_id;
+	__u32 reset_type;
+	__u32 reset_cause;
+	__u32 memory_lost;
+	__u32 gpu_id;
 };
 
 /* Event data */
-- 
GitLab


From 8e7f91719db36440d63de37331367be9700ca0c7 Mon Sep 17 00:00:00 2001
From: "Dmitry V. Levin" <ldv@altlinux.org>
Date: Thu, 1 Nov 2018 14:03:28 +0300
Subject: [PATCH 0628/1890] uapi: fix more linux/kfd_ioctl.h userspace
 compilation errors

Consistently use types provided by <linux/types.h> via <drm/drm.h>
to fix struct kfd_ioctl_get_queue_wave_state_args userspace compilation errors.

Fixes: 5df099e8bc83f ("drm/amdkfd: Add wavefront context save state retrieval ioctl")
Signed-off-by: Dmitry V. Levin <ldv@altlinux.org>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/linux/kfd_ioctl.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index dae897f38e593..b01eb502d49c5 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -83,11 +83,11 @@ struct kfd_ioctl_set_cu_mask_args {
 };
 
 struct kfd_ioctl_get_queue_wave_state_args {
-	uint64_t ctl_stack_address;	/* to KFD */
-	uint32_t ctl_stack_used_size;	/* from KFD */
-	uint32_t save_area_used_size;	/* from KFD */
-	uint32_t queue_id;		/* to KFD */
-	uint32_t pad;
+	__u64 ctl_stack_address;	/* to KFD */
+	__u32 ctl_stack_used_size;	/* from KFD */
+	__u32 save_area_used_size;	/* from KFD */
+	__u32 queue_id;			/* to KFD */
+	__u32 pad;
 };
 
 /* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */
-- 
GitLab


From 6915ed86cca6a0b422e8ada05ec7009d2074b88a Mon Sep 17 00:00:00 2001
From: Jeff Barnhill <0xeffeff@gmail.com>
Date: Mon, 5 Nov 2018 20:36:45 +0000
Subject: [PATCH 0629/1890] net/ipv6: Move anycast init/cleanup functions out
 of CONFIG_PROC_FS

Move the anycast.c init and cleanup functions which were inadvertently
added inside the CONFIG_PROC_FS definition.

Fixes: 2384d02520ff ("net/ipv6: Add anycast addresses to a global hashtable")
Signed-off-by: Jeff Barnhill <0xeffeff@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/anycast.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 7698637cf8276..94999058e1102 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -590,6 +590,7 @@ void ac6_proc_exit(struct net *net)
 {
 	remove_proc_entry("anycast6", net->proc_net);
 }
+#endif
 
 /*	Init / cleanup code
  */
@@ -611,4 +612,3 @@ void ipv6_anycast_cleanup(void)
 		WARN_ON(!hlist_empty(&inet6_acaddr_lst[i]));
 	spin_unlock(&acaddr_hash_lock);
 }
-#endif
-- 
GitLab


From 4c0608f4a0e76dfb82d3accd20081f4bf47ed143 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Tue, 30 Oct 2018 09:45:55 -0400
Subject: [PATCH 0630/1890] XArray: Regularise xa_reserve

The xa_reserve() function was a little unusual in that it attempted to
be callable for all kinds of locking scenarios.  Make it look like the
other APIs with __xa_reserve, xa_reserve_bh and xa_reserve_irq variants.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 Documentation/core-api/xarray.rst | 13 +++++
 include/linux/xarray.h            | 80 ++++++++++++++++++++++++++++++-
 lib/test_xarray.c                 |  6 +++
 lib/xarray.c                      | 18 +++----
 4 files changed, 105 insertions(+), 12 deletions(-)

diff --git a/Documentation/core-api/xarray.rst b/Documentation/core-api/xarray.rst
index a4e705108f428..65c77a81b6891 100644
--- a/Documentation/core-api/xarray.rst
+++ b/Documentation/core-api/xarray.rst
@@ -105,6 +105,15 @@ may result in the entry being marked at some, but not all of the other
 indices.  Storing into one index may result in the entry retrieved by
 some, but not all of the other indices changing.
 
+Sometimes you need to ensure that a subsequent call to :c:func:`xa_store`
+will not need to allocate memory.  The :c:func:`xa_reserve` function
+will store a reserved entry at the indicated index.  Users of the normal
+API will see this entry as containing ``NULL``.  If you do not need to
+use the reserved entry, you can call :c:func:`xa_release` to remove the
+unused entry.  If another user has stored to the entry in the meantime,
+:c:func:`xa_release` will do nothing; if instead you want the entry to
+become ``NULL``, you should use :c:func:`xa_erase`.
+
 Finally, you can remove all entries from an XArray by calling
 :c:func:`xa_destroy`.  If the XArray entries are pointers, you may wish
 to free the entries first.  You can do this by iterating over all present
@@ -167,6 +176,9 @@ Takes xa_lock internally:
  * :c:func:`xa_alloc`
  * :c:func:`xa_alloc_bh`
  * :c:func:`xa_alloc_irq`
+ * :c:func:`xa_reserve`
+ * :c:func:`xa_reserve_bh`
+ * :c:func:`xa_reserve_irq`
  * :c:func:`xa_destroy`
  * :c:func:`xa_set_mark`
  * :c:func:`xa_clear_mark`
@@ -177,6 +189,7 @@ Assumes xa_lock held on entry:
  * :c:func:`__xa_erase`
  * :c:func:`__xa_cmpxchg`
  * :c:func:`__xa_alloc`
+ * :c:func:`__xa_reserve`
  * :c:func:`__xa_set_mark`
  * :c:func:`__xa_clear_mark`
 
diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index d9514928ddacb..c2cb0426c60c3 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -291,7 +291,6 @@ void *xa_load(struct xarray *, unsigned long index);
 void *xa_store(struct xarray *, unsigned long index, void *entry, gfp_t);
 void *xa_cmpxchg(struct xarray *, unsigned long index,
 			void *old, void *entry, gfp_t);
-int xa_reserve(struct xarray *, unsigned long index, gfp_t);
 void *xa_store_range(struct xarray *, unsigned long first, unsigned long last,
 			void *entry, gfp_t);
 bool xa_get_mark(struct xarray *, unsigned long index, xa_mark_t);
@@ -455,6 +454,7 @@ void *__xa_store(struct xarray *, unsigned long index, void *entry, gfp_t);
 void *__xa_cmpxchg(struct xarray *, unsigned long index, void *old,
 		void *entry, gfp_t);
 int __xa_alloc(struct xarray *, u32 *id, u32 max, void *entry, gfp_t);
+int __xa_reserve(struct xarray *, unsigned long index, gfp_t);
 void __xa_set_mark(struct xarray *, unsigned long index, xa_mark_t);
 void __xa_clear_mark(struct xarray *, unsigned long index, xa_mark_t);
 
@@ -621,6 +621,84 @@ static inline int xa_alloc_irq(struct xarray *xa, u32 *id, u32 max, void *entry,
 	return err;
 }
 
+/**
+ * xa_reserve() - Reserve this index in the XArray.
+ * @xa: XArray.
+ * @index: Index into array.
+ * @gfp: Memory allocation flags.
+ *
+ * Ensures there is somewhere to store an entry at @index in the array.
+ * If there is already something stored at @index, this function does
+ * nothing.  If there was nothing there, the entry is marked as reserved.
+ * Loading from a reserved entry returns a %NULL pointer.
+ *
+ * If you do not use the entry that you have reserved, call xa_release()
+ * or xa_erase() to free any unnecessary memory.
+ *
+ * Context: Any context.  Takes and releases the xa_lock.
+ * May sleep if the @gfp flags permit.
+ * Return: 0 if the reservation succeeded or -ENOMEM if it failed.
+ */
+static inline
+int xa_reserve(struct xarray *xa, unsigned long index, gfp_t gfp)
+{
+	int ret;
+
+	xa_lock(xa);
+	ret = __xa_reserve(xa, index, gfp);
+	xa_unlock(xa);
+
+	return ret;
+}
+
+/**
+ * xa_reserve_bh() - Reserve this index in the XArray.
+ * @xa: XArray.
+ * @index: Index into array.
+ * @gfp: Memory allocation flags.
+ *
+ * A softirq-disabling version of xa_reserve().
+ *
+ * Context: Any context.  Takes and releases the xa_lock while
+ * disabling softirqs.
+ * Return: 0 if the reservation succeeded or -ENOMEM if it failed.
+ */
+static inline
+int xa_reserve_bh(struct xarray *xa, unsigned long index, gfp_t gfp)
+{
+	int ret;
+
+	xa_lock_bh(xa);
+	ret = __xa_reserve(xa, index, gfp);
+	xa_unlock_bh(xa);
+
+	return ret;
+}
+
+/**
+ * xa_reserve_irq() - Reserve this index in the XArray.
+ * @xa: XArray.
+ * @index: Index into array.
+ * @gfp: Memory allocation flags.
+ *
+ * An interrupt-disabling version of xa_reserve().
+ *
+ * Context: Process context.  Takes and releases the xa_lock while
+ * disabling interrupts.
+ * Return: 0 if the reservation succeeded or -ENOMEM if it failed.
+ */
+static inline
+int xa_reserve_irq(struct xarray *xa, unsigned long index, gfp_t gfp)
+{
+	int ret;
+
+	xa_lock_irq(xa);
+	ret = __xa_reserve(xa, index, gfp);
+	xa_unlock_irq(xa);
+
+	return ret;
+}
+
 /* Everything below here is the Advanced API.  Proceed with caution. */
 
 /*
diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index 126127658b494..e5294b20b52fd 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -373,6 +373,12 @@ static noinline void check_reserve(struct xarray *xa)
 	xa_erase_index(xa, 12345678);
 	XA_BUG_ON(xa, !xa_empty(xa));
 
+	/* And so does xa_insert */
+	xa_reserve(xa, 12345678, GFP_KERNEL);
+	XA_BUG_ON(xa, xa_insert(xa, 12345678, xa_mk_value(12345678), 0) != 0);
+	xa_erase_index(xa, 12345678);
+	XA_BUG_ON(xa, !xa_empty(xa));
+
 	/* Can iterate through a reserved entry */
 	xa_store_index(xa, 5, GFP_KERNEL);
 	xa_reserve(xa, 6, GFP_KERNEL);
diff --git a/lib/xarray.c b/lib/xarray.c
index e7be4e47c6a91..9cab8cfef8a8e 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -1488,7 +1488,7 @@ void *__xa_cmpxchg(struct xarray *xa, unsigned long index,
 EXPORT_SYMBOL(__xa_cmpxchg);
 
 /**
- * xa_reserve() - Reserve this index in the XArray.
+ * __xa_reserve() - Reserve this index in the XArray.
  * @xa: XArray.
  * @index: Index into array.
  * @gfp: Memory allocation flags.
@@ -1496,33 +1496,29 @@ EXPORT_SYMBOL(__xa_cmpxchg);
  * Ensures there is somewhere to store an entry at @index in the array.
  * If there is already something stored at @index, this function does
  * nothing.  If there was nothing there, the entry is marked as reserved.
- * Loads from @index will continue to see a %NULL pointer until a
- * subsequent store to @index.
+ * Loading from a reserved entry returns a %NULL pointer.
  *
  * If you do not use the entry that you have reserved, call xa_release()
  * or xa_erase() to free any unnecessary memory.
  *
- * Context: Process context.  Takes and releases the xa_lock, IRQ or BH safe
- * if specified in XArray flags.  May sleep if the @gfp flags permit.
+ * Context: Any context.  Expects the xa_lock to be held on entry.  May
+ * release the lock, sleep and reacquire the lock if the @gfp flags permit.
  * Return: 0 if the reservation succeeded or -ENOMEM if it failed.
  */
-int xa_reserve(struct xarray *xa, unsigned long index, gfp_t gfp)
+int __xa_reserve(struct xarray *xa, unsigned long index, gfp_t gfp)
 {
 	XA_STATE(xas, xa, index);
-	unsigned int lock_type = xa_lock_type(xa);
 	void *curr;
 
 	do {
-		xas_lock_type(&xas, lock_type);
 		curr = xas_load(&xas);
 		if (!curr)
 			xas_store(&xas, XA_ZERO_ENTRY);
-		xas_unlock_type(&xas, lock_type);
-	} while (xas_nomem(&xas, gfp));
+	} while (__xas_nomem(&xas, gfp));
 
 	return xas_error(&xas);
 }
-EXPORT_SYMBOL(xa_reserve);
+EXPORT_SYMBOL(__xa_reserve);
 
 #ifdef CONFIG_XARRAY_MULTI
 static void xas_set_range(struct xa_state *xas, unsigned long first,
-- 
GitLab


From c5beb07e7a06b24f4f27304f6282b5dbd929543b Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Wed, 31 Oct 2018 14:39:28 -0400
Subject: [PATCH 0631/1890] XArray: Unify xa_cmpxchg and __xa_cmpxchg

xa_cmpxchg() was one of the largest functions in the xarray
implementation.  By turning it into a wrapper and having the callers
take the lock (like several other functions), we save 160 bytes on a
tinyconfig build and reduce the duplication in xarray.c.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/xarray.h | 113 +++++++++++++++++++++++++----------------
 lib/xarray.c           |  41 ---------------
 2 files changed, 69 insertions(+), 85 deletions(-)

diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index c2cb0426c60c3..8e59d4fbd55e1 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -289,8 +289,6 @@ struct xarray {
 void xa_init_flags(struct xarray *, gfp_t flags);
 void *xa_load(struct xarray *, unsigned long index);
 void *xa_store(struct xarray *, unsigned long index, void *entry, gfp_t);
-void *xa_cmpxchg(struct xarray *, unsigned long index,
-			void *old, void *entry, gfp_t);
 void *xa_store_range(struct xarray *, unsigned long first, unsigned long last,
 			void *entry, gfp_t);
 bool xa_get_mark(struct xarray *, unsigned long index, xa_mark_t);
@@ -359,48 +357,6 @@ static inline void *xa_erase(struct xarray *xa, unsigned long index)
 	return xa_store(xa, index, NULL, 0);
 }
 
-/**
- * xa_insert() - Store this entry in the XArray unless another entry is
- *			already present.
- * @xa: XArray.
- * @index: Index into array.
- * @entry: New entry.
- * @gfp: Memory allocation flags.
- *
- * If you would rather see the existing entry in the array, use xa_cmpxchg().
- * This function is for users who don't care what the entry is, only that
- * one is present.
- *
- * Context: Process context.  Takes and releases the xa_lock.
- *	    May sleep if the @gfp flags permit.
- * Return: 0 if the store succeeded.  -EEXIST if another entry was present.
- * -ENOMEM if memory could not be allocated.
- */
-static inline int xa_insert(struct xarray *xa, unsigned long index,
-		void *entry, gfp_t gfp)
-{
-	void *curr = xa_cmpxchg(xa, index, NULL, entry, gfp);
-	if (!curr)
-		return 0;
-	if (xa_is_err(curr))
-		return xa_err(curr);
-	return -EEXIST;
-}
-
-/**
- * xa_release() - Release a reserved entry.
- * @xa: XArray.
- * @index: Index of entry.
- *
- * After calling xa_reserve(), you can call this function to release the
- * reservation.  If the entry at @index has been stored to, this function
- * will do nothing.
- */
-static inline void xa_release(struct xarray *xa, unsigned long index)
-{
-	xa_cmpxchg(xa, index, NULL, NULL, 0);
-}
-
 /**
  * xa_for_each() - Iterate over a portion of an XArray.
  * @xa: XArray.
@@ -534,6 +490,61 @@ static inline void *xa_erase_irq(struct xarray *xa, unsigned long index)
 	return entry;
 }
 
+/**
+ * xa_cmpxchg() - Conditionally replace an entry in the XArray.
+ * @xa: XArray.
+ * @index: Index into array.
+ * @old: Old value to test against.
+ * @entry: New value to place in array.
+ * @gfp: Memory allocation flags.
+ *
+ * If the entry at @index is the same as @old, replace it with @entry.
+ * If the return value is equal to @old, then the exchange was successful.
+ *
+ * Context: Any context.  Takes and releases the xa_lock.  May sleep
+ * if the @gfp flags permit.
+ * Return: The old value at this index or xa_err() if an error happened.
+ */
+static inline void *xa_cmpxchg(struct xarray *xa, unsigned long index,
+			void *old, void *entry, gfp_t gfp)
+{
+	void *curr;
+
+	xa_lock(xa);
+	curr = __xa_cmpxchg(xa, index, old, entry, gfp);
+	xa_unlock(xa);
+
+	return curr;
+}
+
+/**
+ * xa_insert() - Store this entry in the XArray unless another entry is
+ *			already present.
+ * @xa: XArray.
+ * @index: Index into array.
+ * @entry: New entry.
+ * @gfp: Memory allocation flags.
+ *
+ * If you would rather see the existing entry in the array, use xa_cmpxchg().
+ * This function is for users who don't care what the entry is, only that
+ * one is present.
+ *
+ * Context: Process context.  Takes and releases the xa_lock.
+ *	    May sleep if the @gfp flags permit.
+ * Return: 0 if the store succeeded.  -EEXIST if another entry was present.
+ * -ENOMEM if memory could not be allocated.
+ */
+static inline int xa_insert(struct xarray *xa, unsigned long index,
+		void *entry, gfp_t gfp)
+{
+	void *curr = xa_cmpxchg(xa, index, NULL, entry, gfp);
+	if (!curr)
+		return 0;
+	if (xa_is_err(curr))
+		return xa_err(curr);
+	return -EEXIST;
+}
+
 /**
  * xa_alloc() - Find somewhere to store this entry in the XArray.
  * @xa: XArray.
@@ -699,6 +710,20 @@ int xa_reserve_irq(struct xarray *xa, unsigned long index, gfp_t gfp)
 	return ret;
 }
 
+/**
+ * xa_release() - Release a reserved entry.
+ * @xa: XArray.
+ * @index: Index of entry.
+ *
+ * After calling xa_reserve(), you can call this function to release the
+ * reservation.  If the entry at @index has been stored to, this function
+ * will do nothing.
+ */
+static inline void xa_release(struct xarray *xa, unsigned long index)
+{
+	xa_cmpxchg(xa, index, NULL, NULL, 0);
+}
+
 /* Everything below here is the Advanced API.  Proceed with caution. */
 
 /*
diff --git a/lib/xarray.c b/lib/xarray.c
index 9cab8cfef8a8e..77671d4a79101 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -1406,47 +1406,6 @@ void *__xa_store(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp)
 }
 EXPORT_SYMBOL(__xa_store);
 
-/**
- * xa_cmpxchg() - Conditionally replace an entry in the XArray.
- * @xa: XArray.
- * @index: Index into array.
- * @old: Old value to test against.
- * @entry: New value to place in array.
- * @gfp: Memory allocation flags.
- *
- * If the entry at @index is the same as @old, replace it with @entry.
- * If the return value is equal to @old, then the exchange was successful.
- *
- * Context: Process context.  Takes and releases the xa_lock.  May sleep
- * if the @gfp flags permit.
- * Return: The old value at this index or xa_err() if an error happened.
- */
-void *xa_cmpxchg(struct xarray *xa, unsigned long index,
-			void *old, void *entry, gfp_t gfp)
-{
-	XA_STATE(xas, xa, index);
-	void *curr;
-
-	if (WARN_ON_ONCE(xa_is_internal(entry)))
-		return XA_ERROR(-EINVAL);
-
-	do {
-		xas_lock(&xas);
-		curr = xas_load(&xas);
-		if (curr == XA_ZERO_ENTRY)
-			curr = NULL;
-		if (curr == old) {
-			xas_store(&xas, entry);
-			if (xa_track_free(xa) && entry)
-				xas_clear_mark(&xas, XA_FREE_MARK);
-		}
-		xas_unlock(&xas);
-	} while (xas_nomem(&xas, gfp));
-
-	return xas_result(&xas, curr);
-}
-EXPORT_SYMBOL(xa_cmpxchg);
-
 /**
  * __xa_cmpxchg() - Store this entry in the XArray.
  * @xa: XArray.
-- 
GitLab


From 9c16bb88905456a9b1299338041f05fa7699971b Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Mon, 5 Nov 2018 15:48:49 -0500
Subject: [PATCH 0632/1890] XArray: Turn xa_erase into an exported function

Make xa_erase() take the spinlock and then call __xa_erase(), but make
it out of line since it's such a common function.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/xarray.h | 18 +-----------------
 lib/xarray.c           | 24 ++++++++++++++++++++++++
 2 files changed, 25 insertions(+), 17 deletions(-)

diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 8e59d4fbd55e1..4c839c17a99be 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -289,6 +289,7 @@ struct xarray {
 void xa_init_flags(struct xarray *, gfp_t flags);
 void *xa_load(struct xarray *, unsigned long index);
 void *xa_store(struct xarray *, unsigned long index, void *entry, gfp_t);
+void *xa_erase(struct xarray *, unsigned long index);
 void *xa_store_range(struct xarray *, unsigned long first, unsigned long last,
 			void *entry, gfp_t);
 bool xa_get_mark(struct xarray *, unsigned long index, xa_mark_t);
@@ -340,23 +341,6 @@ static inline bool xa_marked(const struct xarray *xa, xa_mark_t mark)
 	return xa->xa_flags & XA_FLAGS_MARK(mark);
 }
 
-/**
- * xa_erase() - Erase this entry from the XArray.
- * @xa: XArray.
- * @index: Index of entry.
- *
- * This function is the equivalent of calling xa_store() with %NULL as
- * the third argument.  The XArray does not need to allocate memory, so
- * the user does not need to provide GFP flags.
- *
- * Context: Process context.  Takes and releases the xa_lock.
- * Return: The entry which used to be at this index.
- */
-static inline void *xa_erase(struct xarray *xa, unsigned long index)
-{
-	return xa_store(xa, index, NULL, 0);
-}
-
 /**
  * xa_for_each() - Iterate over a portion of an XArray.
  * @xa: XArray.
diff --git a/lib/xarray.c b/lib/xarray.c
index 77671d4a79101..b55aa8c1c20fd 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -1336,6 +1336,30 @@ void *__xa_erase(struct xarray *xa, unsigned long index)
 }
 EXPORT_SYMBOL(__xa_erase);
 
+/**
+ * xa_erase() - Erase this entry from the XArray.
+ * @xa: XArray.
+ * @index: Index of entry.
+ *
+ * This function is the equivalent of calling xa_store() with %NULL as
+ * the third argument.  The XArray does not need to allocate memory, so
+ * the user does not need to provide GFP flags.
+ *
+ * Context: Any context.  Takes and releases the xa_lock.
+ * Return: The entry which used to be at this index.
+ */
+void *xa_erase(struct xarray *xa, unsigned long index)
+{
+	void *entry;
+
+	xa_lock(xa);
+	entry = __xa_erase(xa, index);
+	xa_unlock(xa);
+
+	return entry;
+}
+EXPORT_SYMBOL(xa_erase);
+
 /**
  * xa_store() - Store this entry in the XArray.
  * @xa: XArray.
-- 
GitLab


From 84e5acb76dacb8ebd648a86a53907ce0dd616534 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Fri, 26 Oct 2018 14:41:29 -0400
Subject: [PATCH 0633/1890] XArray: Add xa_store_bh() and xa_store_irq()

These convenience wrappers disable interrupts while taking the spinlock.
A number of drivers would otherwise have to open-code these functions.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 Documentation/core-api/xarray.rst |  5 ++-
 include/linux/xarray.h            | 52 +++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/Documentation/core-api/xarray.rst b/Documentation/core-api/xarray.rst
index 65c77a81b6891..8a6e2087de777 100644
--- a/Documentation/core-api/xarray.rst
+++ b/Documentation/core-api/xarray.rst
@@ -167,6 +167,8 @@ Takes RCU read lock:
 
 Takes xa_lock internally:
  * :c:func:`xa_store`
+ * :c:func:`xa_store_bh`
+ * :c:func:`xa_store_irq`
  * :c:func:`xa_insert`
  * :c:func:`xa_erase`
  * :c:func:`xa_erase_bh`
@@ -247,7 +249,8 @@ Sharing the XArray with interrupt context is also possible, either
 using :c:func:`xa_lock_irqsave` in both the interrupt handler and process
 context, or :c:func:`xa_lock_irq` in process context and :c:func:`xa_lock`
 in the interrupt handler.  Some of the more common patterns have helper
-functions such as :c:func:`xa_erase_bh` and :c:func:`xa_erase_irq`.
+functions such as :c:func:`xa_store_bh`, :c:func:`xa_store_irq`,
+:c:func:`xa_erase_bh` and :c:func:`xa_erase_irq`.
 
 Sometimes you need to protect access to the XArray with a mutex because
 that lock sits above another mutex in the locking hierarchy.  That does
diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 4c839c17a99be..52d9732e4ec41 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -426,6 +426,58 @@ static inline int __xa_insert(struct xarray *xa, unsigned long index,
 	return -EEXIST;
 }
 
+/**
+ * xa_store_bh() - Store this entry in the XArray.
+ * @xa: XArray.
+ * @index: Index into array.
+ * @entry: New entry.
+ * @gfp: Memory allocation flags.
+ *
+ * This function is like calling xa_store() except it disables softirqs
+ * while holding the array lock.
+ *
+ * Context: Any context.  Takes and releases the xa_lock while
+ * disabling softirqs.
+ * Return: The entry which used to be at this index.
+ */
+static inline void *xa_store_bh(struct xarray *xa, unsigned long index,
+		void *entry, gfp_t gfp)
+{
+	void *curr;
+
+	xa_lock_bh(xa);
+	curr = __xa_store(xa, index, entry, gfp);
+	xa_unlock_bh(xa);
+
+	return curr;
+}
+
+/**
+ * xa_store_irq() - Erase this entry from the XArray.
+ * @xa: XArray.
+ * @index: Index into array.
+ * @entry: New entry.
+ * @gfp: Memory allocation flags.
+ *
+ * This function is like calling xa_store() except it disables interrupts
+ * while holding the array lock.
+ *
+ * Context: Process context.  Takes and releases the xa_lock while
+ * disabling interrupts.
+ * Return: The entry which used to be at this index.
+ */
+static inline void *xa_store_irq(struct xarray *xa, unsigned long index,
+		void *entry, gfp_t gfp)
+{
+	void *curr;
+
+	xa_lock_irq(xa);
+	curr = __xa_store(xa, index, entry, gfp);
+	xa_unlock_irq(xa);
+
+	return curr;
+}
+
 /**
  * xa_erase_bh() - Erase this entry from the XArray.
  * @xa: XArray.
-- 
GitLab


From 611f318637daa5710a1d7a0e7dc6cda23914094a Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Mon, 5 Nov 2018 15:56:17 -0500
Subject: [PATCH 0634/1890] XArray: Unify xa_store and __xa_store

Saves around 115 bytes on a tinyconfig build and reduces the amount
of code duplication in the XArray implementation.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 lib/xarray.c | 58 ++++++++++++++++++++++------------------------------
 1 file changed, 25 insertions(+), 33 deletions(-)

diff --git a/lib/xarray.c b/lib/xarray.c
index b55aa8c1c20fd..a9d28013f9dc1 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -1361,23 +1361,21 @@ void *xa_erase(struct xarray *xa, unsigned long index)
 EXPORT_SYMBOL(xa_erase);
 
 /**
- * xa_store() - Store this entry in the XArray.
+ * __xa_store() - Store this entry in the XArray.
  * @xa: XArray.
  * @index: Index into array.
  * @entry: New entry.
  * @gfp: Memory allocation flags.
  *
- * After this function returns, loads from this index will return @entry.
- * Storing into an existing multislot entry updates the entry of every index.
- * The marks associated with @index are unaffected unless @entry is %NULL.
+ * You must already be holding the xa_lock when calling this function.
+ * It will drop the lock if needed to allocate memory, and then reacquire
+ * it afterwards.
  *
- * Context: Process context.  Takes and releases the xa_lock.  May sleep
- * if the @gfp flags permit.
- * Return: The old entry at this index on success, xa_err(-EINVAL) if @entry
- * cannot be stored in an XArray, or xa_err(-ENOMEM) if memory allocation
- * failed.
+ * Context: Any context.  Expects xa_lock to be held on entry.  May
+ * release and reacquire xa_lock if @gfp flags permit.
+ * Return: The old entry at this index or xa_err() if an error happened.
  */
-void *xa_store(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp)
+void *__xa_store(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp)
 {
 	XA_STATE(xas, xa, index);
 	void *curr;
@@ -1386,49 +1384,43 @@ void *xa_store(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp)
 		return XA_ERROR(-EINVAL);
 
 	do {
-		xas_lock(&xas);
 		curr = xas_store(&xas, entry);
 		if (xa_track_free(xa) && entry)
 			xas_clear_mark(&xas, XA_FREE_MARK);
-		xas_unlock(&xas);
-	} while (xas_nomem(&xas, gfp));
+	} while (__xas_nomem(&xas, gfp));
 
 	return xas_result(&xas, curr);
 }
-EXPORT_SYMBOL(xa_store);
+EXPORT_SYMBOL(__xa_store);
 
 /**
- * __xa_store() - Store this entry in the XArray.
+ * xa_store() - Store this entry in the XArray.
  * @xa: XArray.
  * @index: Index into array.
  * @entry: New entry.
  * @gfp: Memory allocation flags.
  *
- * You must already be holding the xa_lock when calling this function.
- * It will drop the lock if needed to allocate memory, and then reacquire
- * it afterwards.
+ * After this function returns, loads from this index will return @entry.
+ * Storing into an existing multislot entry updates the entry of every index.
+ * The marks associated with @index are unaffected unless @entry is %NULL.
  *
- * Context: Any context.  Expects xa_lock to be held on entry.  May
- * release and reacquire xa_lock if @gfp flags permit.
- * Return: The old entry at this index or xa_err() if an error happened.
+ * Context: Any context.  Takes and releases the xa_lock.
+ * May sleep if the @gfp flags permit.
+ * Return: The old entry at this index on success, xa_err(-EINVAL) if @entry
+ * cannot be stored in an XArray, or xa_err(-ENOMEM) if memory allocation
+ * failed.
  */
-void *__xa_store(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp)
+void *xa_store(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp)
 {
-	XA_STATE(xas, xa, index);
 	void *curr;
 
-	if (WARN_ON_ONCE(xa_is_internal(entry)))
-		return XA_ERROR(-EINVAL);
-
-	do {
-		curr = xas_store(&xas, entry);
-		if (xa_track_free(xa) && entry)
-			xas_clear_mark(&xas, XA_FREE_MARK);
-	} while (__xas_nomem(&xas, gfp));
+	xa_lock(xa);
+	curr = __xa_store(xa, index, entry, gfp);
+	xa_unlock(xa);
 
-	return xas_result(&xas, curr);
+	return curr;
 }
-EXPORT_SYMBOL(__xa_store);
+EXPORT_SYMBOL(xa_store);
 
 /**
  * __xa_cmpxchg() - Store this entry in the XArray.
-- 
GitLab


From d9c480435add8257f9069941f0e6196647f6d746 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Mon, 5 Nov 2018 16:15:56 -0500
Subject: [PATCH 0635/1890] XArray: Handle NULL pointers differently for
 allocation

For allocating XArrays, it makes sense to distinguish beteen erasing an
entry and storing NULL.  Storing NULL keeps the index allocated with a
NULL pointer associated with it while xa_erase() frees the index.  Some
existing IDR users rely on this ability.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 Documentation/core-api/xarray.rst | 28 +++++++++++++++++++---------
 lib/xarray.c                      | 13 ++++++++++---
 2 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/Documentation/core-api/xarray.rst b/Documentation/core-api/xarray.rst
index 8a6e2087de777..616ac406bf865 100644
--- a/Documentation/core-api/xarray.rst
+++ b/Documentation/core-api/xarray.rst
@@ -119,18 +119,27 @@ Finally, you can remove all entries from an XArray by calling
 to free the entries first.  You can do this by iterating over all present
 entries in the XArray using the :c:func:`xa_for_each` iterator.
 
-ID assignment
--------------
+Allocating XArrays
+------------------
+
+If you use :c:func:`DEFINE_XARRAY_ALLOC` to define the XArray, or
+initialise it by passing ``XA_FLAGS_ALLOC`` to :c:func:`xa_init_flags`,
+the XArray changes to track whether entries are in use or not.
 
 You can call :c:func:`xa_alloc` to store the entry at any unused index
 in the XArray.  If you need to modify the array from interrupt context,
 you can use :c:func:`xa_alloc_bh` or :c:func:`xa_alloc_irq` to disable
-interrupts while allocating the ID.  Unlike :c:func:`xa_store`, allocating
-a ``NULL`` pointer does not delete an entry.  Instead it reserves an
-entry like :c:func:`xa_reserve` and you can release it using either
-:c:func:`xa_erase` or :c:func:`xa_release`.  To use ID assignment, the
-XArray must be defined with :c:func:`DEFINE_XARRAY_ALLOC`, or initialised
-by passing ``XA_FLAGS_ALLOC`` to :c:func:`xa_init_flags`,
+interrupts while allocating the ID.
+
+Using :c:func:`xa_store`, :c:func:`xa_cmpxchg` or :c:func:`xa_insert`
+will mark the entry as being allocated.  Unlike a normal XArray, storing
+``NULL`` will mark the entry as being in use, like :c:func:`xa_reserve`.
+To free an entry, use :c:func:`xa_erase` (or :c:func:`xa_release` if
+you only want to free the entry if it's ``NULL``).
+
+You cannot use ``XA_MARK_0`` with an allocating XArray as this mark
+is used to track whether an entry is free or not.  The other marks are
+available for your use.
 
 Memory allocation
 -----------------
@@ -338,7 +347,8 @@ to :c:func:`xas_retry`, and retry the operation if it returns ``true``.
      - :c:func:`xa_is_zero`
      - Zero entries appear as ``NULL`` through the Normal API, but occupy
        an entry in the XArray which can be used to reserve the index for
-       future use.
+       future use.  This is used by allocating XArrays for allocated entries
+       which are ``NULL``.
 
 Other internal entries may be added in the future.  As far as possible, they
 will be handled by :c:func:`xas_retry`.
diff --git a/lib/xarray.c b/lib/xarray.c
index a9d28013f9dc1..c3e2084aa3139 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -1382,10 +1382,12 @@ void *__xa_store(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp)
 
 	if (WARN_ON_ONCE(xa_is_internal(entry)))
 		return XA_ERROR(-EINVAL);
+	if (xa_track_free(xa) && !entry)
+		entry = XA_ZERO_ENTRY;
 
 	do {
 		curr = xas_store(&xas, entry);
-		if (xa_track_free(xa) && entry)
+		if (xa_track_free(xa))
 			xas_clear_mark(&xas, XA_FREE_MARK);
 	} while (__xas_nomem(&xas, gfp));
 
@@ -1446,6 +1448,8 @@ void *__xa_cmpxchg(struct xarray *xa, unsigned long index,
 
 	if (WARN_ON_ONCE(xa_is_internal(entry)))
 		return XA_ERROR(-EINVAL);
+	if (xa_track_free(xa) && !entry)
+		entry = XA_ZERO_ENTRY;
 
 	do {
 		curr = xas_load(&xas);
@@ -1453,7 +1457,7 @@ void *__xa_cmpxchg(struct xarray *xa, unsigned long index,
 			curr = NULL;
 		if (curr == old) {
 			xas_store(&xas, entry);
-			if (xa_track_free(xa) && entry)
+			if (xa_track_free(xa))
 				xas_clear_mark(&xas, XA_FREE_MARK);
 		}
 	} while (__xas_nomem(&xas, gfp));
@@ -1487,8 +1491,11 @@ int __xa_reserve(struct xarray *xa, unsigned long index, gfp_t gfp)
 
 	do {
 		curr = xas_load(&xas);
-		if (!curr)
+		if (!curr) {
 			xas_store(&xas, XA_ZERO_ENTRY);
+			if (xa_track_free(xa))
+				xas_clear_mark(&xas, XA_FREE_MARK);
+		}
 	} while (__xas_nomem(&xas, gfp));
 
 	return xas_error(&xas);
-- 
GitLab


From 804dfaf01bcc9daa4298c608ba9018abf616ec48 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Mon, 5 Nov 2018 16:37:15 -0500
Subject: [PATCH 0636/1890] XArray: Fix Documentation

Minor fixes.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 Documentation/core-api/xarray.rst |  6 +++++-
 include/linux/xarray.h            |  4 ++--
 lib/xarray.c                      | 10 +++++-----
 3 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/Documentation/core-api/xarray.rst b/Documentation/core-api/xarray.rst
index 616ac406bf865..dbe96cb5558ef 100644
--- a/Documentation/core-api/xarray.rst
+++ b/Documentation/core-api/xarray.rst
@@ -74,7 +74,8 @@ using :c:func:`xa_load`.  xa_store will overwrite any entry with the
 new entry and return the previous entry stored at that index.  You can
 use :c:func:`xa_erase` instead of calling :c:func:`xa_store` with a
 ``NULL`` entry.  There is no difference between an entry that has never
-been stored to and one that has most recently had ``NULL`` stored to it.
+been stored to, one that has been erased and one that has most recently
+had ``NULL`` stored to it.
 
 You can conditionally replace an entry at an index by using
 :c:func:`xa_cmpxchg`.  Like :c:func:`cmpxchg`, it will only succeed if
@@ -114,6 +115,9 @@ unused entry.  If another user has stored to the entry in the meantime,
 :c:func:`xa_release` will do nothing; if instead you want the entry to
 become ``NULL``, you should use :c:func:`xa_erase`.
 
+If all entries in the array are ``NULL``, the :c:func:`xa_empty` function
+will return ``true``.
+
 Finally, you can remove all entries from an XArray by calling
 :c:func:`xa_destroy`.  If the XArray entries are pointers, you may wish
 to free the entries first.  You can do this by iterating over all present
diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 52d9732e4ec41..564892e19f8ca 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -487,7 +487,7 @@ static inline void *xa_store_irq(struct xarray *xa, unsigned long index,
  * the third argument.  The XArray does not need to allocate memory, so
  * the user does not need to provide GFP flags.
  *
- * Context: Process context.  Takes and releases the xa_lock while
+ * Context: Any context.  Takes and releases the xa_lock while
  * disabling softirqs.
  * Return: The entry which used to be at this index.
  */
@@ -622,7 +622,7 @@ static inline int xa_alloc(struct xarray *xa, u32 *id, u32 max, void *entry,
  * Updates the @id pointer with the index, then stores the entry at that
  * index.  A concurrent lookup will not see an uninitialised @id.
  *
- * Context: Process context.  Takes and releases the xa_lock while
+ * Context: Any context.  Takes and releases the xa_lock while
  * disabling softirqs.  May sleep if the @gfp flags permit.
  * Return: 0 on success, -ENOMEM if memory allocation fails or -ENOSPC if
  * there is no more space in the XArray.
diff --git a/lib/xarray.c b/lib/xarray.c
index c3e2084aa3139..7946380cd6c93 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -610,8 +610,8 @@ static int xas_expand(struct xa_state *xas, void *head)
  * (see the xa_cmpxchg() implementation for an example).
  *
  * Return: If the slot already existed, returns the contents of this slot.
- * If the slot was newly created, returns NULL.  If it failed to create the
- * slot, returns NULL and indicates the error in @xas.
+ * If the slot was newly created, returns %NULL.  If it failed to create the
+ * slot, returns %NULL and indicates the error in @xas.
  */
 static void *xas_create(struct xa_state *xas)
 {
@@ -1640,7 +1640,7 @@ EXPORT_SYMBOL(__xa_alloc);
  * @index: Index of entry.
  * @mark: Mark number.
  *
- * Attempting to set a mark on a NULL entry does not succeed.
+ * Attempting to set a mark on a %NULL entry does not succeed.
  *
  * Context: Any context.  Expects xa_lock to be held on entry.
  */
@@ -1710,7 +1710,7 @@ EXPORT_SYMBOL(xa_get_mark);
  * @index: Index of entry.
  * @mark: Mark number.
  *
- * Attempting to set a mark on a NULL entry does not succeed.
+ * Attempting to set a mark on a %NULL entry does not succeed.
  *
  * Context: Process context.  Takes and releases the xa_lock.
  */
@@ -1879,7 +1879,7 @@ static unsigned int xas_extract_marked(struct xa_state *xas, void **dst,
  *
  * The @filter may be an XArray mark value, in which case entries which are
  * marked with that mark will be copied.  It may also be %XA_PRESENT, in
- * which case all entries which are not NULL will be copied.
+ * which case all entries which are not %NULL will be copied.
  *
  * The entries returned may not represent a snapshot of the XArray at a
  * moment in time.  For example, if another thread stores to index 5, then
-- 
GitLab


From 0773495b1f5f1c5e23551843f87b5ff37e7af8f7 Mon Sep 17 00:00:00 2001
From: Max Filippov <jcmvbkbc@gmail.com>
Date: Sun, 4 Nov 2018 01:46:00 -0700
Subject: [PATCH 0637/1890] xtensa: make sure bFLT stack is 16 byte aligned

Xtensa ABI requires stack alignment to be at least 16. In noMMU
configuration ARCH_SLAB_MINALIGN is used to align stack. Make it at
least 16.

This fixes the following runtime error in noMMU configuration, caused by
interaction between insufficiently aligned stack and alloca function,
that results in corruption of on-stack variable in the libc function
glob:

 Caught unhandled exception in 'sh' (pid = 47, pc = 0x02d05d65)
  - should not happen
  EXCCAUSE is 15

Cc: stable@vger.kernel.org
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 arch/xtensa/include/asm/processor.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h
index e4ccb88b79963..677bc76c1d707 100644
--- a/arch/xtensa/include/asm/processor.h
+++ b/arch/xtensa/include/asm/processor.h
@@ -23,7 +23,11 @@
 # error Linux requires the Xtensa Windowed Registers Option.
 #endif
 
-#define ARCH_SLAB_MINALIGN	XCHAL_DATA_WIDTH
+/* Xtensa ABI requires stack alignment to be at least 16 */
+
+#define STACK_ALIGN (XCHAL_DATA_WIDTH > 16 ? XCHAL_DATA_WIDTH : 16)
+
+#define ARCH_SLAB_MINALIGN STACK_ALIGN
 
 /*
  * User space process size: 1 GB.
-- 
GitLab


From 91d7b67000c6e9bd605624079fee5a084238ad92 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Tue, 16 Oct 2018 09:13:46 +0200
Subject: [PATCH 0638/1890] mtd: spi-nor: cadence-quadspi: Return error code in
 cqspi_direct_read_execute()

We return 0 unconditionally in 'cqspi_direct_read_execute()'.
However, 'ret' is set to some error codes in several error handling
paths.

Return 'ret' instead to propagate the error code.

Fixes: ffa639e069fb ("mtd: spi-nor: cadence-quadspi: Add DMA support for direct mode reads")
Cc: <stable@vger.kernel.org>
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
---
 drivers/mtd/spi-nor/cadence-quadspi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/spi-nor/cadence-quadspi.c b/drivers/mtd/spi-nor/cadence-quadspi.c
index e24db817154ee..d846428ef038e 100644
--- a/drivers/mtd/spi-nor/cadence-quadspi.c
+++ b/drivers/mtd/spi-nor/cadence-quadspi.c
@@ -996,7 +996,7 @@ static int cqspi_direct_read_execute(struct spi_nor *nor, u_char *buf,
 err_unmap:
 	dma_unmap_single(nor->dev, dma_dst, len, DMA_FROM_DEVICE);
 
-	return 0;
+	return ret;
 }
 
 static ssize_t cqspi_read(struct spi_nor *nor, loff_t from,
-- 
GitLab


From 90c31cb9a8115a1183daed9a714eea4be0687931 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Fri, 19 Oct 2018 11:02:22 +0200
Subject: [PATCH 0639/1890] mtd: spi-nor: Reset nor->addr_width when SFDP
 parsing failed

Commit 5390a8df769e ("mtd: spi-nor: add support to non-uniform SFDP SPI
NOR flash memories") removed the 'nor->addr_width = 0;' statement when
spi_nor_parse_sfdp() returns an error, thus leaving ->addr_width in an
undefined state which can cause trouble when spi_nor_scan() checks its
value.

Reported-by: Cyrille Pitchen <cyrille.pitchen@wedev4u.fr>
Fixes: 5390a8df769e ("mtd: spi-nor: add support to non-uniform SFDP SPI NOR flash memories")
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Reviewed-by: Tudor Ambarus <tudor.ambarus@microchip.com>
---
 drivers/mtd/spi-nor/spi-nor.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 9407ca5f94433..3e54e31889c7b 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -3250,12 +3250,14 @@ static int spi_nor_init_params(struct spi_nor *nor,
 		memcpy(&sfdp_params, params, sizeof(sfdp_params));
 		memcpy(&prev_map, &nor->erase_map, sizeof(prev_map));
 
-		if (spi_nor_parse_sfdp(nor, &sfdp_params))
+		if (spi_nor_parse_sfdp(nor, &sfdp_params)) {
+			nor->addr_width = 0;
 			/* restore previous erase map */
 			memcpy(&nor->erase_map, &prev_map,
 			       sizeof(nor->erase_map));
-		else
+		} else {
 			memcpy(params, &sfdp_params, sizeof(*params));
+		}
 	}
 
 	return 0;
-- 
GitLab


From 5e1acb4afacc6229946c3d2b7ffc422b53fb2448 Mon Sep 17 00:00:00 2001
From: Alexey Kodanev <alexey.kodanev@oracle.com>
Date: Fri, 2 Nov 2018 19:11:04 +0300
Subject: [PATCH 0640/1890] rtnetlink: restore handling of dumpit return value
 in rtnl_dump_all()

For non-zero return from dumpit() we should break the loop
in rtnl_dump_all() and return the result. Otherwise, e.g.,
we could get the memory leak in inet6_dump_fib() [1]. The
pointer to the allocated struct fib6_walker there (saved
in cb->args) can be lost, reset on the next iteration.

Fix it by partially restoring the previous behavior before
commit c63586dc9b3e ("net: rtnl_dump_all needs to propagate
error from dumpit function"). The returned error from
dumpit() is still passed further.

[1]:
unreferenced object 0xffff88001322a200 (size 96):
  comm "sshd", pid 1484, jiffies 4296032768 (age 1432.542s)
  hex dump (first 32 bytes):
    00 01 00 00 00 00 ad de 00 02 00 00 00 00 ad de  ................
    18 09 41 36 00 88 ff ff 18 09 41 36 00 88 ff ff  ..A6......A6....
  backtrace:
    [<0000000095846b39>] kmem_cache_alloc_trace+0x151/0x220
    [<000000007d12709f>] inet6_dump_fib+0x68d/0x940
    [<000000002775a316>] rtnl_dump_all+0x1d9/0x2d0
    [<00000000d7cd302b>] netlink_dump+0x945/0x11a0
    [<000000002f43485f>] __netlink_dump_start+0x55d/0x800
    [<00000000f76bbeec>] rtnetlink_rcv_msg+0x4fa/0xa00
    [<000000009b5761f3>] netlink_rcv_skb+0x29c/0x420
    [<0000000087a1dae1>] rtnetlink_rcv+0x15/0x20
    [<00000000691b703b>] netlink_unicast+0x4e3/0x6c0
    [<00000000b5be0204>] netlink_sendmsg+0x7f2/0xba0
    [<0000000096d2aa60>] sock_sendmsg+0xba/0xf0
    [<000000008c1b786f>] __sys_sendto+0x1e4/0x330
    [<0000000019587b3f>] __x64_sys_sendto+0xe1/0x1a0
    [<00000000071f4d56>] do_syscall_64+0x9f/0x300
    [<000000002737577f>] entry_SYSCALL_64_after_hwframe+0x44/0xa9
    [<0000000057587684>] 0xffffffffffffffff

Fixes: c63586dc9b3e ("net: rtnl_dump_all needs to propagate error from dumpit function")
Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index e01274bd5e3e2..33d9227a8b807 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3367,7 +3367,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
 			cb->seq = 0;
 		}
 		ret = dumpit(skb, cb);
-		if (ret < 0)
+		if (ret)
 			break;
 	}
 	cb->family = idx;
-- 
GitLab


From e22d0bfa09a56e427a1a950ccb85621c86b343a4 Mon Sep 17 00:00:00 2001
From: Alexey Kodanev <alexey.kodanev@oracle.com>
Date: Fri, 2 Nov 2018 19:11:05 +0300
Subject: [PATCH 0641/1890] ipv6: properly check return value in
 inet6_dump_all()

Make sure we call fib6_dump_end() if it happens that skb->len
is zero. rtnl_dump_all() can reset cb->args on the next loop
iteration there.

Fixes: 08e814c9e8eb ("net/ipv6: Bail early if user only wants cloned entries")
Fixes: ae677bbb4441 ("net: Don't return invalid table id error when dumping all families")
Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_fib.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 1b8bc008b53b6..ae3786132c236 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -591,7 +591,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 
 	/* fib entries are never clones */
 	if (arg.filter.flags & RTM_F_CLONED)
-		return skb->len;
+		goto out;
 
 	w = (void *)cb->args[2];
 	if (!w) {
@@ -621,7 +621,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
 		tb = fib6_get_table(net, arg.filter.table_id);
 		if (!tb) {
 			if (arg.filter.dump_all_families)
-				return skb->len;
+				goto out;
 
 			NL_SET_ERR_MSG_MOD(cb->extack, "FIB table does not exist");
 			return -ENOENT;
-- 
GitLab


From d016b4a3562b745fd9fa387a47d8de62ccd7e241 Mon Sep 17 00:00:00 2001
From: "Matwey V. Kornilov" <matwey@sai.msu.ru>
Date: Fri, 2 Nov 2018 21:19:36 +0300
Subject: [PATCH 0642/1890] net: core: netpoll: Enable netconsole IPv6 link
 local address

There is no reason to discard using source link local address when
remote netconsole IPv6 address is set to be link local one.

The patch allows administrators to use IPv6 netconsole without
explicitly configuring source address:

    netconsole=@/,@fe80::5054:ff:fe2f:6012/

Signed-off-by: Matwey V. Kornilov <matwey@sai.msu.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/netpoll.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 5da9552b186bc..2b9fdbc43205f 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -717,7 +717,8 @@ int netpoll_setup(struct netpoll *np)
 
 				read_lock_bh(&idev->lock);
 				list_for_each_entry(ifp, &idev->addr_list, if_list) {
-					if (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)
+					if (!!(ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL) !=
+					    !!(ipv6_addr_type(&np->remote_ip.in6) & IPV6_ADDR_LINKLOCAL))
 						continue;
 					np->local_ip.in6 = ifp->addr;
 					err = 0;
-- 
GitLab


From c34c1287778b080ed692c0a46a8e345206cc29e6 Mon Sep 17 00:00:00 2001
From: Andrei Vagin <avagin@gmail.com>
Date: Sun, 4 Nov 2018 22:37:15 -0800
Subject: [PATCH 0643/1890] sock_diag: fix autoloading of the raw_diag module

IPPROTO_RAW isn't registred as an inet protocol, so
inet_protos[protocol] is always NULL for it.

Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Xin Long <lucien.xin@gmail.com>
Fixes: bf2ae2e4bf93 ("sock_diag: request _diag module only when the family or proto has been registered")
Signed-off-by: Andrei Vagin <avagin@gmail.com>
Reviewed-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/core/sock.c b/net/core/sock.c
index 6fcc4bc07d19b..080a880a1761b 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -3279,6 +3279,7 @@ int sock_load_diag_module(int family, int protocol)
 
 #ifdef CONFIG_INET
 	if (family == AF_INET &&
+	    protocol != IPPROTO_RAW &&
 	    !rcu_access_pointer(inet_protos[protocol]))
 		return -ENOENT;
 #endif
-- 
GitLab


From 97adaddaa6db7a8af81b9b11e30cbe3628cd6700 Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Mon, 5 Nov 2018 22:31:41 +0900
Subject: [PATCH 0644/1890] net: bpfilter: fix iptables failure if bpfilter_umh
 is disabled

When iptables command is executed, ip_{set/get}sockopt() try to upload
bpfilter.ko if bpfilter is enabled. if it couldn't find bpfilter.ko,
command is failed.
bpfilter.ko is generated if CONFIG_BPFILTER_UMH is enabled.
ip_{set/get}sockopt() only checks CONFIG_BPFILTER.
So that if CONFIG_BPFILTER is enabled and CONFIG_BPFILTER_UMH is disabled,
iptables command is always failed.

test config:
   CONFIG_BPFILTER=y
   # CONFIG_BPFILTER_UMH is not set

test command:
   %iptables -L
   iptables: No chain/target/match by that name.

Fixes: d2ba09c17a06 ("net: add skeleton of bpfilter kernel module")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_sockglue.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 26c36cccabdc2..fffcc130900e5 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1246,7 +1246,7 @@ int ip_setsockopt(struct sock *sk, int level,
 		return -ENOPROTOOPT;
 
 	err = do_ip_setsockopt(sk, level, optname, optval, optlen);
-#ifdef CONFIG_BPFILTER
+#if IS_ENABLED(CONFIG_BPFILTER_UMH)
 	if (optname >= BPFILTER_IPT_SO_SET_REPLACE &&
 	    optname < BPFILTER_IPT_SET_MAX)
 		err = bpfilter_ip_set_sockopt(sk, optname, optval, optlen);
@@ -1559,7 +1559,7 @@ int ip_getsockopt(struct sock *sk, int level,
 	int err;
 
 	err = do_ip_getsockopt(sk, level, optname, optval, optlen, 0);
-#ifdef CONFIG_BPFILTER
+#if IS_ENABLED(CONFIG_BPFILTER_UMH)
 	if (optname >= BPFILTER_IPT_SO_GET_INFO &&
 	    optname < BPFILTER_IPT_GET_MAX)
 		err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen);
@@ -1596,7 +1596,7 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname,
 	err = do_ip_getsockopt(sk, level, optname, optval, optlen,
 		MSG_CMSG_COMPAT);
 
-#ifdef CONFIG_BPFILTER
+#if IS_ENABLED(CONFIG_BPFILTER_UMH)
 	if (optname >= BPFILTER_IPT_SO_GET_INFO &&
 	    optname < BPFILTER_IPT_GET_MAX)
 		err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen);
-- 
GitLab


From 7131193157414ac3167d7b2f2feb4c42b415d6c5 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Mon, 5 Nov 2018 18:52:21 +0100
Subject: [PATCH 0645/1890] net: alx: make alx_drv_name static

alx_drv_name is not used outside main.c, so there's no reason for it to
have external linkage.

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/atheros/alx/alx.h  | 1 -
 drivers/net/ethernet/atheros/alx/main.c | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/atheros/alx/alx.h b/drivers/net/ethernet/atheros/alx/alx.h
index 78c5de467426f..9d0e74f6b089d 100644
--- a/drivers/net/ethernet/atheros/alx/alx.h
+++ b/drivers/net/ethernet/atheros/alx/alx.h
@@ -140,6 +140,5 @@ struct alx_priv {
 };
 
 extern const struct ethtool_ops alx_ethtool_ops;
-extern const char alx_drv_name[];
 
 #endif
diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
index 7968c644ad861..c131cfc1b79df 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -49,7 +49,7 @@
 #include "hw.h"
 #include "reg.h"
 
-const char alx_drv_name[] = "alx";
+static const char alx_drv_name[] = "alx";
 
 static void alx_free_txbuf(struct alx_tx_queue *txq, int entry)
 {
-- 
GitLab


From 5841734fa6f997e57e8c29cbb6409bd74a6949e8 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Mon, 5 Nov 2018 16:44:59 -0800
Subject: [PATCH 0646/1890] scsi: target/core: Avoid that a kernel oops is
 triggered when COMPARE AND WRITE fails

Fixes: aa73237dcb2d ("scsi: target/core: Always call transport_complete_callback() upon failure")
Reviewed-by: David Disseldorp <ddiss@suse.de>
Cc: Nicholas Bellinger <nab@linux-iscsi.org>
Cc: Mike Christie <mchristi@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.de>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/target/target_core_transport.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index e31e4fc31aa15..2cfd61d62e973 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -1778,7 +1778,7 @@ EXPORT_SYMBOL(target_submit_tmr);
 void transport_generic_request_failure(struct se_cmd *cmd,
 		sense_reason_t sense_reason)
 {
-	int ret = 0;
+	int ret = 0, post_ret;
 
 	pr_debug("-----[ Storage Engine Exception; sense_reason %d\n",
 		 sense_reason);
@@ -1790,7 +1790,7 @@ void transport_generic_request_failure(struct se_cmd *cmd,
 	transport_complete_task_attr(cmd);
 
 	if (cmd->transport_complete_callback)
-		cmd->transport_complete_callback(cmd, false, NULL);
+		cmd->transport_complete_callback(cmd, false, &post_ret);
 
 	if (transport_check_aborted_status(cmd, 1))
 		return;
-- 
GitLab


From f8f4adc1c1661686f492cf27817844a3d0517aff Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 2 Nov 2018 16:34:49 +0100
Subject: [PATCH 0647/1890] scsi: myrb: fix sprintf buffer overflow warning

gcc warns that the 12 byte fw_version field might not be long enough to
contain the generated firmware name string:

drivers/scsi/myrb.c: In function 'myrb_get_hba_config':
drivers/scsi/myrb.c:1052:38: error: '%02d' directive writing between 2 and 3 bytes into a region of size between 2 and 5 [-Werror=format-overflow=]
  sprintf(cb->fw_version, "%d.%02d-%c-%02d",
                                      ^~~~
drivers/scsi/myrb.c:1052:26: note: directive argument in the range [0, 255]
  sprintf(cb->fw_version, "%d.%02d-%c-%02d",
                          ^~~~~~~~~~~~~~~~~
drivers/scsi/myrb.c:1052:2: note: 'sprintf' output between 10 and 14 bytes into a destination of size 12
  sprintf(cb->fw_version, "%d.%02d-%c-%02d",
  ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   enquiry2->fw.major_version,
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~
   enquiry2->fw.minor_version,
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~
   enquiry2->fw.firmware_type,
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~
   enquiry2->fw.turn_id);
   ~~~~~~~~~~~~~~~~~~~~~

I have not checked whether there are appropriate range checks before the
sprintf, but there is a range check after it that will bail out in case
of out of range version numbers. This means we can simply use snprintf()
instead of sprintf() to limit the output buffer size, and it will work
correctly.

Fixes: 081ff398c56c ("scsi: myrb: Add Mylex RAID controller (block interface)")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/myrb.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/myrb.c b/drivers/scsi/myrb.c
index aeb282f617c5c..0642f2d0a3bb6 100644
--- a/drivers/scsi/myrb.c
+++ b/drivers/scsi/myrb.c
@@ -1049,7 +1049,8 @@ static int myrb_get_hba_config(struct myrb_hba *cb)
 		enquiry2->fw.firmware_type = '0';
 		enquiry2->fw.turn_id = 0;
 	}
-	sprintf(cb->fw_version, "%d.%02d-%c-%02d",
+	snprintf(cb->fw_version, sizeof(cb->fw_version),
+		"%d.%02d-%c-%02d",
 		enquiry2->fw.major_version,
 		enquiry2->fw.minor_version,
 		enquiry2->fw.firmware_type,
-- 
GitLab


From f8d294324598ec85bea2779512e48c94cbe4d7c6 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 2 Nov 2018 16:35:48 +0100
Subject: [PATCH 0648/1890] scsi: lpfc: fix remoteport access

The addition of a spinlock in lpfc_debugfs_nodelist_data() introduced
a bug that lets us not skip NULL pointers correctly, as noticed by
gcc-8:

drivers/scsi/lpfc/lpfc_debugfs.c: In function 'lpfc_debugfs_nodelist_data.constprop':
drivers/scsi/lpfc/lpfc_debugfs.c:728:13: error: 'nrport' may be used uninitialized in this function [-Werror=maybe-uninitialized]
   if (nrport->port_role & FC_PORT_ROLE_NVME_INITIATOR)

This changes the logic back to what it was, while keeping the added
spinlock.

Fixes: 9e210178267b ("scsi: lpfc: Synchronize access to remoteport via rport")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_debugfs.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c
index 0c8005bb0f53f..34d311a7dbef1 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.c
+++ b/drivers/scsi/lpfc/lpfc_debugfs.c
@@ -698,6 +698,8 @@ lpfc_debugfs_nodelist_data(struct lpfc_vport *vport, char *buf, int size)
 		rport = lpfc_ndlp_get_nrport(ndlp);
 		if (rport)
 			nrport = rport->remoteport;
+		else
+			nrport = NULL;
 		spin_unlock(&phba->hbalock);
 		if (!nrport)
 			continue;
-- 
GitLab


From 77409c4cdc44560e1b3b839e62d7f73478199680 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 2 Nov 2018 16:44:13 +0100
Subject: [PATCH 0649/1890] scsi: myrs: avoid stack overflow warning

Putting a 1024 byte data structure on the stack is generally a bad idea.
On 32-bit systems, it also triggers a compile-time warning when building
with -Og:

drivers/scsi/myrs.c: In function 'myrs_get_ctlr_info':
drivers/scsi/myrs.c:212:1: error: the frame size of 1028 bytes is larger than 1024 bytes [-Werror=frame-larger-than=]

We only really need three members of the structure, so just read them
manually here instead of copying the entire structure.

Fixes: 77266186397c ("scsi: myrs: Add Mylex RAID controller (SCSI interface)")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/myrs.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/myrs.c b/drivers/scsi/myrs.c
index 0264a2e2bc190..b8d54ef8cf6df 100644
--- a/drivers/scsi/myrs.c
+++ b/drivers/scsi/myrs.c
@@ -163,9 +163,12 @@ static unsigned char myrs_get_ctlr_info(struct myrs_hba *cs)
 	dma_addr_t ctlr_info_addr;
 	union myrs_sgl *sgl;
 	unsigned char status;
-	struct myrs_ctlr_info old;
+	unsigned short ldev_present, ldev_critical, ldev_offline;
+
+	ldev_present = cs->ctlr_info->ldev_present;
+	ldev_critical = cs->ctlr_info->ldev_critical;
+	ldev_offline = cs->ctlr_info->ldev_offline;
 
-	memcpy(&old, cs->ctlr_info, sizeof(struct myrs_ctlr_info));
 	ctlr_info_addr = dma_map_single(&cs->pdev->dev, cs->ctlr_info,
 					sizeof(struct myrs_ctlr_info),
 					DMA_FROM_DEVICE);
@@ -198,9 +201,9 @@ static unsigned char myrs_get_ctlr_info(struct myrs_hba *cs)
 		    cs->ctlr_info->rbld_active +
 		    cs->ctlr_info->exp_active != 0)
 			cs->needs_update = true;
-		if (cs->ctlr_info->ldev_present != old.ldev_present ||
-		    cs->ctlr_info->ldev_critical != old.ldev_critical ||
-		    cs->ctlr_info->ldev_offline != old.ldev_offline)
+		if (cs->ctlr_info->ldev_present != ldev_present ||
+		    cs->ctlr_info->ldev_critical != ldev_critical ||
+		    cs->ctlr_info->ldev_offline != ldev_offline)
 			shost_printk(KERN_INFO, cs->host,
 				     "Logical drive count changes (%d/%d/%d)\n",
 				     cs->ctlr_info->ldev_critical,
-- 
GitLab


From a3ecf48248a393438e77e569a0047e968e0ec6c6 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 2 Nov 2018 16:47:23 +0100
Subject: [PATCH 0650/1890] scsi: myrs: only build on little-endian platforms

Reading throught the new driver, I noticed that this cannot work on
big-endian CPUs, and the old DAC960 had exactly the same behavior.

To document this for the future, add a Kconfig dependency that prevents it
from being included in big-endian kernels.  Since the hardware is really
old and we never had a working driver on it for big-endian platforms,
it's unlikely to make a difference to users.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index f07444d30b216..640cd1b31a18d 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -578,6 +578,7 @@ config SCSI_MYRB
 config SCSI_MYRS
 	tristate "Mylex DAC960/DAC1100 PCI RAID Controller (SCSI Interface)"
 	depends on PCI
+	depends on !CPU_BIG_ENDIAN || COMPILE_TEST
 	select RAID_ATTRS
 	help
 	  This driver adds support for the Mylex DAC960, AcceleRAID, and
-- 
GitLab


From e34ff8edcae89922d187425ab0b82e6a039aa371 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Fri, 26 Oct 2018 15:07:31 +0800
Subject: [PATCH 0651/1890] scsi: hisi_sas: Remove set but not used variable
 'dq_list'

Fixes gcc '-Wunused-but-set-variable' warning:

drivers/scsi/hisi_sas/hisi_sas_v1_hw.c: In function 'start_delivery_v1_hw':
drivers/scsi/hisi_sas/hisi_sas_v1_hw.c:907:20: warning:
 variable 'dq_list' set but not used [-Wunused-but-set-variable]

drivers/scsi/hisi_sas/hisi_sas_v2_hw.c: In function 'start_delivery_v2_hw':
drivers/scsi/hisi_sas/hisi_sas_v2_hw.c:1671:20: warning:
 variable 'dq_list' set but not used [-Wunused-but-set-variable]

drivers/scsi/hisi_sas/hisi_sas_v3_hw.c: In function 'start_delivery_v3_hw':
drivers/scsi/hisi_sas/hisi_sas_v3_hw.c:889:20: warning:
 variable 'dq_list' set but not used [-Wunused-but-set-variable]

It never used since introduction in commit
fa222db0b036 ("scsi: hisi_sas: Don't lock DQ for complete task sending")

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Acked-by: John Garry <john.garry@huawei.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/hisi_sas/hisi_sas_v1_hw.c | 2 --
 drivers/scsi/hisi_sas/hisi_sas_v2_hw.c | 2 --
 drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 2 --
 3 files changed, 6 deletions(-)

diff --git a/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c
index f0e457e6884e5..8df822a4a1bd6 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c
@@ -904,11 +904,9 @@ static void start_delivery_v1_hw(struct hisi_sas_dq *dq)
 {
 	struct hisi_hba *hisi_hba = dq->hisi_hba;
 	struct hisi_sas_slot *s, *s1, *s2 = NULL;
-	struct list_head *dq_list;
 	int dlvry_queue = dq->id;
 	int wp;
 
-	dq_list = &dq->list;
 	list_for_each_entry_safe(s, s1, &dq->list, delivery) {
 		if (!s->ready)
 			break;
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c
index cc36b6473e986..77a85ead483e0 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c
@@ -1670,11 +1670,9 @@ static void start_delivery_v2_hw(struct hisi_sas_dq *dq)
 {
 	struct hisi_hba *hisi_hba = dq->hisi_hba;
 	struct hisi_sas_slot *s, *s1, *s2 = NULL;
-	struct list_head *dq_list;
 	int dlvry_queue = dq->id;
 	int wp;
 
-	dq_list = &dq->list;
 	list_for_each_entry_safe(s, s1, &dq->list, delivery) {
 		if (!s->ready)
 			break;
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
index bd4ce38b98d22..a369450a1fa7b 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
@@ -886,11 +886,9 @@ static void start_delivery_v3_hw(struct hisi_sas_dq *dq)
 {
 	struct hisi_hba *hisi_hba = dq->hisi_hba;
 	struct hisi_sas_slot *s, *s1, *s2 = NULL;
-	struct list_head *dq_list;
 	int dlvry_queue = dq->id;
 	int wp;
 
-	dq_list = &dq->list;
 	list_for_each_entry_safe(s, s1, &dq->list, delivery) {
 		if (!s->ready)
 			break;
-- 
GitLab


From 0d52e642c0ccd7a877a58b6ec23552eb35e7170c Mon Sep 17 00:00:00 2001
From: Masanari Iida <standby24x7@gmail.com>
Date: Sun, 28 Oct 2018 14:05:48 +0900
Subject: [PATCH 0652/1890] scsi: qla2xxx: Fix a typo in MODULE_PARM_DESC

This patch fixes a spelling typo in MODULE_PARM_DESC of
ql2xplogiabsentdevice.

Signed-off-by: Masanari Iida <standby24x7@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qla2xxx/qla_os.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 518f15141170e..20c85eed1a750 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -67,7 +67,7 @@ module_param(ql2xplogiabsentdevice, int, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(ql2xplogiabsentdevice,
 		"Option to enable PLOGI to devices that are not present after "
 		"a Fabric scan.  This is needed for several broken switches. "
-		"Default is 0 - no PLOGI. 1 - perfom PLOGI.");
+		"Default is 0 - no PLOGI. 1 - perform PLOGI.");
 
 int ql2xloginretrycount = 0;
 module_param(ql2xloginretrycount, int, S_IRUGO);
-- 
GitLab


From 96edebd6bb995f2acb7694bed6e01bf6f5a7b634 Mon Sep 17 00:00:00 2001
From: Finn Thain <fthain@telegraphics.com.au>
Date: Wed, 24 Oct 2018 18:45:33 +1100
Subject: [PATCH 0653/1890] scsi: NCR5380: Return false instead of NULL

I overlooked this statement when I recently converted the function result
type from struct scsi_cmnd * to bool. No change to object code.

Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/NCR5380.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/NCR5380.c b/drivers/scsi/NCR5380.c
index 8429c855701fc..01c23d27f290b 100644
--- a/drivers/scsi/NCR5380.c
+++ b/drivers/scsi/NCR5380.c
@@ -1198,7 +1198,7 @@ static bool NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd)
 
 out:
 	if (!hostdata->selecting)
-		return NULL;
+		return false;
 	hostdata->selecting = NULL;
 	return ret;
 }
-- 
GitLab


From 08fd9a82fda86529bb2f2af3c2f7cb657b4d3066 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 27 Aug 2018 18:30:16 +0300
Subject: [PATCH 0654/1890] usb: dwc3: core: Clean up ULPI device

If dwc3_core_init_mode() fails with deferred probe,
next probe fails on sysfs with

sysfs: cannot create duplicate filename '/devices/pci0000:00/0000:00:11.0/dwc3.0.auto/dwc3.0.auto.ulpi'

To avoid this failure, clean up ULPI device.

Cc: <stable@vger.kernel.org>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/dwc3/core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index becfbb87f791d..2f2048aa5fde1 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -1499,6 +1499,7 @@ static int dwc3_probe(struct platform_device *pdev)
 
 err5:
 	dwc3_event_buffers_cleanup(dwc);
+	dwc3_ulpi_exit(dwc);
 
 err4:
 	dwc3_free_scratch_buffers(dwc);
-- 
GitLab


From ba3a51ac32ebcf8d0a54b37f1af268ad8a31c52f Mon Sep 17 00:00:00 2001
From: Thinh Nguyen <Thinh.Nguyen@synopsys.com>
Date: Thu, 2 Aug 2018 20:17:16 -0700
Subject: [PATCH 0655/1890] usb: dwc3: gadget: Properly check last
 unaligned/zero chain TRB

Current check for the last extra TRB for zero and unaligned transfers
does not account for isoc OUT. The last TRB of the Buffer Descriptor for
isoc OUT transfers will be retired with HWO=0. As a result, we won't
return early. The req->remaining will be updated to include the BUFSIZ
count of the extra TRB, and the actual number of transferred bytes
calculation will be wrong.

To fix this, check whether it's a short or zero packet and the last TRB
chain bit to return early.

Fixes: c6267a51639b ("usb: dwc3: gadget: align transfers to wMaxPacketSize")
Cc: <stable@vger.kernel.org>
Signed-off-by: Thinh Nguyen <thinhn@synopsys.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/dwc3/gadget.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 679c12e145225..06e22afdf3d14 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -2259,7 +2259,7 @@ static int dwc3_gadget_ep_reclaim_completed_trb(struct dwc3_ep *dep,
 	 * with one TRB pending in the ring. We need to manually clear HWO bit
 	 * from that TRB.
 	 */
-	if ((req->zero || req->unaligned) && (trb->ctrl & DWC3_TRB_CTRL_HWO)) {
+	if ((req->zero || req->unaligned) && !(trb->ctrl & DWC3_TRB_CTRL_CHN)) {
 		trb->ctrl &= ~DWC3_TRB_CTRL_HWO;
 		return 1;
 	}
-- 
GitLab


From 0ae790683fc28bb718d74f87cdf753c6445fe28d Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Tue, 6 Nov 2018 19:23:28 +1100
Subject: [PATCH 0656/1890] powerpc/mm/64s: Consolidate SLB assertions

The code for assert_slb_exists() and assert_slb_notexists() is almost
identical, except for the polarity of the WARN_ON(). In a future patch
we'll need to modify this code, so consolidate it now into a single
function.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/mm/slb.c | 29 +++++++++--------------------
 1 file changed, 9 insertions(+), 20 deletions(-)

diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index c3fdf2969d9fa..f3e002ee457be 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -58,7 +58,7 @@ static inline unsigned long mk_vsid_data(unsigned long ea, int ssize,
 	return __mk_vsid_data(get_kernel_vsid(ea, ssize), ssize, flags);
 }
 
-static void assert_slb_exists(unsigned long ea)
+static void assert_slb_presence(bool present, unsigned long ea)
 {
 #ifdef CONFIG_DEBUG_VM
 	unsigned long tmp;
@@ -66,19 +66,8 @@ static void assert_slb_exists(unsigned long ea)
 	WARN_ON_ONCE(mfmsr() & MSR_EE);
 
 	asm volatile("slbfee. %0, %1" : "=r"(tmp) : "r"(ea) : "cr0");
-	WARN_ON(tmp == 0);
-#endif
-}
 
-static void assert_slb_notexists(unsigned long ea)
-{
-#ifdef CONFIG_DEBUG_VM
-	unsigned long tmp;
-
-	WARN_ON_ONCE(mfmsr() & MSR_EE);
-
-	asm volatile("slbfee. %0, %1" : "=r"(tmp) : "r"(ea) : "cr0");
-	WARN_ON(tmp != 0);
+	WARN_ON(present == (tmp == 0));
 #endif
 }
 
@@ -114,7 +103,7 @@ static inline void create_shadowed_slbe(unsigned long ea, int ssize,
 	 */
 	slb_shadow_update(ea, ssize, flags, index);
 
-	assert_slb_notexists(ea);
+	assert_slb_presence(false, ea);
 	asm volatile("slbmte  %0,%1" :
 		     : "r" (mk_vsid_data(ea, ssize, flags)),
 		       "r" (mk_esid_data(ea, ssize, index))
@@ -137,7 +126,7 @@ void __slb_restore_bolted_realmode(void)
 		       "r" (be64_to_cpu(p->save_area[index].esid)));
 	}
 
-	assert_slb_exists(local_paca->kstack);
+	assert_slb_presence(true, local_paca->kstack);
 }
 
 /*
@@ -185,7 +174,7 @@ void slb_flush_and_restore_bolted(void)
 		     :: "r" (be64_to_cpu(p->save_area[KSTACK_INDEX].vsid)),
 			"r" (be64_to_cpu(p->save_area[KSTACK_INDEX].esid))
 		     : "memory");
-	assert_slb_exists(get_paca()->kstack);
+	assert_slb_presence(true, get_paca()->kstack);
 
 	get_paca()->slb_cache_ptr = 0;
 
@@ -443,9 +432,9 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
 				ea = (unsigned long)
 					get_paca()->slb_cache[i] << SID_SHIFT;
 				/*
-				 * Could assert_slb_exists here, but hypervisor
-				 * or machine check could have come in and
-				 * removed the entry at this point.
+				 * Could assert_slb_presence(true) here, but
+				 * hypervisor or machine check could have come
+				 * in and removed the entry at this point.
 				 */
 
 				slbie_data = ea;
@@ -676,7 +665,7 @@ static long slb_insert_entry(unsigned long ea, unsigned long context,
 	 * User preloads should add isync afterwards in case the kernel
 	 * accesses user memory before it returns to userspace with rfid.
 	 */
-	assert_slb_notexists(ea);
+	assert_slb_presence(false, ea);
 	asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data));
 
 	barrier();
-- 
GitLab


From 08e6a3434e2125e4b21d0d3f84678d427345bc0d Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Tue, 6 Nov 2018 19:25:18 +1100
Subject: [PATCH 0657/1890] powerpc/mm/64s: Use PPC_SLBFEE macro

Old toolchains don't know about slbfee and break the build, eg:
  {standard input}:37: Error: Unrecognized opcode: `slbfee.'

Fix it by using the macro version. We need to add an underscore
version that takes raw register numbers from the inline asm, rather
than our Rx macros.

Fixes: e15a4fea4dee ("powerpc/64s/hash: Add some SLB debugging tests")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/ppc-opcode.h | 2 ++
 arch/powerpc/mm/slb.c                 | 3 ++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 6093bc8f74e51..a6e9e314c7077 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -493,6 +493,8 @@
 					__PPC_RS(t) | __PPC_RA0(a) | __PPC_RB(b))
 #define PPC_SLBFEE_DOT(t, b)	stringify_in_c(.long PPC_INST_SLBFEE | \
 					__PPC_RT(t) | __PPC_RB(b))
+#define __PPC_SLBFEE_DOT(t, b)	stringify_in_c(.long PPC_INST_SLBFEE |	\
+					       ___PPC_RT(t) | ___PPC_RB(b))
 #define PPC_ICBT(c,a,b)		stringify_in_c(.long PPC_INST_ICBT | \
 				       __PPC_CT(c) | __PPC_RA0(a) | __PPC_RB(b))
 /* PASemi instructions */
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index f3e002ee457be..457fd29448b1b 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -19,6 +19,7 @@
 #include <asm/mmu.h>
 #include <asm/mmu_context.h>
 #include <asm/paca.h>
+#include <asm/ppc-opcode.h>
 #include <asm/cputable.h>
 #include <asm/cacheflush.h>
 #include <asm/smp.h>
@@ -65,7 +66,7 @@ static void assert_slb_presence(bool present, unsigned long ea)
 
 	WARN_ON_ONCE(mfmsr() & MSR_EE);
 
-	asm volatile("slbfee. %0, %1" : "=r"(tmp) : "r"(ea) : "cr0");
+	asm volatile(__PPC_SLBFEE_DOT(%0, %1) : "=r"(tmp) : "r"(ea) : "cr0");
 
 	WARN_ON(present == (tmp == 0));
 #endif
-- 
GitLab


From 9586d569a369dc585a3e191dcabd72748e3c9c5c Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Tue, 6 Nov 2018 19:25:38 +1100
Subject: [PATCH 0658/1890] powerpc/mm/64s: Only use slbfee on CPUs that
 support it

The slbfee instruction was only added in ISA 2.05 (Power6), it's not
supported on older CPUs. We don't have a CPU feature for that ISA
version though, so just use the ISA 2.06 feature flag.

Fixes: e15a4fea4dee ("powerpc/64s/hash: Add some SLB debugging tests")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/mm/slb.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 457fd29448b1b..b663a36f9ada1 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -66,6 +66,9 @@ static void assert_slb_presence(bool present, unsigned long ea)
 
 	WARN_ON_ONCE(mfmsr() & MSR_EE);
 
+	if (!cpu_has_feature(CPU_FTR_ARCH_206))
+		return;
+
 	asm volatile(__PPC_SLBFEE_DOT(%0, %1) : "=r"(tmp) : "r"(ea) : "cr0");
 
 	WARN_ON(present == (tmp == 0));
-- 
GitLab


From d9cccfa7c4d1d9ef967ec9308df7304a18609b30 Mon Sep 17 00:00:00 2001
From: Liam Merwick <Liam.Merwick@oracle.com>
Date: Fri, 2 Nov 2018 14:04:23 +0000
Subject: [PATCH 0659/1890] xen/grant-table: Fix incorrect
 gnttab_dma_free_pages() pr_debug message

If a call to xenmem_reservation_increase() in gnttab_dma_free_pages()
fails it triggers a message "Failed to decrease reservation..." which
should be "Failed to increase reservation..."

Fixes: 9bdc7304f536 ('xen/grant-table: Allow allocating buffers suitable for DMA')
Reported-by: Ross Philipson <ross.philipson@oracle.com>
Signed-off-by: Liam Merwick <liam.merwick@oracle.com>
Reviewed-by: Mark Kanda <mark.kanda@oracle.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 drivers/xen/grant-table.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 84575baceebc8..97341fa75458e 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -914,7 +914,7 @@ int gnttab_dma_free_pages(struct gnttab_dma_alloc_args *args)
 
 	ret = xenmem_reservation_increase(args->nr_pages, args->frames);
 	if (ret != args->nr_pages) {
-		pr_debug("Failed to decrease reservation for DMA buffer\n");
+		pr_debug("Failed to increase reservation for DMA buffer\n");
 		ret = -EFAULT;
 	} else {
 		ret = 0;
-- 
GitLab


From 6cc4a0863c9709c512280c64e698d68443ac8053 Mon Sep 17 00:00:00 2001
From: Manjunath Patil <manjunath.b.patil@oracle.com>
Date: Tue, 30 Oct 2018 09:49:21 -0700
Subject: [PATCH 0660/1890] xen-blkfront: fix kernel panic with negotiate_mq
 error path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

info->nr_rings isn't adjusted in case of ENOMEM error from
negotiate_mq(). This leads to kernel panic in error path.

Typical call stack involving panic -
 #8 page_fault at ffffffff8175936f
    [exception RIP: blkif_free_ring+33]
    RIP: ffffffffa0149491  RSP: ffff8804f7673c08  RFLAGS: 00010292
 ...
 #9 blkif_free at ffffffffa0149aaa [xen_blkfront]
 #10 talk_to_blkback at ffffffffa014c8cd [xen_blkfront]
 #11 blkback_changed at ffffffffa014ea8b [xen_blkfront]
 #12 xenbus_otherend_changed at ffffffff81424670
 #13 backend_changed at ffffffff81426dc3
 #14 xenwatch_thread at ffffffff81422f29
 #15 kthread at ffffffff810abe6a
 #16 ret_from_fork at ffffffff81754078

Cc: stable@vger.kernel.org
Fixes: 7ed8ce1c5fc7 ("xen-blkfront: move negotiate_mq to cover all cases of new VBDs")
Signed-off-by: Manjunath Patil <manjunath.b.patil@oracle.com>
Acked-by: Roger Pau MonnÃ© <roger.pau@citrix.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 drivers/block/xen-blkfront.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 9eea83ae01c6f..9ac3999a55b7a 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -1919,6 +1919,7 @@ static int negotiate_mq(struct blkfront_info *info)
 			      GFP_KERNEL);
 	if (!info->rinfo) {
 		xenbus_dev_fatal(info->xbdev, -ENOMEM, "allocating ring_info structure");
+		info->nr_rings = 0;
 		return -ENOMEM;
 	}
 
-- 
GitLab


From ba26cd7d58dcc50e25c8f02b5376c05046b181dc Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Mon, 5 Nov 2018 08:58:35 +0100
Subject: [PATCH 0661/1890] mtd: sa1100: avoid VLA in sa1100_setup_mtd

Enabling -Wvla found another variable-length array with randconfig
testing:

drivers/mtd/maps/sa1100-flash.c: In function 'sa1100_setup_mtd':
drivers/mtd/maps/sa1100-flash.c:224:10: error: ISO C90 forbids variable length array 'cdev' [-Werror=vla]

Dynamically allocate the cdev array passed to mtd_concat_create()
instead of using a VLA.

Reported-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Olof Johansson <olof@lixom.net>
---
 drivers/mtd/maps/sa1100-flash.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/mtd/maps/sa1100-flash.c b/drivers/mtd/maps/sa1100-flash.c
index 784c6e1a0391e..fd5fe12d74613 100644
--- a/drivers/mtd/maps/sa1100-flash.c
+++ b/drivers/mtd/maps/sa1100-flash.c
@@ -221,7 +221,14 @@ static struct sa_info *sa1100_setup_mtd(struct platform_device *pdev,
 		info->mtd = info->subdev[0].mtd;
 		ret = 0;
 	} else if (info->num_subdev > 1) {
-		struct mtd_info *cdev[nr];
+		struct mtd_info **cdev;
+
+		cdev = kmalloc_array(nr, sizeof(*cdev), GFP_KERNEL);
+		if (!cdev) {
+			ret = -ENOMEM;
+			goto err;
+		}
+
 		/*
 		 * We detected multiple devices.  Concatenate them together.
 		 */
@@ -230,6 +237,7 @@ static struct sa_info *sa1100_setup_mtd(struct platform_device *pdev,
 
 		info->mtd = mtd_concat_create(cdev, info->num_subdev,
 					      plat->name);
+		kfree(cdev);
 		if (info->mtd == NULL) {
 			ret = -ENXIO;
 			goto err;
-- 
GitLab


From df18bfd35bbf7cb1a420b5beede1de29343793b3 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 2 Nov 2018 07:09:07 -0400
Subject: [PATCH 0662/1890] media: v4l: fix uapi mpeg slice params definition

We get a headers_check warning about the newly defined ioctl command
structures:

./usr/include/linux/v4l2-controls.h:1105: found __[us]{8,16,32,64} type without #include <linux/types.h>

This is resolved by including linux/types.h, as suggested by the
warning, but there is another problem: Three of the four structures
have an odd number of __u8 headers, but are aligned to 32 bit in the
v4l2_ctrl_mpeg2_slice_params, so we get an implicit padding byte
for each one. To solve that, let's add explicit padding that can
be set to zero and verified in the kernel.

Fixes: c27bb30e7b6d ("media: v4l: Add definitions for MPEG-2 slice format and metadata")

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/v4l2-core/v4l2-ctrls.c | 5 +++++
 include/uapi/linux/v4l2-controls.h   | 7 +++++--
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c
index 6e37950292cd9..5f2b033a7a42f 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls.c
@@ -1664,6 +1664,11 @@ static int std_validate(const struct v4l2_ctrl *ctrl, u32 idx,
 		    p_mpeg2_slice_params->forward_ref_index >= VIDEO_MAX_FRAME)
 			return -EINVAL;
 
+		if (p_mpeg2_slice_params->pad ||
+		    p_mpeg2_slice_params->picture.pad ||
+		    p_mpeg2_slice_params->sequence.pad)
+			return -EINVAL;
+
 		return 0;
 
 	case V4L2_CTRL_TYPE_MPEG2_QUANTIZATION:
diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index 86a54916206fa..998983a6e6b71 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -47,11 +47,11 @@
  *  videodev2.h.
  */
 
-#include <linux/types.h>
-
 #ifndef __LINUX_V4L2_CONTROLS_H
 #define __LINUX_V4L2_CONTROLS_H
 
+#include <linux/types.h>
+
 /* Control classes */
 #define V4L2_CTRL_CLASS_USER		0x00980000	/* Old-style 'user' controls */
 #define V4L2_CTRL_CLASS_MPEG		0x00990000	/* MPEG-compression controls */
@@ -1112,6 +1112,7 @@ struct v4l2_mpeg2_sequence {
 	__u8	profile_and_level_indication;
 	__u8	progressive_sequence;
 	__u8	chroma_format;
+	__u8	pad;
 };
 
 struct v4l2_mpeg2_picture {
@@ -1130,6 +1131,7 @@ struct v4l2_mpeg2_picture {
 	__u8	alternate_scan;
 	__u8	repeat_first_field;
 	__u8	progressive_frame;
+	__u8	pad;
 };
 
 struct v4l2_ctrl_mpeg2_slice_params {
@@ -1144,6 +1146,7 @@ struct v4l2_ctrl_mpeg2_slice_params {
 
 	__u8	backward_ref_index;
 	__u8	forward_ref_index;
+	__u8	pad;
 };
 
 struct v4l2_ctrl_mpeg2_quantization {
-- 
GitLab


From 0ede1794b9667d9cd35d1ac27f2eeff52d5733d5 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <natechancellor@gmail.com>
Date: Mon, 8 Oct 2018 18:11:28 -0400
Subject: [PATCH 0663/1890] media: tc358743: Remove unnecessary self assignment

Clang warns when a variable is assigned to itself.

drivers/media/i2c/tc358743.c:1921:7: warning: explicitly assigning value
of variable of type 'int' to itself [-Wself-assign]
                ret = ret;
                ~~~ ^ ~~~
1 warning generated.

Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
Reviewed-by: Kieran Bingham <kieran.bingham+renesas@ideasonboard.com>
Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/i2c/tc358743.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/media/i2c/tc358743.c b/drivers/media/i2c/tc358743.c
index ca5d92942820a..41d470d9ca943 100644
--- a/drivers/media/i2c/tc358743.c
+++ b/drivers/media/i2c/tc358743.c
@@ -1918,7 +1918,6 @@ static int tc358743_probe_of(struct tc358743_state *state)
 	ret = v4l2_fwnode_endpoint_alloc_parse(of_fwnode_handle(ep), &endpoint);
 	if (ret) {
 		dev_err(dev, "failed to parse endpoint\n");
-		ret = ret;
 		goto put_node;
 	}
 
-- 
GitLab


From 7cf7b2e977abf3f992036939e35a8eab60013aff Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Wed, 10 Oct 2018 03:03:43 -0400
Subject: [PATCH 0664/1890] media: vicodec: lower minimum height to 360

Lower the minimum height to 360 to be consistent with the webcam input of vivid.

The 480 was rather arbitrary but it made it harder to use vivid as a source for
encoding since the default resolution when you load vivid is 640x360.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Reviewed-by: Kieran Bingham <kieran.bingham+renesas@ideasonboard.com>
Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/platform/vicodec/vicodec-core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/platform/vicodec/vicodec-core.c b/drivers/media/platform/vicodec/vicodec-core.c
index 1eb9132bfc85f..b292cff26c866 100644
--- a/drivers/media/platform/vicodec/vicodec-core.c
+++ b/drivers/media/platform/vicodec/vicodec-core.c
@@ -42,7 +42,7 @@ MODULE_PARM_DESC(debug, " activates debug info");
 #define MAX_WIDTH		4096U
 #define MIN_WIDTH		640U
 #define MAX_HEIGHT		2160U
-#define MIN_HEIGHT		480U
+#define MIN_HEIGHT		360U
 
 #define dprintk(dev, fmt, arg...) \
 	v4l2_dbg(1, debug, &dev->v4l2_dev, "%s: " fmt, __func__, ## arg)
-- 
GitLab


From 55623b4169056d7bb493d1c6f715991f8db67302 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Wed, 10 Oct 2018 07:12:15 -0400
Subject: [PATCH 0665/1890] media: cec: check for non-OK/NACK conditions while
 claiming a LA

During the configuration phase of a CEC adapter it is trying to claim a
free logical address by polling.

However, the code doesn't check if there were errors other than OK or NACK,
those are just treated as if the poll was NACKed.

Instead check for such errors and retry the poll. And if the problem persists
then don't claim this LA since there is something weird going on.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/cec/cec-adap.c | 47 ++++++++++++++++++++++++++++--------
 1 file changed, 37 insertions(+), 10 deletions(-)

diff --git a/drivers/media/cec/cec-adap.c b/drivers/media/cec/cec-adap.c
index 31d1f4ab915ea..6fcd575e1b883 100644
--- a/drivers/media/cec/cec-adap.c
+++ b/drivers/media/cec/cec-adap.c
@@ -1180,6 +1180,8 @@ static int cec_config_log_addr(struct cec_adapter *adap,
 {
 	struct cec_log_addrs *las = &adap->log_addrs;
 	struct cec_msg msg = { };
+	const unsigned int max_retries = 2;
+	unsigned int i;
 	int err;
 
 	if (cec_has_log_addr(adap, log_addr))
@@ -1188,19 +1190,44 @@ static int cec_config_log_addr(struct cec_adapter *adap,
 	/* Send poll message */
 	msg.len = 1;
 	msg.msg[0] = (log_addr << 4) | log_addr;
-	err = cec_transmit_msg_fh(adap, &msg, NULL, true);
 
-	/*
-	 * While trying to poll the physical address was reset
-	 * and the adapter was unconfigured, so bail out.
-	 */
-	if (!adap->is_configuring)
-		return -EINTR;
+	for (i = 0; i < max_retries; i++) {
+		err = cec_transmit_msg_fh(adap, &msg, NULL, true);
 
-	if (err)
-		return err;
+		/*
+		 * While trying to poll the physical address was reset
+		 * and the adapter was unconfigured, so bail out.
+		 */
+		if (!adap->is_configuring)
+			return -EINTR;
+
+		if (err)
+			return err;
 
-	if (msg.tx_status & CEC_TX_STATUS_OK)
+		/*
+		 * The message was aborted due to a disconnect or
+		 * unconfigure, just bail out.
+		 */
+		if (msg.tx_status & CEC_TX_STATUS_ABORTED)
+			return -EINTR;
+		if (msg.tx_status & CEC_TX_STATUS_OK)
+			return 0;
+		if (msg.tx_status & CEC_TX_STATUS_NACK)
+			break;
+		/*
+		 * Retry up to max_retries times if the message was neither
+		 * OKed or NACKed. This can happen due to e.g. a Lost
+		 * Arbitration condition.
+		 */
+	}
+
+	/*
+	 * If we are unable to get an OK or a NACK after max_retries attempts
+	 * (and note that each attempt already consists of four polls), then
+	 * then we assume that something is really weird and that it is not a
+	 * good idea to try and claim this logical address.
+	 */
+	if (i == max_retries)
 		return 0;
 
 	/*
-- 
GitLab


From 2efaf6ebb34fe66c613e94c6e97ae95879455bc3 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Thu, 11 Oct 2018 06:38:27 -0400
Subject: [PATCH 0666/1890] media: cec: increase debug level for 'queue full'

The "transmit queue full" message doesn't warrant debug level 1 since
it is already clear from the error code what's going on.

Bump to level 2.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/cec/cec-adap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/cec/cec-adap.c b/drivers/media/cec/cec-adap.c
index 6fcd575e1b883..65a933a21e685 100644
--- a/drivers/media/cec/cec-adap.c
+++ b/drivers/media/cec/cec-adap.c
@@ -807,7 +807,7 @@ int cec_transmit_msg_fh(struct cec_adapter *adap, struct cec_msg *msg,
 	}
 
 	if (adap->transmit_queue_sz >= CEC_MAX_MSG_TX_QUEUE_SZ) {
-		dprintk(1, "%s: transmit queue full\n", __func__);
+		dprintk(2, "%s: transmit queue full\n", __func__);
 		return -EBUSY;
 	}
 
-- 
GitLab


From ef86eaf97acd6d82cd3fd40f997b1c8c4895a443 Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel@collabora.com>
Date: Thu, 18 Oct 2018 14:54:29 -0400
Subject: [PATCH 0667/1890] media: Rename vb2_m2m_request_queue ->
 v4l2_m2m_request_queue

To be consistent with the rest of the mem2mem helpers,
rename vb2_m2m_request_queue to v4l2_m2m_request_queue.

This is just a cosmetic change.

Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/platform/vim2m.c              | 2 +-
 drivers/media/v4l2-core/v4l2-mem2mem.c      | 4 ++--
 drivers/staging/media/sunxi/cedrus/cedrus.c | 2 +-
 include/media/v4l2-mem2mem.h                | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/media/platform/vim2m.c b/drivers/media/platform/vim2m.c
index af150a0395dfb..d82db738f174e 100644
--- a/drivers/media/platform/vim2m.c
+++ b/drivers/media/platform/vim2m.c
@@ -1009,7 +1009,7 @@ static const struct v4l2_m2m_ops m2m_ops = {
 
 static const struct media_device_ops m2m_media_ops = {
 	.req_validate = vb2_request_validate,
-	.req_queue = vb2_m2m_request_queue,
+	.req_queue = v4l2_m2m_request_queue,
 };
 
 static int vim2m_probe(struct platform_device *pdev)
diff --git a/drivers/media/v4l2-core/v4l2-mem2mem.c b/drivers/media/v4l2-core/v4l2-mem2mem.c
index d7806db222d83..1ed2465972aca 100644
--- a/drivers/media/v4l2-core/v4l2-mem2mem.c
+++ b/drivers/media/v4l2-core/v4l2-mem2mem.c
@@ -953,7 +953,7 @@ void v4l2_m2m_buf_queue(struct v4l2_m2m_ctx *m2m_ctx,
 }
 EXPORT_SYMBOL_GPL(v4l2_m2m_buf_queue);
 
-void vb2_m2m_request_queue(struct media_request *req)
+void v4l2_m2m_request_queue(struct media_request *req)
 {
 	struct media_request_object *obj, *obj_safe;
 	struct v4l2_m2m_ctx *m2m_ctx = NULL;
@@ -997,7 +997,7 @@ void vb2_m2m_request_queue(struct media_request *req)
 	if (m2m_ctx)
 		v4l2_m2m_try_schedule(m2m_ctx);
 }
-EXPORT_SYMBOL_GPL(vb2_m2m_request_queue);
+EXPORT_SYMBOL_GPL(v4l2_m2m_request_queue);
 
 /* Videobuf2 ioctl helpers */
 
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.c b/drivers/staging/media/sunxi/cedrus/cedrus.c
index 82558455384af..dd121f66fa2de 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus.c
+++ b/drivers/staging/media/sunxi/cedrus/cedrus.c
@@ -253,7 +253,7 @@ static const struct v4l2_m2m_ops cedrus_m2m_ops = {
 
 static const struct media_device_ops cedrus_m2m_media_ops = {
 	.req_validate	= cedrus_request_validate,
-	.req_queue	= vb2_m2m_request_queue,
+	.req_queue	= v4l2_m2m_request_queue,
 };
 
 static int cedrus_probe(struct platform_device *pdev)
diff --git a/include/media/v4l2-mem2mem.h b/include/media/v4l2-mem2mem.h
index 58c1ecf3d6489..5467264771ec1 100644
--- a/include/media/v4l2-mem2mem.h
+++ b/include/media/v4l2-mem2mem.h
@@ -624,7 +624,7 @@ v4l2_m2m_dst_buf_remove_by_idx(struct v4l2_m2m_ctx *m2m_ctx, unsigned int idx)
 
 /* v4l2 request helper */
 
-void vb2_m2m_request_queue(struct media_request *req);
+void v4l2_m2m_request_queue(struct media_request *req);
 
 /* v4l2 ioctl helpers */
 
-- 
GitLab


From 6282e916f774e37845c65d1eae9f8c649004f033 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 5 Nov 2018 14:54:56 +0100
Subject: [PATCH 0668/1890] ARM: 8809/1: proc-v7: fix Thumb annotation of
 cpu_v7_hvc_switch_mm

Due to what appears to be a copy/paste error, the opening ENTRY()
of cpu_v7_hvc_switch_mm() lacks a matching ENDPROC(), and instead,
the one for cpu_v7_smc_switch_mm() is duplicated.

Given that it is ENDPROC() that emits the Thumb annotation, the
cpu_v7_hvc_switch_mm() routine will be called in ARM mode on a
Thumb2 kernel, resulting in the following splat:

  Internal error: Oops - undefined instruction: 0 [#1] SMP THUMB2
  Modules linked in:
  CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.18.0-rc1-00030-g4d28ad89189d-dirty #488
  Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015
  PC is at cpu_v7_hvc_switch_mm+0x12/0x18
  LR is at flush_old_exec+0x31b/0x570
  pc : [<c0316efe>]    lr : [<c04117c7>]    psr: 00000013
  sp : ee899e50  ip : 00000000  fp : 00000001
  r10: eda28f34  r9 : eda31800  r8 : c12470e0
  r7 : eda1fc00  r6 : eda53000  r5 : 00000000  r4 : ee88c000
  r3 : c0316eec  r2 : 00000001  r1 : eda53000  r0 : 6da6c000
  Flags: nzcv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none

Note the 'ISA ARM' in the last line.

Fix this by using the correct name in ENDPROC().

Cc: <stable@vger.kernel.org>
Fixes: 10115105cb3a ("ARM: spectre-v2: add firmware based hardening")
Reviewed-by: Dave Martin <Dave.Martin@arm.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/mm/proc-v7.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index b78d59a1cc059..fccd4d99f505e 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -112,7 +112,7 @@ ENTRY(cpu_v7_hvc_switch_mm)
 	hvc	#0
 	ldmfd	sp!, {r0 - r3}
 	b	cpu_v7_switch_mm
-ENDPROC(cpu_v7_smc_switch_mm)
+ENDPROC(cpu_v7_hvc_switch_mm)
 #endif
 ENTRY(cpu_v7_iciallu_switch_mm)
 	mov	r3, #0
-- 
GitLab


From 86d4d068df573a8c2105554624796c086d6bec3d Mon Sep 17 00:00:00 2001
From: John David Anglin <dave.anglin@bell.net>
Date: Tue, 6 Nov 2018 12:00:01 +0100
Subject: [PATCH 0669/1890] parisc: Revert "Release spinlocks using ordered
 store"

This reverts commit d27dfa13b9f77ae7e6ed09d70a0426ed26c1a8f9.

Unfortunately, this patch needs to be reverted.  We need the full sync
barrier and not the limited barrier provided by using an ordered store.
The sync ensures that all accesses and cache purge instructions that
follow the sync are performed after all such instructions prior the sync
instruction have completed executing.

The patch breaks the rwlock implementation in glibc.  This caused the
test-lock application in the libprelude testsuite to hang.  With the
change reverted, the test runs correctly and the libprelude package
builds successfully.

Signed-off-by: John David Anglin <dave.anglin@bell.net>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/include/asm/spinlock.h |  4 ++--
 arch/parisc/kernel/syscall.S       | 12 ++++++++----
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h
index 16aec9ba2580a..8a63515f03bfe 100644
--- a/arch/parisc/include/asm/spinlock.h
+++ b/arch/parisc/include/asm/spinlock.h
@@ -37,8 +37,8 @@ static inline void arch_spin_unlock(arch_spinlock_t *x)
 	volatile unsigned int *a;
 
 	a = __ldcw_align(x);
-	/* Release with ordered store. */
-	__asm__ __volatile__("stw,ma %0,0(%1)" : : "r"(1), "r"(a) : "memory");
+	mb();
+	*a = 1;
 }
 
 static inline int arch_spin_trylock(arch_spinlock_t *x)
diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S
index 9505c317818df..a9bc90dc4ae75 100644
--- a/arch/parisc/kernel/syscall.S
+++ b/arch/parisc/kernel/syscall.S
@@ -640,7 +640,8 @@ cas_action:
 	sub,<>	%r28, %r25, %r0
 2:	stw	%r24, 0(%r26)
 	/* Free lock */
-	stw,ma	%r20, 0(%sr2,%r20)
+	sync
+	stw	%r20, 0(%sr2,%r20)
 #if ENABLE_LWS_DEBUG
 	/* Clear thread register indicator */
 	stw	%r0, 4(%sr2,%r20)
@@ -654,7 +655,8 @@ cas_action:
 3:		
 	/* Error occurred on load or store */
 	/* Free lock */
-	stw,ma	%r20, 0(%sr2,%r20)
+	sync
+	stw	%r20, 0(%sr2,%r20)
 #if ENABLE_LWS_DEBUG
 	stw	%r0, 4(%sr2,%r20)
 #endif
@@ -855,7 +857,8 @@ cas2_action:
 
 cas2_end:
 	/* Free lock */
-	stw,ma	%r20, 0(%sr2,%r20)
+	sync
+	stw	%r20, 0(%sr2,%r20)
 	/* Enable interrupts */
 	ssm	PSW_SM_I, %r0
 	/* Return to userspace, set no error */
@@ -865,7 +868,8 @@ cas2_end:
 22:
 	/* Error occurred on load or store */
 	/* Free lock */
-	stw,ma	%r20, 0(%sr2,%r20)
+	sync
+	stw	%r20, 0(%sr2,%r20)
 	ssm	PSW_SM_I, %r0
 	ldo	1(%r0),%r28
 	b	lws_exit
-- 
GitLab


From 8e88c29b351ed4e09dd63f825f1c8260b0cb0ab3 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Sun, 23 Sep 2018 17:04:20 +0200
Subject: [PATCH 0670/1890] perf tools: Do not zero sample_id_all for group
 members

Andi reported following malfunction:

  # perf record -e '{ref-cycles,cycles}:S' -a sleep 1
  # perf script
  non matching sample_id_all

That's because we disable sample_id_all bit for non-sampling group
members. We can't do that, because it needs to be the same over the
whole event list. This patch keeps it untouched again.

Reported-by: Andi Kleen <andi@firstfloor.org>
Tested-by: Andi Kleen <andi@firstfloor.org>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20180923150420.27327-1-jolsa@kernel.org
Fixes: e9add8bac6c6 ("perf evsel: Disable write_backward for leader sampling group events")
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/attr/test-record-group-sampling | 1 -
 tools/perf/util/evsel.c                          | 1 -
 2 files changed, 2 deletions(-)

diff --git a/tools/perf/tests/attr/test-record-group-sampling b/tools/perf/tests/attr/test-record-group-sampling
index 8a33ca4f9e1f7..f0729c454f160 100644
--- a/tools/perf/tests/attr/test-record-group-sampling
+++ b/tools/perf/tests/attr/test-record-group-sampling
@@ -37,4 +37,3 @@ sample_freq=0
 sample_period=0
 freq=0
 write_backward=0
-sample_id_all=0
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 6d187059a3736..d37bb1566cd9d 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -956,7 +956,6 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
 		attr->sample_freq    = 0;
 		attr->sample_period  = 0;
 		attr->write_backward = 0;
-		attr->sample_id_all  = 0;
 	}
 
 	if (opts->no_samples)
-- 
GitLab


From b50b769bcbc24d68d95870d87354ec45fa0f07bb Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Tue, 6 Nov 2018 05:54:48 -0500
Subject: [PATCH 0671/1890] media: dm365_ipipeif: better annotate a fall though

Shut up this warning:

	drivers/staging/media/davinci_vpfe/dm365_ipipeif.c: In function 'ipipeif_hw_setup':
	drivers/staging/media/davinci_vpfe/dm365_ipipeif.c:298:3: warning: this statement may fall through [-Wimplicit-fallthrough=]
	   switch (isif_port_if) {
	   ^~~~~~
	drivers/staging/media/davinci_vpfe/dm365_ipipeif.c:314:2: note: here
	  case IPIPEIF_SDRAM_YUV:
	  ^~~~

By annotating a fall though case at the right place.

Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/staging/media/davinci_vpfe/dm365_ipipeif.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/staging/media/davinci_vpfe/dm365_ipipeif.c b/drivers/staging/media/davinci_vpfe/dm365_ipipeif.c
index a53231b08d30e..e3425bf082ae9 100644
--- a/drivers/staging/media/davinci_vpfe/dm365_ipipeif.c
+++ b/drivers/staging/media/davinci_vpfe/dm365_ipipeif.c
@@ -310,6 +310,7 @@ static int ipipeif_hw_setup(struct v4l2_subdev *sd)
 			ipipeif_write(val, ipipeif_base_addr, IPIPEIF_CFG2);
 			break;
 		}
+		/* fall through */
 
 	case IPIPEIF_SDRAM_YUV:
 		/* Set clock divider */
-- 
GitLab


From 92539d3eda2c090b382699bbb896d4b54e9bdece Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Mon, 5 Nov 2018 09:35:44 -0500
Subject: [PATCH 0672/1890] media: v4l: event: Add subscription to list before
 calling "add" operation

Patch ad608fbcf166 changed how events were subscribed to address an issue
elsewhere. As a side effect of that change, the "add" callback was called
before the event subscription was added to the list of subscribed events,
causing the first event queued by the add callback (and possibly other
events arriving soon afterwards) to be lost.

Fix this by adding the subscription to the list before calling the "add"
callback, and clean up afterwards if that fails.

Fixes: ad608fbcf166 ("media: v4l: event: Prevent freeing event subscriptions while accessed")

Reported-by: Dave Stevenson <dave.stevenson@raspberrypi.org>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Tested-by: Dave Stevenson <dave.stevenson@raspberrypi.org>
Reviewed-by: Hans Verkuil <hans.verkuil@cisco.com>
Tested-by: Hans Verkuil <hans.verkuil@cisco.com>
Cc: stable@vger.kernel.org (for 4.14 and up)
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/v4l2-core/v4l2-event.c | 43 ++++++++++++++++------------
 1 file changed, 24 insertions(+), 19 deletions(-)

diff --git a/drivers/media/v4l2-core/v4l2-event.c b/drivers/media/v4l2-core/v4l2-event.c
index a3ef1f50a4b34..481e3c65cf97a 100644
--- a/drivers/media/v4l2-core/v4l2-event.c
+++ b/drivers/media/v4l2-core/v4l2-event.c
@@ -193,6 +193,22 @@ int v4l2_event_pending(struct v4l2_fh *fh)
 }
 EXPORT_SYMBOL_GPL(v4l2_event_pending);
 
+static void __v4l2_event_unsubscribe(struct v4l2_subscribed_event *sev)
+{
+	struct v4l2_fh *fh = sev->fh;
+	unsigned int i;
+
+	lockdep_assert_held(&fh->subscribe_lock);
+	assert_spin_locked(&fh->vdev->fh_lock);
+
+	/* Remove any pending events for this subscription */
+	for (i = 0; i < sev->in_use; i++) {
+		list_del(&sev->events[sev_pos(sev, i)].list);
+		fh->navailable--;
+	}
+	list_del(&sev->list);
+}
+
 int v4l2_event_subscribe(struct v4l2_fh *fh,
 			 const struct v4l2_event_subscription *sub, unsigned elems,
 			 const struct v4l2_subscribed_event_ops *ops)
@@ -224,27 +240,23 @@ int v4l2_event_subscribe(struct v4l2_fh *fh,
 
 	spin_lock_irqsave(&fh->vdev->fh_lock, flags);
 	found_ev = v4l2_event_subscribed(fh, sub->type, sub->id);
+	if (!found_ev)
+		list_add(&sev->list, &fh->subscribed);
 	spin_unlock_irqrestore(&fh->vdev->fh_lock, flags);
 
 	if (found_ev) {
 		/* Already listening */
 		kvfree(sev);
-		goto out_unlock;
-	}
-
-	if (sev->ops && sev->ops->add) {
+	} else if (sev->ops && sev->ops->add) {
 		ret = sev->ops->add(sev, elems);
 		if (ret) {
+			spin_lock_irqsave(&fh->vdev->fh_lock, flags);
+			__v4l2_event_unsubscribe(sev);
+			spin_unlock_irqrestore(&fh->vdev->fh_lock, flags);
 			kvfree(sev);
-			goto out_unlock;
 		}
 	}
 
-	spin_lock_irqsave(&fh->vdev->fh_lock, flags);
-	list_add(&sev->list, &fh->subscribed);
-	spin_unlock_irqrestore(&fh->vdev->fh_lock, flags);
-
-out_unlock:
 	mutex_unlock(&fh->subscribe_lock);
 
 	return ret;
@@ -279,7 +291,6 @@ int v4l2_event_unsubscribe(struct v4l2_fh *fh,
 {
 	struct v4l2_subscribed_event *sev;
 	unsigned long flags;
-	int i;
 
 	if (sub->type == V4L2_EVENT_ALL) {
 		v4l2_event_unsubscribe_all(fh);
@@ -291,14 +302,8 @@ int v4l2_event_unsubscribe(struct v4l2_fh *fh,
 	spin_lock_irqsave(&fh->vdev->fh_lock, flags);
 
 	sev = v4l2_event_subscribed(fh, sub->type, sub->id);
-	if (sev != NULL) {
-		/* Remove any pending events for this subscription */
-		for (i = 0; i < sev->in_use; i++) {
-			list_del(&sev->events[sev_pos(sev, i)].list);
-			fh->navailable--;
-		}
-		list_del(&sev->list);
-	}
+	if (sev != NULL)
+		__v4l2_event_unsubscribe(sev);
 
 	spin_unlock_irqrestore(&fh->vdev->fh_lock, flags);
 
-- 
GitLab


From 003aedaed65d4f71f3f122ea1e079c648bab113e Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Tue, 9 Oct 2018 07:29:14 -0400
Subject: [PATCH 0673/1890] media: docs: Document metadata format in struct
 v4l2_format

The format fields in struct v4l2_format were otherwise documented but the
meta field was missing. Document it.

Reported-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/uapi/v4l/dev-meta.rst     | 2 +-
 Documentation/media/uapi/v4l/vidioc-g-fmt.rst | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/Documentation/media/uapi/v4l/dev-meta.rst b/Documentation/media/uapi/v4l/dev-meta.rst
index f7ac8d0d3af14..b65dc078abeb8 100644
--- a/Documentation/media/uapi/v4l/dev-meta.rst
+++ b/Documentation/media/uapi/v4l/dev-meta.rst
@@ -40,7 +40,7 @@ To use the :ref:`format` ioctls applications set the ``type`` field of the
 the desired operation. Both drivers and applications must set the remainder of
 the :c:type:`v4l2_format` structure to 0.
 
-.. _v4l2-meta-format:
+.. c:type:: v4l2_meta_format
 
 .. tabularcolumns:: |p{1.4cm}|p{2.2cm}|p{13.9cm}|
 
diff --git a/Documentation/media/uapi/v4l/vidioc-g-fmt.rst b/Documentation/media/uapi/v4l/vidioc-g-fmt.rst
index 3ead350e099f9..9ea494a8facab 100644
--- a/Documentation/media/uapi/v4l/vidioc-g-fmt.rst
+++ b/Documentation/media/uapi/v4l/vidioc-g-fmt.rst
@@ -132,6 +132,11 @@ The format as returned by :ref:`VIDIOC_TRY_FMT <VIDIOC_G_FMT>` must be identical
       - ``sdr``
       - Definition of a data format, see :ref:`pixfmt`, used by SDR
 	capture and output devices.
+    * -
+      - struct :c:type:`v4l2_meta_format`
+      - ``meta``
+      - Definition of a metadata format, see :ref:`meta-formats`, used by
+	metadata capture devices.
     * -
       - __u8
       - ``raw_data``\ [200]
-- 
GitLab


From 30efae3d789cd0714ef795545a46749236e29558 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Tue, 9 Oct 2018 07:49:49 -0400
Subject: [PATCH 0674/1890] media: omap3isp: Unregister media device as first

While there are issues related to object lifetime management, unregister the
media device first when the driver is being unbound. This is slightly
safer.

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/platform/omap3isp/isp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/media/platform/omap3isp/isp.c b/drivers/media/platform/omap3isp/isp.c
index 77fb7987b42f3..13f2828d880df 100644
--- a/drivers/media/platform/omap3isp/isp.c
+++ b/drivers/media/platform/omap3isp/isp.c
@@ -1587,6 +1587,8 @@ static void isp_pm_complete(struct device *dev)
 
 static void isp_unregister_entities(struct isp_device *isp)
 {
+	media_device_unregister(&isp->media_dev);
+
 	omap3isp_csi2_unregister_entities(&isp->isp_csi2a);
 	omap3isp_ccp2_unregister_entities(&isp->isp_ccp2);
 	omap3isp_ccdc_unregister_entities(&isp->isp_ccdc);
@@ -1597,7 +1599,6 @@ static void isp_unregister_entities(struct isp_device *isp)
 	omap3isp_stat_unregister_entities(&isp->isp_hist);
 
 	v4l2_device_unregister(&isp->v4l2_dev);
-	media_device_unregister(&isp->media_dev);
 	media_device_cleanup(&isp->media_dev);
 }
 
-- 
GitLab


From 32388d6ef7cffc7d8291b67f8dfa26acd45217fd Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Wed, 10 Oct 2018 04:01:05 -0400
Subject: [PATCH 0675/1890] media: ipu3-cio2: Unregister device nodes first,
 then release resources

While there are issues related to object lifetime management, unregister
the media device first, followed immediately by other device nodes when
the driver is being unbound. Only then the resources needed by the driver
may be released. This is slightly safer.

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Tested-by: Bingbu Cao <bingbu.cao@intel.com>
Reviewed-by: Bingbu Cao <bingbu.cao@intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/pci/intel/ipu3/ipu3-cio2.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/media/pci/intel/ipu3/ipu3-cio2.c b/drivers/media/pci/intel/ipu3/ipu3-cio2.c
index 452eb9b42140b..723022ef36629 100644
--- a/drivers/media/pci/intel/ipu3/ipu3-cio2.c
+++ b/drivers/media/pci/intel/ipu3/ipu3-cio2.c
@@ -1846,12 +1846,12 @@ static void cio2_pci_remove(struct pci_dev *pci_dev)
 	struct cio2_device *cio2 = pci_get_drvdata(pci_dev);
 	unsigned int i;
 
+	media_device_unregister(&cio2->media_dev);
 	cio2_notifier_exit(cio2);
-	cio2_fbpt_exit_dummy(cio2);
 	for (i = 0; i < CIO2_QUEUES; i++)
 		cio2_queue_exit(cio2, &cio2->queue[i]);
+	cio2_fbpt_exit_dummy(cio2);
 	v4l2_device_unregister(&cio2->v4l2_dev);
-	media_device_unregister(&cio2->media_dev);
 	media_device_cleanup(&cio2->media_dev);
 	mutex_destroy(&cio2->lock);
 }
-- 
GitLab


From 4e26f692e2e2aa4d7d6ddb3c4d3dec17f45d6495 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Wed, 10 Oct 2018 04:04:18 -0400
Subject: [PATCH 0676/1890] media: ipu3-cio2: Use cio2_queues_exit

The ipu3-cio2 driver has a function to tear down video devices as well as
the associated video buffer queues. Use it.

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Tested-by: Bingbu Cao <bingbu.cao@intel.com>
Reviewed-by: Bingbu Cao <bingbu.cao@intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/pci/intel/ipu3/ipu3-cio2.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/media/pci/intel/ipu3/ipu3-cio2.c b/drivers/media/pci/intel/ipu3/ipu3-cio2.c
index 723022ef36629..447baaebca448 100644
--- a/drivers/media/pci/intel/ipu3/ipu3-cio2.c
+++ b/drivers/media/pci/intel/ipu3/ipu3-cio2.c
@@ -1844,12 +1844,10 @@ static int cio2_pci_probe(struct pci_dev *pci_dev,
 static void cio2_pci_remove(struct pci_dev *pci_dev)
 {
 	struct cio2_device *cio2 = pci_get_drvdata(pci_dev);
-	unsigned int i;
 
 	media_device_unregister(&cio2->media_dev);
 	cio2_notifier_exit(cio2);
-	for (i = 0; i < CIO2_QUEUES; i++)
-		cio2_queue_exit(cio2, &cio2->queue[i]);
+	cio2_queues_exit(cio2);
 	cio2_fbpt_exit_dummy(cio2);
 	v4l2_device_unregister(&cio2->v4l2_dev);
 	media_device_cleanup(&cio2->media_dev);
-- 
GitLab


From 8bd66d147c88bd441178c7b4c774ae5a185f19b8 Mon Sep 17 00:00:00 2001
From: "ndesaulniers@google.com" <ndesaulniers@google.com>
Date: Wed, 31 Oct 2018 12:39:01 -0700
Subject: [PATCH 0677/1890] include/linux/compiler*.h: define asm_volatile_goto

asm_volatile_goto should also be defined for other compilers that support
asm goto.

Fixes commit 815f0ddb346c ("include/linux/compiler*.h: make compiler-*.h
mutually exclusive").

Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
---
 include/linux/compiler_types.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 3439d7d0249aa..4a3f9c09c92d0 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -130,6 +130,10 @@ struct ftrace_likely_data {
 # define randomized_struct_fields_end
 #endif
 
+#ifndef asm_volatile_goto
+#define asm_volatile_goto(x...) asm goto(x)
+#endif
+
 /* Are two types/vars the same type (ignoring qualifiers)? */
 #define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
 
-- 
GitLab


From 18354b422ce4ce1124277dfe8f4f094bae0102ad Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Tue, 23 Oct 2018 21:21:02 +0300
Subject: [PATCH 0678/1890] drm/i915: Don't apply the 16Gb DIMM wm latency w/a
 to BXT/GLK
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The 16Gb DIMM w/a is not applicable to BXT or GLK. Limit it to
the appropriate platforms.

This was especially harsh on GLK since we don't even try to read
the DIMM information on that platforms, hence valid_dimm was
always false and thus we always tried to apply the w/a.
Furthermore the w/a pushed the level 0 latency above the
level 1 latency, which doesn't really make sense.

v2: Do the check when populating is_16gb_dimm (Mahesh)

Cc: Mahesh Kumar <mahesh1.kumar@intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Fixes: 86b592876cb6 ("drm/i915: Implement 16GB dimm wa for latency level-0")
Signed-off-by: Ville SyrjÃ¤lÃ¤ <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181023182102.31549-1-ville.syrjala@linux.intel.com
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Mahesh Kumar <mahesh1.sh.kumar@gmail.com>
(cherry picked from commit 5d6f36b27d2764f3dc940606ee6b7ec5c669af3e)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c | 15 ++++++++-------
 drivers/gpu/drm/i915/i915_drv.h |  1 -
 drivers/gpu/drm/i915/intel_pm.c |  3 +--
 3 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 44e2c0f5ec502..ffdbbac4400ea 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1175,8 +1175,6 @@ skl_dram_get_channels_info(struct drm_i915_private *dev_priv)
 		return -EINVAL;
 	}
 
-	dram_info->valid_dimm = true;
-
 	/*
 	 * If any of the channel is single rank channel, worst case output
 	 * will be same as if single rank memory, so consider single rank
@@ -1193,8 +1191,7 @@ skl_dram_get_channels_info(struct drm_i915_private *dev_priv)
 		return -EINVAL;
 	}
 
-	if (ch0.is_16gb_dimm || ch1.is_16gb_dimm)
-		dram_info->is_16gb_dimm = true;
+	dram_info->is_16gb_dimm = ch0.is_16gb_dimm || ch1.is_16gb_dimm;
 
 	dev_priv->dram_info.symmetric_memory = intel_is_dram_symmetric(val_ch0,
 								       val_ch1,
@@ -1314,7 +1311,6 @@ bxt_get_dram_info(struct drm_i915_private *dev_priv)
 		return -EINVAL;
 	}
 
-	dram_info->valid_dimm = true;
 	dram_info->valid = true;
 	return 0;
 }
@@ -1327,12 +1323,17 @@ intel_get_dram_info(struct drm_i915_private *dev_priv)
 	int ret;
 
 	dram_info->valid = false;
-	dram_info->valid_dimm = false;
-	dram_info->is_16gb_dimm = false;
 	dram_info->rank = I915_DRAM_RANK_INVALID;
 	dram_info->bandwidth_kbps = 0;
 	dram_info->num_channels = 0;
 
+	/*
+	 * Assume 16Gb DIMMs are present until proven otherwise.
+	 * This is only used for the level 0 watermark latency
+	 * w/a which does not apply to bxt/glk.
+	 */
+	dram_info->is_16gb_dimm = !IS_GEN9_LP(dev_priv);
+
 	if (INTEL_GEN(dev_priv) < 9 || IS_GEMINILAKE(dev_priv))
 		return;
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 8624b4bdc242d..9102571e9692d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1948,7 +1948,6 @@ struct drm_i915_private {
 
 	struct dram_info {
 		bool valid;
-		bool valid_dimm;
 		bool is_16gb_dimm;
 		u8 num_channels;
 		enum dram_rank {
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 1db9b83282750..245f0022bcfd0 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -2881,8 +2881,7 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv,
 		 * any underrun. If not able to get Dimm info assume 16GB dimm
 		 * to avoid any underrun.
 		 */
-		if (!dev_priv->dram_info.valid_dimm ||
-		    dev_priv->dram_info.is_16gb_dimm)
+		if (dev_priv->dram_info.is_16gb_dimm)
 			wm[0] += 1;
 
 	} else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
-- 
GitLab


From 6503493145cba4413ecd3d4d153faeef4a1e9b85 Mon Sep 17 00:00:00 2001
From: Clint Taylor <clinton.a.taylor@intel.com>
Date: Thu, 25 Oct 2018 11:52:00 -0700
Subject: [PATCH 0679/1890] drm/i915/hdmi: Add HDMI 2.0 audio clock recovery N
 values

HDMI 2.0 594Mhz modes were incorrectly selecting 25.200Mhz Automatic N value
mode instead of HDMI specification values.

V2: Fix 88.2 Hz N value

Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: stable@vger.kernel.org
Signed-off-by: Clint Taylor <clinton.a.taylor@intel.com>
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1540493521-1746-2-git-send-email-clinton.a.taylor@intel.com
(cherry picked from commit 5a400aa3c562c4a726b4da286e63c96db905ade1)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_audio.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c
index 769f3f5866611..ee3ca2de983b9 100644
--- a/drivers/gpu/drm/i915/intel_audio.c
+++ b/drivers/gpu/drm/i915/intel_audio.c
@@ -144,6 +144,9 @@ static const struct {
 /* HDMI N/CTS table */
 #define TMDS_297M 297000
 #define TMDS_296M 296703
+#define TMDS_594M 594000
+#define TMDS_593M 593407
+
 static const struct {
 	int sample_rate;
 	int clock;
@@ -164,6 +167,20 @@ static const struct {
 	{ 176400, TMDS_297M, 18816, 247500 },
 	{ 192000, TMDS_296M, 23296, 281250 },
 	{ 192000, TMDS_297M, 20480, 247500 },
+	{ 44100, TMDS_593M, 8918, 937500 },
+	{ 44100, TMDS_594M, 9408, 990000 },
+	{ 48000, TMDS_593M, 5824, 562500 },
+	{ 48000, TMDS_594M, 6144, 594000 },
+	{ 32000, TMDS_593M, 5824, 843750 },
+	{ 32000, TMDS_594M, 3072, 445500 },
+	{ 88200, TMDS_593M, 17836, 937500 },
+	{ 88200, TMDS_594M, 18816, 990000 },
+	{ 96000, TMDS_593M, 11648, 562500 },
+	{ 96000, TMDS_594M, 12288, 594000 },
+	{ 176400, TMDS_593M, 35672, 937500 },
+	{ 176400, TMDS_594M, 37632, 990000 },
+	{ 192000, TMDS_593M, 23296, 562500 },
+	{ 192000, TMDS_594M, 24576, 594000 },
 };
 
 /* get AUD_CONFIG_PIXEL_CLOCK_HDMI_* value for mode */
-- 
GitLab


From c58281056a8b26d5d9dc15c19859a7880835ef44 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu, 25 Oct 2018 10:18:22 +0100
Subject: [PATCH 0680/1890] drm/i915: Mark up GTT sizes as u64

Since we use a 64b virtual GTT irrespective of the system, we want to
ensure that the GTT computations remains 64b even on 32b systems,
including treatment of huge virtual pages.

No code generation changes on 64b:

Reported-by: Sergii Romantsov <sergii.romantsov@globallogic.com>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108282
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: stable@vger.kernel.org
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181025091823.20571-1-chris@chris-wilson.co.uk
(cherry picked from commit 9125963a9494253fa5a29cc1b4169885d2be7042)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c           | 2 +-
 drivers/gpu/drm/i915/i915_gem_gtt.h           | 6 +++---
 drivers/gpu/drm/i915/selftests/huge_pages.c   | 2 +-
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 6 +++---
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 56c7f86373112..47c3025437990 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1757,7 +1757,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *base, struct seq_file *m)
 			if (i == 4)
 				continue;
 
-			seq_printf(m, "\t\t(%03d, %04d) %08lx: ",
+			seq_printf(m, "\t\t(%03d, %04d) %08llx: ",
 				   pde, pte,
 				   (pde * GEN6_PTES + pte) * I915_GTT_PAGE_SIZE);
 			for (i = 0; i < 4; i++) {
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 7e2af5f4f39bc..aa83070430361 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -42,9 +42,9 @@
 #include "i915_selftest.h"
 #include "i915_timeline.h"
 
-#define I915_GTT_PAGE_SIZE_4K BIT(12)
-#define I915_GTT_PAGE_SIZE_64K BIT(16)
-#define I915_GTT_PAGE_SIZE_2M BIT(21)
+#define I915_GTT_PAGE_SIZE_4K	BIT_ULL(12)
+#define I915_GTT_PAGE_SIZE_64K	BIT_ULL(16)
+#define I915_GTT_PAGE_SIZE_2M	BIT_ULL(21)
 
 #define I915_GTT_PAGE_SIZE I915_GTT_PAGE_SIZE_4K
 #define I915_GTT_MAX_PAGE_SIZE I915_GTT_PAGE_SIZE_2M
diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c
index 8d03f64eabd71..5c22f2c8d4cfe 100644
--- a/drivers/gpu/drm/i915/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/selftests/huge_pages.c
@@ -551,7 +551,7 @@ static int igt_mock_ppgtt_misaligned_dma(void *arg)
 			err = igt_check_page_sizes(vma);
 
 			if (vma->page_sizes.gtt != I915_GTT_PAGE_SIZE_4K) {
-				pr_err("page_sizes.gtt=%u, expected %lu\n",
+				pr_err("page_sizes.gtt=%u, expected %llu\n",
 				       vma->page_sizes.gtt, I915_GTT_PAGE_SIZE_4K);
 				err = -EINVAL;
 			}
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index 8e2e269db97e8..127d815136717 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -1337,7 +1337,7 @@ static int igt_gtt_reserve(void *arg)
 		GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 		if (vma->node.start != total ||
 		    vma->node.size != 2*I915_GTT_PAGE_SIZE) {
-			pr_err("i915_gem_gtt_reserve (pass 1) placement failed, found (%llx + %llx), expected (%llx + %lx)\n",
+			pr_err("i915_gem_gtt_reserve (pass 1) placement failed, found (%llx + %llx), expected (%llx + %llx)\n",
 			       vma->node.start, vma->node.size,
 			       total, 2*I915_GTT_PAGE_SIZE);
 			err = -EINVAL;
@@ -1386,7 +1386,7 @@ static int igt_gtt_reserve(void *arg)
 		GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 		if (vma->node.start != total ||
 		    vma->node.size != 2*I915_GTT_PAGE_SIZE) {
-			pr_err("i915_gem_gtt_reserve (pass 2) placement failed, found (%llx + %llx), expected (%llx + %lx)\n",
+			pr_err("i915_gem_gtt_reserve (pass 2) placement failed, found (%llx + %llx), expected (%llx + %llx)\n",
 			       vma->node.start, vma->node.size,
 			       total, 2*I915_GTT_PAGE_SIZE);
 			err = -EINVAL;
@@ -1430,7 +1430,7 @@ static int igt_gtt_reserve(void *arg)
 		GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 		if (vma->node.start != offset ||
 		    vma->node.size != 2*I915_GTT_PAGE_SIZE) {
-			pr_err("i915_gem_gtt_reserve (pass 3) placement failed, found (%llx + %llx), expected (%llx + %lx)\n",
+			pr_err("i915_gem_gtt_reserve (pass 3) placement failed, found (%llx + %llx), expected (%llx + %llx)\n",
 			       vma->node.start, vma->node.size,
 			       offset, 2*I915_GTT_PAGE_SIZE);
 			err = -EINVAL;
-- 
GitLab


From 085603287452fc96376ed4888bf29f8c095d2b40 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu, 25 Oct 2018 10:18:23 +0100
Subject: [PATCH 0681/1890] drm/i915: Compare user's 64b GTT offset even on 32b

Beware mixing unsigned long constants and 64b values, as on 32b the
constant will be zero extended and discard the high 32b when used as
a mask!

Reported-by: Sergii Romantsov <sergii.romantsov@globallogic.com>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108282
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: stable@vger.kernel.org
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181025091823.20571-2-chris@chris-wilson.co.uk
(cherry picked from commit 6fc4e48f9ed46e9adff236a0c350074aafa3b7fa)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/gtt.h             | 1 -
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +-
 drivers/gpu/drm/i915/i915_gem_gtt.h        | 2 ++
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h
index 7a9b36176efb7..bfb6f652b09fc 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.h
+++ b/drivers/gpu/drm/i915/gvt/gtt.h
@@ -35,7 +35,6 @@
 #define _GVT_GTT_H_
 
 #define I915_GTT_PAGE_SHIFT         12
-#define I915_GTT_PAGE_MASK		(~(I915_GTT_PAGE_SIZE - 1))
 
 struct intel_vgpu_mm;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 09187286d3462..1aaccbe7e1deb 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -460,7 +460,7 @@ eb_validate_vma(struct i915_execbuffer *eb,
 	 * any non-page-aligned or non-canonical addresses.
 	 */
 	if (unlikely(entry->flags & EXEC_OBJECT_PINNED &&
-		     entry->offset != gen8_canonical_addr(entry->offset & PAGE_MASK)))
+		     entry->offset != gen8_canonical_addr(entry->offset & I915_GTT_PAGE_MASK)))
 		return -EINVAL;
 
 	/* pad_to_size was once a reserved field, so sanitize it */
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index aa83070430361..5d2c5ba55ad83 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -49,6 +49,8 @@
 #define I915_GTT_PAGE_SIZE I915_GTT_PAGE_SIZE_4K
 #define I915_GTT_MAX_PAGE_SIZE I915_GTT_PAGE_SIZE_2M
 
+#define I915_GTT_PAGE_MASK -I915_GTT_PAGE_SIZE
+
 #define I915_GTT_MIN_ALIGNMENT I915_GTT_PAGE_SIZE
 
 #define I915_FENCE_REG_NONE -1
-- 
GitLab


From 2c2f6e30d5f29691e3563d334ce208d3a1907f49 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Date: Thu, 25 Oct 2018 17:56:36 -0700
Subject: [PATCH 0682/1890] drm/i915/glk: Remove 99% limitation.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

While checking the opportunity to add a display_gen
check to allow glk and cnl to be on same bucket I noticed
these FIXME cases here.

So I got the confirmation from HW architect that we actually
never needed this workaround.

"GLK supports 2 pixel per clock, so pixel clock can be up to 2 * cdclk."

So, this reverts commit 97f55ca5b662 ("drm/i915/glk: limit pixel
 clock to 99% of cdclk workaround")

Fixes: 97f55ca5b662 ("drm/i915/glk: limit pixel clock to 99% of cdclk workaround")

Cc: Ville SyrjÃ¤lÃ¤ <ville.syrjala@linux.intel.com>
Cc: Madhav Chauhan <madhav.chauhan@intel.com>
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: Clinton Taylor <clinton.a.taylor@intel.com>
Cc: Arthur J Runyan <arthur.j.runyan@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Ville SyrjÃ¤lÃ¤ <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181026005636.22274-1-rodrigo.vivi@intel.com
(cherry picked from commit 42882336e62aab00278114392a16374f272a0c99)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_cdclk.c | 18 ++----------------
 1 file changed, 2 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c
index 29075c7634280..8d74276029e62 100644
--- a/drivers/gpu/drm/i915/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/intel_cdclk.c
@@ -2138,16 +2138,8 @@ void intel_set_cdclk(struct drm_i915_private *dev_priv,
 static int intel_pixel_rate_to_cdclk(struct drm_i915_private *dev_priv,
 				     int pixel_rate)
 {
-	if (INTEL_GEN(dev_priv) >= 10)
+	if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv))
 		return DIV_ROUND_UP(pixel_rate, 2);
-	else if (IS_GEMINILAKE(dev_priv))
-		/*
-		 * FIXME: Avoid using a pixel clock that is more than 99% of the cdclk
-		 * as a temporary workaround. Use a higher cdclk instead. (Note that
-		 * intel_compute_max_dotclk() limits the max pixel clock to 99% of max
-		 * cdclk.)
-		 */
-		return DIV_ROUND_UP(pixel_rate * 100, 2 * 99);
 	else if (IS_GEN9(dev_priv) ||
 		 IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv))
 		return pixel_rate;
@@ -2543,14 +2535,8 @@ static int intel_compute_max_dotclk(struct drm_i915_private *dev_priv)
 {
 	int max_cdclk_freq = dev_priv->max_cdclk_freq;
 
-	if (INTEL_GEN(dev_priv) >= 10)
+	if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv))
 		return 2 * max_cdclk_freq;
-	else if (IS_GEMINILAKE(dev_priv))
-		/*
-		 * FIXME: Limiting to 99% as a temporary workaround. See
-		 * intel_min_cdclk() for details.
-		 */
-		return 2 * max_cdclk_freq * 99 / 100;
 	else if (IS_GEN9(dev_priv) ||
 		 IS_BROADWELL(dev_priv) || IS_HASWELL(dev_priv))
 		return max_cdclk_freq;
-- 
GitLab


From 76271ef2638ca8e4bf2884cad664a34be0d5a42b Mon Sep 17 00:00:00 2001
From: Dhinakaran Pandiyan <dhinakaran.pandiyan@intel.com>
Date: Fri, 5 Oct 2018 11:56:42 -0700
Subject: [PATCH 0683/1890] drm/i915: Fix VIDEO_DIP_CTL bit shifts

The shifts for VSC_SELECT bits are wrong, fix it. Good thing is the
definitions are unused.

v2: Moves definitions in another patch (Manasi)
Cc: Manasi Navare <manasi.d.navare@intel.com>
Cc: Anusha Srivatsa <anusha.srivatsa@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Fixes: 7af2be6d54d4 ("drm/i915/icl: Add VIDEO_DIP registers")
Signed-off-by: Dhinakaran Pandiyan <dhinakaran.pandiyan@intel.com>
Reviewed-by: Manasi Navare <manasi.d.navare@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181005185643.31660-1-dhinakaran.pandiyan@intel.com
(cherry picked from commit 09209662618f9fdc38b8d4da39040c8829fd2d57)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_reg.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 7c491ea3d052a..f2d92aee721bd 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -4593,12 +4593,12 @@ enum {
 
 #define  DRM_DIP_ENABLE			(1 << 28)
 #define  PSR_VSC_BIT_7_SET		(1 << 27)
-#define  VSC_SELECT_MASK		(0x3 << 26)
-#define  VSC_SELECT_SHIFT		26
-#define  VSC_DIP_HW_HEA_DATA		(0 << 26)
-#define  VSC_DIP_HW_HEA_SW_DATA		(1 << 26)
-#define  VSC_DIP_HW_DATA_SW_HEA		(2 << 26)
-#define  VSC_DIP_SW_HEA_DATA		(3 << 26)
+#define  VSC_SELECT_MASK		(0x3 << 25)
+#define  VSC_SELECT_SHIFT		25
+#define  VSC_DIP_HW_HEA_DATA		(0 << 25)
+#define  VSC_DIP_HW_HEA_SW_DATA		(1 << 25)
+#define  VSC_DIP_HW_DATA_SW_HEA		(2 << 25)
+#define  VSC_DIP_SW_HEA_DATA		(3 << 25)
 #define  VDIP_ENABLE_PPS		(1 << 24)
 
 /* Panel power sequencing */
-- 
GitLab


From f42f343887016330b321dd40eebc68c7292e4f1b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Mon, 29 Oct 2018 16:00:31 +0200
Subject: [PATCH 0684/1890] drm/i915: Fix error handling for the NV12 fb
 dimensions check
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Let's not leak obj->framebuffer_references when we decide that
the framebuffer domensions are not suitable for NV12.

Cc: stable@vger.kernel.org
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Vidya Srinivas <vidya.srinivas@intel.com>
Fixes: e44134f2673c ("drm/i915: Add NV12 support to intel_framebuffer_init")
Signed-off-by: Ville SyrjÃ¤lÃ¤ <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181029140031.11765-1-ville.syrjala@linux.intel.com
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
(cherry picked from commit 3b90946fcb6f13b65888c380461793a9dea9d1f4)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_display.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 9741cc419e1b2..b8dfdbc9ca1f6 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -14646,7 +14646,7 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 	     fb->height < SKL_MIN_YUV_420_SRC_H ||
 	     (fb->width % 4) != 0 || (fb->height % 4) != 0)) {
 		DRM_DEBUG_KMS("src dimensions not correct for NV12\n");
-		return -EINVAL;
+		goto err;
 	}
 
 	for (i = 0; i < fb->format->num_planes; i++) {
-- 
GitLab


From e528c2affcf216b3d02b22004895cb678769629b Mon Sep 17 00:00:00 2001
From: Manasi Navare <manasi.d.navare@intel.com>
Date: Tue, 23 Oct 2018 12:12:47 -0700
Subject: [PATCH 0685/1890] drm/i915/icl: Fix the macros for DFLEXDPMLE
 register bits
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch fixes the macros used for defining the DFLEXDPMLE
register bit fields. This accounts for changes in the spec.

Fixes: a2bc69a1a9d6 ("drm/i915/icl: Add register definition for DFLEXDPMLE")
Cc: Animesh Manna <animesh.manna@intel.com>
Cc: Paulo Zanoni <paulo.r.zanoni@intel.com>
Cc: Jose Roberto de Souza <jose.souza@intel.com>
Cc: <stable@vger.kernel.org> # v4.19+
Signed-off-by: Manasi Navare <manasi.d.navare@intel.com>
Reviewed-by: JosÃ© Roberto de Souza <jose.souza@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181023191248.26418-1-manasi.d.navare@intel.com
(cherry picked from commit b4335ec0a3ee6229a570755f8fb95dc8a7c694f2)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_reg.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index f2d92aee721bd..e31c27e45734e 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -2095,8 +2095,12 @@ enum i915_power_well_id {
 
 /* ICL PHY DFLEX registers */
 #define PORT_TX_DFLEXDPMLE1		_MMIO(0x1638C0)
-#define   DFLEXDPMLE1_DPMLETC_MASK(n)	(0xf << (4 * (n)))
-#define   DFLEXDPMLE1_DPMLETC(n, x)	((x) << (4 * (n)))
+#define   DFLEXDPMLE1_DPMLETC_MASK(tc_port)	(0xf << (4 * (tc_port)))
+#define   DFLEXDPMLE1_DPMLETC_ML0(tc_port)	(1 << (4 * (tc_port)))
+#define   DFLEXDPMLE1_DPMLETC_ML1_0(tc_port)	(3 << (4 * (tc_port)))
+#define   DFLEXDPMLE1_DPMLETC_ML3(tc_port)	(8 << (4 * (tc_port)))
+#define   DFLEXDPMLE1_DPMLETC_ML3_2(tc_port)	(12 << (4 * (tc_port)))
+#define   DFLEXDPMLE1_DPMLETC_ML3_0(tc_port)	(15 << (4 * (tc_port)))
 
 /* BXT PHY Ref registers */
 #define _PORT_REF_DW3_A			0x16218C
-- 
GitLab


From 0014868b9c3c1dda1de6711cf58c3486fb422d07 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Fri, 2 Nov 2018 16:12:09 +0000
Subject: [PATCH 0686/1890] drm/i915: Mark pin flags as u64

Since the flags are being used to operate on a u64 variable, they too
need to be marked as such so that the inverses are full width (and not
zero extended on 32b kernels and bdw+).

Reported-by: Sergii Romantsov <sergii.romantsov@globallogic.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: stable@vger.kernel.org
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181102161232.17742-2-chris@chris-wilson.co.uk
(cherry picked from commit 83b466b1dc5f0b4d33f0a901e8b00197a8f3582d)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.h | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 5d2c5ba55ad83..28039290655cb 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -661,20 +661,20 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
 			u64 start, u64 end, unsigned int flags);
 
 /* Flags used by pin/bind&friends. */
-#define PIN_NONBLOCK		BIT(0)
-#define PIN_MAPPABLE		BIT(1)
-#define PIN_ZONE_4G		BIT(2)
-#define PIN_NONFAULT		BIT(3)
-#define PIN_NOEVICT		BIT(4)
-
-#define PIN_MBZ			BIT(5) /* I915_VMA_PIN_OVERFLOW */
-#define PIN_GLOBAL		BIT(6) /* I915_VMA_GLOBAL_BIND */
-#define PIN_USER		BIT(7) /* I915_VMA_LOCAL_BIND */
-#define PIN_UPDATE		BIT(8)
-
-#define PIN_HIGH		BIT(9)
-#define PIN_OFFSET_BIAS		BIT(10)
-#define PIN_OFFSET_FIXED	BIT(11)
+#define PIN_NONBLOCK		BIT_ULL(0)
+#define PIN_MAPPABLE		BIT_ULL(1)
+#define PIN_ZONE_4G		BIT_ULL(2)
+#define PIN_NONFAULT		BIT_ULL(3)
+#define PIN_NOEVICT		BIT_ULL(4)
+
+#define PIN_MBZ			BIT_ULL(5) /* I915_VMA_PIN_OVERFLOW */
+#define PIN_GLOBAL		BIT_ULL(6) /* I915_VMA_GLOBAL_BIND */
+#define PIN_USER		BIT_ULL(7) /* I915_VMA_LOCAL_BIND */
+#define PIN_UPDATE		BIT_ULL(8)
+
+#define PIN_HIGH		BIT_ULL(9)
+#define PIN_OFFSET_BIAS		BIT_ULL(10)
+#define PIN_OFFSET_FIXED	BIT_ULL(11)
 #define PIN_OFFSET_MASK		(-I915_GTT_PAGE_SIZE)
 
 #endif
-- 
GitLab


From 6a8915d0f8cf323e1beb792a33095cf652db4056 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Mon, 5 Nov 2018 21:46:04 +0200
Subject: [PATCH 0687/1890] drm/i915: Don't oops during modeset shutdown after
 lpe audio deinit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We deinit the lpe audio device before we call
drm_atomic_helper_shutdown(), which means the platform device
may already be gone when it comes time to shut down the crtc.
As we don't know when the last reference to the platform
device gets dropped by the audio driver we can't assume that
the device and its data are still around when turning off the
crtc. Mark the platform device as gone as soon as we do the
audio deinit.

Cc: stable@vger.kernel.org
Signed-off-by: Ville SyrjÃ¤lÃ¤ <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181105194604.6994-1-ville.syrjala@linux.intel.com
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
(cherry picked from commit f45a7977d1140c11f334e01a9f77177ed68e3bfa)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_lpe_audio.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_lpe_audio.c b/drivers/gpu/drm/i915/intel_lpe_audio.c
index cdf19553ffacd..5d5336fbe7b05 100644
--- a/drivers/gpu/drm/i915/intel_lpe_audio.c
+++ b/drivers/gpu/drm/i915/intel_lpe_audio.c
@@ -297,8 +297,10 @@ void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv)
 	lpe_audio_platdev_destroy(dev_priv);
 
 	irq_free_desc(dev_priv->lpe_audio.irq);
-}
 
+	dev_priv->lpe_audio.irq = -1;
+	dev_priv->lpe_audio.platdev = NULL;
+}
 
 /**
  * intel_lpe_audio_notify() - notify lpe audio event
-- 
GitLab


From 5e93a125f521efd00d71af31c2a301f3d46af48c Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 5 Nov 2018 12:28:07 +0100
Subject: [PATCH 0688/1890] ALSA: hda - Fix incorrect clearance of
 thinkpad_acpi hooks

Since the commit c647f806b8c2 ("ALSA: hda - Allow multiple ADCs for
mic mute LED controls") we allow enabling the mic mute LED with
multiple ADCs.  The commit changed the function return value to be
zero or a negative error, while this change was overlooked in the
thinkpad_acpi helper code where it still expects a positive return
value for success.  This eventually leads to a NULL dereference on a
system that has only a mic mute LED.

This patch corrects the return value check in the corresponding code
as well.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=201621
Fixes: c647f806b8c2 ("ALSA: hda - Allow multiple ADCs for mic mute LED controls")
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/thinkpad_helper.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sound/pci/hda/thinkpad_helper.c b/sound/pci/hda/thinkpad_helper.c
index 97f49b751e6eb..568575b72f2f7 100644
--- a/sound/pci/hda/thinkpad_helper.c
+++ b/sound/pci/hda/thinkpad_helper.c
@@ -58,8 +58,8 @@ static void hda_fixup_thinkpad_acpi(struct hda_codec *codec,
 			removefunc = false;
 		}
 		if (led_set_func(TPACPI_LED_MICMUTE, false) >= 0 &&
-		    snd_hda_gen_add_micmute_led(codec,
-						update_tpacpi_micmute) > 0)
+		    !snd_hda_gen_add_micmute_led(codec,
+						 update_tpacpi_micmute))
 			removefunc = false;
 	}
 
-- 
GitLab


From 506481b20e818db40b6198815904ecd2d6daee64 Mon Sep 17 00:00:00 2001
From: Robbie Ko <robbieko@synology.com>
Date: Tue, 30 Oct 2018 18:04:04 +0800
Subject: [PATCH 0689/1890] Btrfs: fix cur_offset in the error case for nocow

When the cow_file_range fails, the related resources are unlocked
according to the range [start..end), so the unlock cannot be repeated in
run_delalloc_nocow.

In some cases (e.g. cur_offset <= end && cow_start != -1), cur_offset is
not updated correctly, so move the cur_offset update before
cow_file_range.

  kernel BUG at mm/page-writeback.c:2663!
  Internal error: Oops - BUG: 0 [#1] SMP
  CPU: 3 PID: 31525 Comm: kworker/u8:7 Tainted: P O
  Hardware name: Realtek_RTD1296 (DT)
  Workqueue: writeback wb_workfn (flush-btrfs-1)
  task: ffffffc076db3380 ti: ffffffc02e9ac000 task.ti: ffffffc02e9ac000
  PC is at clear_page_dirty_for_io+0x1bc/0x1e8
  LR is at clear_page_dirty_for_io+0x14/0x1e8
  pc : [<ffffffc00033c91c>] lr : [<ffffffc00033c774>] pstate: 40000145
  sp : ffffffc02e9af4f0
  Process kworker/u8:7 (pid: 31525, stack limit = 0xffffffc02e9ac020)
  Call trace:
  [<ffffffc00033c91c>] clear_page_dirty_for_io+0x1bc/0x1e8
  [<ffffffbffc514674>] extent_clear_unlock_delalloc+0x1e4/0x210 [btrfs]
  [<ffffffbffc4fb168>] run_delalloc_nocow+0x3b8/0x948 [btrfs]
  [<ffffffbffc4fb948>] run_delalloc_range+0x250/0x3a8 [btrfs]
  [<ffffffbffc514c0c>] writepage_delalloc.isra.21+0xbc/0x1d8 [btrfs]
  [<ffffffbffc516048>] __extent_writepage+0xe8/0x248 [btrfs]
  [<ffffffbffc51630c>] extent_write_cache_pages.isra.17+0x164/0x378 [btrfs]
  [<ffffffbffc5185a8>] extent_writepages+0x48/0x68 [btrfs]
  [<ffffffbffc4f5828>] btrfs_writepages+0x20/0x30 [btrfs]
  [<ffffffc00033d758>] do_writepages+0x30/0x88
  [<ffffffc0003ba0f4>] __writeback_single_inode+0x34/0x198
  [<ffffffc0003ba6c4>] writeback_sb_inodes+0x184/0x3c0
  [<ffffffc0003ba96c>] __writeback_inodes_wb+0x6c/0xc0
  [<ffffffc0003bac20>] wb_writeback+0x1b8/0x1c0
  [<ffffffc0003bb0f0>] wb_workfn+0x150/0x250
  [<ffffffc0002b0014>] process_one_work+0x1dc/0x388
  [<ffffffc0002b02f0>] worker_thread+0x130/0x500
  [<ffffffc0002b6344>] kthread+0x10c/0x110
  [<ffffffc000284590>] ret_from_fork+0x10/0x40
  Code: d503201f a9025bb5 a90363b7 f90023b9 (d4210000)

CC: stable@vger.kernel.org # 4.4+
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Robbie Ko <robbieko@synology.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index f4d31fd62eed7..55761b1519f57 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1531,12 +1531,11 @@ static noinline int run_delalloc_nocow(struct inode *inode,
 	}
 	btrfs_release_path(path);
 
-	if (cur_offset <= end && cow_start == (u64)-1) {
+	if (cur_offset <= end && cow_start == (u64)-1)
 		cow_start = cur_offset;
-		cur_offset = end;
-	}
 
 	if (cow_start != (u64)-1) {
+		cur_offset = end;
 		ret = cow_file_range(inode, locked_page, cow_start, end, end,
 				     page_started, nr_written, 1, NULL);
 		if (ret)
-- 
GitLab


From fcd5e74288f7d36991b1f0fb96b8c57079645e38 Mon Sep 17 00:00:00 2001
From: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Date: Wed, 24 Oct 2018 20:24:03 +0800
Subject: [PATCH 0690/1890] btrfs: fix pinned underflow after transaction
 aborted

When running generic/475, we may get the following warning in dmesg:

[ 6902.102154] WARNING: CPU: 3 PID: 18013 at fs/btrfs/extent-tree.c:9776 btrfs_free_block_groups+0x2af/0x3b0 [btrfs]
[ 6902.109160] CPU: 3 PID: 18013 Comm: umount Tainted: G        W  O      4.19.0-rc8+ #8
[ 6902.110971] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015
[ 6902.112857] RIP: 0010:btrfs_free_block_groups+0x2af/0x3b0 [btrfs]
[ 6902.118921] RSP: 0018:ffffc9000459bdb0 EFLAGS: 00010286
[ 6902.120315] RAX: ffff880175050bb0 RBX: ffff8801124a8000 RCX: 0000000000170007
[ 6902.121969] RDX: 0000000000000002 RSI: 0000000000170007 RDI: ffffffff8125fb74
[ 6902.123716] RBP: ffff880175055d10 R08: 0000000000000000 R09: 0000000000000000
[ 6902.125417] R10: 0000000000000000 R11: 0000000000000000 R12: ffff880175055d88
[ 6902.127129] R13: ffff880175050bb0 R14: 0000000000000000 R15: dead000000000100
[ 6902.129060] FS:  00007f4507223780(0000) GS:ffff88017ba00000(0000) knlGS:0000000000000000
[ 6902.130996] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 6902.132558] CR2: 00005623599cac78 CR3: 000000014b700001 CR4: 00000000003606e0
[ 6902.134270] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 6902.135981] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 6902.137836] Call Trace:
[ 6902.138939]  close_ctree+0x171/0x330 [btrfs]
[ 6902.140181]  ? kthread_stop+0x146/0x1f0
[ 6902.141277]  generic_shutdown_super+0x6c/0x100
[ 6902.142517]  kill_anon_super+0x14/0x30
[ 6902.143554]  btrfs_kill_super+0x13/0x100 [btrfs]
[ 6902.144790]  deactivate_locked_super+0x2f/0x70
[ 6902.146014]  cleanup_mnt+0x3b/0x70
[ 6902.147020]  task_work_run+0x9e/0xd0
[ 6902.148036]  do_syscall_64+0x470/0x600
[ 6902.149142]  ? trace_hardirqs_off_thunk+0x1a/0x1c
[ 6902.150375]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
[ 6902.151640] RIP: 0033:0x7f45077a6a7b
[ 6902.157324] RSP: 002b:00007ffd589f3e68 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6
[ 6902.159187] RAX: 0000000000000000 RBX: 000055e8eec732b0 RCX: 00007f45077a6a7b
[ 6902.160834] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 000055e8eec73490
[ 6902.162526] RBP: 0000000000000000 R08: 000055e8eec734b0 R09: 00007ffd589f26c0
[ 6902.164141] R10: 0000000000000000 R11: 0000000000000246 R12: 000055e8eec73490
[ 6902.165815] R13: 00007f4507ac61a4 R14: 0000000000000000 R15: 00007ffd589f40d8
[ 6902.167553] irq event stamp: 0
[ 6902.168998] hardirqs last  enabled at (0): [<0000000000000000>]           (null)
[ 6902.170731] hardirqs last disabled at (0): [<ffffffff810cd810>] copy_process.part.55+0x3b0/0x1f00
[ 6902.172773] softirqs last  enabled at (0): [<ffffffff810cd810>] copy_process.part.55+0x3b0/0x1f00
[ 6902.174671] softirqs last disabled at (0): [<0000000000000000>]           (null)
[ 6902.176407] ---[ end trace 463138c2986b275c ]---
[ 6902.177636] BTRFS info (device dm-3): space_info 4 has 273465344 free, is not full
[ 6902.179453] BTRFS info (device dm-3): space_info total=276824064, used=4685824, pinned=18446744073708158976, reserved=0, may_use=0, readonly=65536

In the above line there's "pinned=18446744073708158976" which is an
unsigned u64 value of -1392640, an obvious underflow.

When transaction_kthread is running cleanup_transaction(), another
fsstress is running btrfs_commit_transaction(). The
btrfs_finish_extent_commit() may get the same range as
btrfs_destroy_pinned_extent() got, which causes the pinned underflow.

Fixes: d4b450cd4b33 ("Btrfs: fix race between transaction commit and empty block group removal")
CC: stable@vger.kernel.org # 4.4+
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/disk-io.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index b0ab41da91d12..00ee5e37e9897 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4359,13 +4359,23 @@ static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
 	unpin = pinned_extents;
 again:
 	while (1) {
+		/*
+		 * The btrfs_finish_extent_commit() may get the same range as
+		 * ours between find_first_extent_bit and clear_extent_dirty.
+		 * Hence, hold the unused_bg_unpin_mutex to avoid double unpin
+		 * the same extent range.
+		 */
+		mutex_lock(&fs_info->unused_bg_unpin_mutex);
 		ret = find_first_extent_bit(unpin, 0, &start, &end,
 					    EXTENT_DIRTY, NULL);
-		if (ret)
+		if (ret) {
+			mutex_unlock(&fs_info->unused_bg_unpin_mutex);
 			break;
+		}
 
 		clear_extent_dirty(unpin, start, end);
 		btrfs_error_unpin_extent_range(fs_info, start, end);
+		mutex_unlock(&fs_info->unused_bg_unpin_mutex);
 		cond_resched();
 	}
 
-- 
GitLab


From 008c6753f7e070c77c70d708a6bf0255b4381763 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Mon, 29 Oct 2018 09:42:06 +0000
Subject: [PATCH 0691/1890] Btrfs: fix missing data checksums after a ranged
 fsync (msync)

Recently we got a massive simplification for fsync, where for the fast
path we no longer log new extents while their respective ordered extents
are still running.

However that simplification introduced a subtle regression for the case
where we use a ranged fsync (msync). Consider the following example:

               CPU 0                                    CPU 1

                                            mmap write to range [2Mb, 4Mb[
  mmap write to range [512Kb, 1Mb[
  msync range [512K, 1Mb[
    --> triggers fast fsync
        (BTRFS_INODE_NEEDS_FULL_SYNC
         not set)
    --> creates extent map A for this
        range and adds it to list of
        modified extents
    --> starts ordered extent A for
        this range
    --> waits for it to complete

                                            writeback triggered for range
                                            [2Mb, 4Mb[
                                              --> create extent map B and
                                                  adds it to the list of
                                                  modified extents
                                              --> creates ordered extent B

    --> start looking for and logging
        modified extents
    --> logs extent maps A and B
    --> finds checksums for extent A
        in the csum tree, but not for
        extent B
  fsync (msync) finishes

                                              --> ordered extent B
                                                  finishes and its
                                                  checksums are added
                                                  to the csum tree

                                <power cut>

After replaying the log, we have the extent covering the range [2Mb, 4Mb[
but do not have the data checksum items covering that file range.

This happens because at the very beginning of an fsync (btrfs_sync_file())
we start and wait for IO in the given range [512Kb, 1Mb[ and therefore
wait for any ordered extents in that range to complete before we start
logging the extents. However if right before we start logging the extent
in our range [512Kb, 1Mb[, writeback is started for any other dirty range,
such as the range [2Mb, 4Mb[ due to memory pressure or a concurrent fsync
or msync (btrfs_sync_file() starts writeback before acquiring the inode's
lock), an ordered extent is created for that other range and a new extent
map is created to represent that range and added to the inode's list of
modified extents.

That means that we will see that other extent in that list when collecting
extents for logging (done at btrfs_log_changed_extents()) and log the
extent before the respective ordered extent finishes - namely before the
checksum items are added to the checksums tree, which is where
log_extent_csums() looks for the checksums, therefore making us log an
extent without logging its checksums. Before that massive simplification
of fsync, this wasn't a problem because besides looking for checkums in
the checksums tree, we also looked for them in any ordered extent still
running.

The consequence of data checksums missing for a file range is that users
attempting to read the affected file range will get -EIO errors and dmesg
reports the following:

 [10188.358136] BTRFS info (device sdc): no csum found for inode 297 start 57344
 [10188.359278] BTRFS warning (device sdc): csum failed root 5 ino 297 off 57344 csum 0x98f94189 expected csum 0x00000000 mirror 1

So fix this by skipping extents outside of our logging range at
btrfs_log_changed_extents() and leaving them on the list of modified
extents so that any subsequent ranged fsync may collect them if needed.
Also, if we find a hole extent outside of the range still log it, just
to prevent having gaps between extent items after replaying the log,
otherwise fsck will complain when we are not using the NO_HOLES feature
(fstest btrfs/056 triggers such case).

Fixes: e7175a692765 ("btrfs: remove the wait ordered logic in the log_one_extent path")
CC: stable@vger.kernel.org # 4.19+
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/tree-log.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index e07f3376b7dfc..a5ce99a6c9365 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4396,6 +4396,23 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
 	logged_end = end;
 
 	list_for_each_entry_safe(em, n, &tree->modified_extents, list) {
+		/*
+		 * Skip extents outside our logging range. It's important to do
+		 * it for correctness because if we don't ignore them, we may
+		 * log them before their ordered extent completes, and therefore
+		 * we could log them without logging their respective checksums
+		 * (the checksum items are added to the csum tree at the very
+		 * end of btrfs_finish_ordered_io()). Also leave such extents
+		 * outside of our range in the list, since we may have another
+		 * ranged fsync in the near future that needs them. If an extent
+		 * outside our range corresponds to a hole, log it to avoid
+		 * leaving gaps between extents (fsck will complain when we are
+		 * not using the NO_HOLES feature).
+		 */
+		if ((em->start > end || em->start + em->len <= start) &&
+		    em->block_start != EXTENT_MAP_HOLE)
+			continue;
+
 		list_del_init(&em->list);
 		/*
 		 * Just an arbitrary number, this can be really CPU intensive
-- 
GitLab


From 761333f2f50ccc887aa9957ae829300262c0d15b Mon Sep 17 00:00:00 2001
From: Shaokun Zhang <zhangshaokun@hisilicon.com>
Date: Mon, 5 Nov 2018 18:49:09 +0800
Subject: [PATCH 0692/1890] btrfs: tree-checker: Fix misleading group system
 information

block_group_err shows the group system as a decimal value with a '0x'
prefix, which is somewhat misleading.

Fix it to print hexadecimal, as was intended.

Fixes: fce466eab7ac6 ("btrfs: tree-checker: Verify block_group_item")
CC: stable@vger.kernel.org # 4.19+
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/tree-checker.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index cab0b1f1f7417..efcf89a8ba44c 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -440,7 +440,7 @@ static int check_block_group_item(struct btrfs_fs_info *fs_info,
 	    type != (BTRFS_BLOCK_GROUP_METADATA |
 			   BTRFS_BLOCK_GROUP_DATA)) {
 		block_group_err(fs_info, leaf, slot,
-"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llu or 0x%llx",
+"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llx or 0x%llx",
 			type, hweight64(type),
 			BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA,
 			BTRFS_BLOCK_GROUP_SYSTEM,
-- 
GitLab


From 7e17916b35797396f681a3270245fd29c1e4c250 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sat, 3 Nov 2018 16:39:28 +0100
Subject: [PATCH 0693/1890] btrfs: avoid link error with CONFIG_NO_AUTO_INLINE

Note: this patch fixes a problem in a feature outside of btrfs ("kernel
hacking: add a config option to disable compiler auto-inlining") and is
applied ahead of time due to cross-subsystem dependencies.

On 32-bit ARM with gcc-8, I see a link error with the addition of the
CONFIG_NO_AUTO_INLINE option:

fs/btrfs/super.o: In function `btrfs_statfs':
super.c:(.text+0x67b8): undefined reference to `__aeabi_uldivmod'
super.c:(.text+0x67fc): undefined reference to `__aeabi_uldivmod'
super.c:(.text+0x6858): undefined reference to `__aeabi_uldivmod'
super.c:(.text+0x6920): undefined reference to `__aeabi_uldivmod'
super.c:(.text+0x693c): undefined reference to `__aeabi_uldivmod'
fs/btrfs/super.o:super.c:(.text+0x6958): more undefined references to `__aeabi_uldivmod' follow

So far this is the only file that shows the behavior, so I'd propose
to just work around it by marking the functions as 'static inline'
that normally get inlined here.

The reference to __aeabi_uldivmod comes from a div_u64() which has an
optimization for a constant division that uses a straight '/' operator
when the result should be known to the compiler. My interpretation is
that as we turn off inlining, gcc still expects the result to be constant
but fails to use that constant value.

Link: https://lkml.kernel.org/r/20181103153941.1881966-1-arnd@arndb.de
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Changbin Du <changbin.du@gmail.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
[ add the note ]
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/super.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index b362b45dd7578..cbc9d0d2c12de 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1916,7 +1916,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
 }
 
 /* Used to sort the devices by max_avail(descending sort) */
-static int btrfs_cmp_device_free_bytes(const void *dev_info1,
+static inline int btrfs_cmp_device_free_bytes(const void *dev_info1,
 				       const void *dev_info2)
 {
 	if (((struct btrfs_device_info *)dev_info1)->max_avail >
@@ -1945,8 +1945,8 @@ static inline void btrfs_descending_sort_devices(
  * The helper to calc the free space on the devices that can be used to store
  * file data.
  */
-static int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,
-				       u64 *free_bytes)
+static inline int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,
+					      u64 *free_bytes)
 {
 	struct btrfs_device_info *devices_info;
 	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
-- 
GitLab


From 4222ea7100c0e37adace2790c8822758bbeee179 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Wed, 24 Oct 2018 10:13:03 +0100
Subject: [PATCH 0694/1890] Btrfs: fix deadlock on tree root leaf when finding
 free extent

When we are writing out a free space cache, during the transaction commit
phase, we can end up in a deadlock which results in a stack trace like the
following:

 schedule+0x28/0x80
 btrfs_tree_read_lock+0x8e/0x120 [btrfs]
 ? finish_wait+0x80/0x80
 btrfs_read_lock_root_node+0x2f/0x40 [btrfs]
 btrfs_search_slot+0xf6/0x9f0 [btrfs]
 ? evict_refill_and_join+0xd0/0xd0 [btrfs]
 ? inode_insert5+0x119/0x190
 btrfs_lookup_inode+0x3a/0xc0 [btrfs]
 ? kmem_cache_alloc+0x166/0x1d0
 btrfs_iget+0x113/0x690 [btrfs]
 __lookup_free_space_inode+0xd8/0x150 [btrfs]
 lookup_free_space_inode+0x5b/0xb0 [btrfs]
 load_free_space_cache+0x7c/0x170 [btrfs]
 ? cache_block_group+0x72/0x3b0 [btrfs]
 cache_block_group+0x1b3/0x3b0 [btrfs]
 ? finish_wait+0x80/0x80
 find_free_extent+0x799/0x1010 [btrfs]
 btrfs_reserve_extent+0x9b/0x180 [btrfs]
 btrfs_alloc_tree_block+0x1b3/0x4f0 [btrfs]
 __btrfs_cow_block+0x11d/0x500 [btrfs]
 btrfs_cow_block+0xdc/0x180 [btrfs]
 btrfs_search_slot+0x3bd/0x9f0 [btrfs]
 btrfs_lookup_inode+0x3a/0xc0 [btrfs]
 ? kmem_cache_alloc+0x166/0x1d0
 btrfs_update_inode_item+0x46/0x100 [btrfs]
 cache_save_setup+0xe4/0x3a0 [btrfs]
 btrfs_start_dirty_block_groups+0x1be/0x480 [btrfs]
 btrfs_commit_transaction+0xcb/0x8b0 [btrfs]

At cache_save_setup() we need to update the inode item of a block group's
cache which is located in the tree root (fs_info->tree_root), which means
that it may result in COWing a leaf from that tree. If that happens we
need to find a free metadata extent and while looking for one, if we find
a block group which was not cached yet we attempt to load its cache by
calling cache_block_group(). However this function will try to load the
inode of the free space cache, which requires finding the matching inode
item in the tree root - if that inode item is located in the same leaf as
the inode item of the space cache we are updating at cache_save_setup(),
we end up in a deadlock, since we try to obtain a read lock on the same
extent buffer that we previously write locked.

So fix this by using the tree root's commit root when searching for a
block group's free space cache inode item when we are attempting to load
a free space cache. This is safe since block groups once loaded stay in
memory forever, as well as their caches, so after they are first loaded
we will never need to read their inode items again. For new block groups,
once they are created they get their ->cached field set to
BTRFS_CACHE_FINISHED meaning we will not need to read their inode item.

Reported-by: Andrew Nelson <andrew.s.nelson@gmail.com>
Link: https://lore.kernel.org/linux-btrfs/CAPTELenq9x5KOWuQ+fa7h1r3nsJG8vyiTH8+ifjURc_duHh2Wg@mail.gmail.com/
Fixes: 9d66e233c704 ("Btrfs: load free space cache if it exists")
Tested-by: Andrew Nelson <andrew.s.nelson@gmail.com>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h            |  3 +++
 fs/btrfs/free-space-cache.c | 22 +++++++++++++++++++++-
 fs/btrfs/inode.c            | 32 ++++++++++++++++++++++----------
 3 files changed, 46 insertions(+), 11 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 68ca41dbbef38..f7f955109d8b5 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3163,6 +3163,9 @@ void btrfs_destroy_inode(struct inode *inode);
 int btrfs_drop_inode(struct inode *inode);
 int __init btrfs_init_cachep(void);
 void __cold btrfs_destroy_cachep(void);
+struct inode *btrfs_iget_path(struct super_block *s, struct btrfs_key *location,
+			      struct btrfs_root *root, int *new,
+			      struct btrfs_path *path);
 struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
 			 struct btrfs_root *root, int *was_new);
 struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 4ba0aedc878bd..74aa552f47930 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -75,7 +75,8 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
 	 * sure NOFS is set to keep us from deadlocking.
 	 */
 	nofs_flag = memalloc_nofs_save();
-	inode = btrfs_iget(fs_info->sb, &location, root, NULL);
+	inode = btrfs_iget_path(fs_info->sb, &location, root, NULL, path);
+	btrfs_release_path(path);
 	memalloc_nofs_restore(nofs_flag);
 	if (IS_ERR(inode))
 		return inode;
@@ -838,6 +839,25 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
 	path->search_commit_root = 1;
 	path->skip_locking = 1;
 
+	/*
+	 * We must pass a path with search_commit_root set to btrfs_iget in
+	 * order to avoid a deadlock when allocating extents for the tree root.
+	 *
+	 * When we are COWing an extent buffer from the tree root, when looking
+	 * for a free extent, at extent-tree.c:find_free_extent(), we can find
+	 * block group without its free space cache loaded. When we find one
+	 * we must load its space cache which requires reading its free space
+	 * cache's inode item from the root tree. If this inode item is located
+	 * in the same leaf that we started COWing before, then we end up in
+	 * deadlock on the extent buffer (trying to read lock it when we
+	 * previously write locked it).
+	 *
+	 * It's safe to read the inode item using the commit root because
+	 * block groups, once loaded, stay in memory forever (until they are
+	 * removed) as well as their space caches once loaded. New block groups
+	 * once created get their ->cached field set to BTRFS_CACHE_FINISHED so
+	 * we will never try to read their inode item while the fs is mounted.
+	 */
 	inode = lookup_free_space_inode(fs_info, block_group, path);
 	if (IS_ERR(inode)) {
 		btrfs_free_path(path);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 55761b1519f57..e71daadd2f75a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3569,10 +3569,11 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
 /*
  * read an inode from the btree into the in-memory inode
  */
-static int btrfs_read_locked_inode(struct inode *inode)
+static int btrfs_read_locked_inode(struct inode *inode,
+				   struct btrfs_path *in_path)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-	struct btrfs_path *path;
+	struct btrfs_path *path = in_path;
 	struct extent_buffer *leaf;
 	struct btrfs_inode_item *inode_item;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -3588,15 +3589,18 @@ static int btrfs_read_locked_inode(struct inode *inode)
 	if (!ret)
 		filled = true;
 
-	path = btrfs_alloc_path();
-	if (!path)
-		return -ENOMEM;
+	if (!path) {
+		path = btrfs_alloc_path();
+		if (!path)
+			return -ENOMEM;
+	}
 
 	memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
 
 	ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
 	if (ret) {
-		btrfs_free_path(path);
+		if (path != in_path)
+			btrfs_free_path(path);
 		return ret;
 	}
 
@@ -3721,7 +3725,8 @@ static int btrfs_read_locked_inode(struct inode *inode)
 				  btrfs_ino(BTRFS_I(inode)),
 				  root->root_key.objectid, ret);
 	}
-	btrfs_free_path(path);
+	if (path != in_path)
+		btrfs_free_path(path);
 
 	if (!maybe_acls)
 		cache_no_acl(inode);
@@ -5643,8 +5648,9 @@ static struct inode *btrfs_iget_locked(struct super_block *s,
 /* Get an inode object given its location and corresponding root.
  * Returns in *is_new if the inode was read from disk
  */
-struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
-			 struct btrfs_root *root, int *new)
+struct inode *btrfs_iget_path(struct super_block *s, struct btrfs_key *location,
+			      struct btrfs_root *root, int *new,
+			      struct btrfs_path *path)
 {
 	struct inode *inode;
 
@@ -5655,7 +5661,7 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
 	if (inode->i_state & I_NEW) {
 		int ret;
 
-		ret = btrfs_read_locked_inode(inode);
+		ret = btrfs_read_locked_inode(inode, path);
 		if (!ret) {
 			inode_tree_add(inode);
 			unlock_new_inode(inode);
@@ -5677,6 +5683,12 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
 	return inode;
 }
 
+struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
+			 struct btrfs_root *root, int *new)
+{
+	return btrfs_iget_path(s, location, root, new, NULL);
+}
+
 static struct inode *new_simple_dir(struct super_block *s,
 				    struct btrfs_key *key,
 				    struct btrfs_root *root)
-- 
GitLab


From 11023d3f5fdf89bba5e1142127701ca6e6014587 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Mon, 5 Nov 2018 11:14:05 +0000
Subject: [PATCH 0695/1890] Btrfs: fix infinite loop on inode eviction after
 deduplication of eof block

If we attempt to deduplicate the last block of a file A into the middle of
a file B, and file A's size is not a multiple of the block size, we end
rounding the deduplication length to 0 bytes, to avoid the data corruption
issue fixed by commit de02b9f6bb65 ("Btrfs: fix data corruption when
deduplicating between different files"). However a length of zero will
cause the insertion of an extent state with a start value greater (by 1)
then the end value, leading to a corrupt extent state that will trigger a
warning and cause chaos such as an infinite loop during inode eviction.
Example trace:

 [96049.833585] ------------[ cut here ]------------
 [96049.833714] WARNING: CPU: 0 PID: 24448 at fs/btrfs/extent_io.c:436 insert_state+0x101/0x120 [btrfs]
 [96049.833767] CPU: 0 PID: 24448 Comm: xfs_io Not tainted 4.19.0-rc7-btrfs-next-39 #1
 [96049.833768] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.11.2-0-gf9626ccb91-prebuilt.qemu-project.org 04/01/2014
 [96049.833780] RIP: 0010:insert_state+0x101/0x120 [btrfs]
 [96049.833783] RSP: 0018:ffffafd2c3707af0 EFLAGS: 00010282
 [96049.833785] RAX: 0000000000000000 RBX: 000000000004dfff RCX: 0000000000000006
 [96049.833786] RDX: 0000000000000007 RSI: ffff99045c143230 RDI: ffff99047b2168a0
 [96049.833787] RBP: ffff990457851cd0 R08: 0000000000000001 R09: 0000000000000000
 [96049.833787] R10: ffffafd2c3707ab8 R11: 0000000000000000 R12: ffff9903b93b12c8
 [96049.833788] R13: 000000000004e000 R14: ffffafd2c3707b80 R15: ffffafd2c3707b78
 [96049.833790] FS:  00007f5c14e7d700(0000) GS:ffff99047b200000(0000) knlGS:0000000000000000
 [96049.833791] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 [96049.833792] CR2: 00007f5c146abff8 CR3: 0000000115f4c004 CR4: 00000000003606f0
 [96049.833795] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
 [96049.833796] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
 [96049.833796] Call Trace:
 [96049.833809]  __set_extent_bit+0x46c/0x6a0 [btrfs]
 [96049.833823]  lock_extent_bits+0x6b/0x210 [btrfs]
 [96049.833831]  ? _raw_spin_unlock+0x24/0x30
 [96049.833841]  ? test_range_bit+0xdf/0x130 [btrfs]
 [96049.833853]  lock_extent_range+0x8e/0x150 [btrfs]
 [96049.833864]  btrfs_double_extent_lock+0x78/0xb0 [btrfs]
 [96049.833875]  btrfs_extent_same_range+0x14e/0x550 [btrfs]
 [96049.833885]  ? rcu_read_lock_sched_held+0x3f/0x70
 [96049.833890]  ? __kmalloc_node+0x2b0/0x2f0
 [96049.833899]  ? btrfs_dedupe_file_range+0x19a/0x280 [btrfs]
 [96049.833909]  btrfs_dedupe_file_range+0x270/0x280 [btrfs]
 [96049.833916]  vfs_dedupe_file_range_one+0xd9/0xe0
 [96049.833919]  vfs_dedupe_file_range+0x131/0x1b0
 [96049.833924]  do_vfs_ioctl+0x272/0x6e0
 [96049.833927]  ? __fget+0x113/0x200
 [96049.833931]  ksys_ioctl+0x70/0x80
 [96049.833933]  __x64_sys_ioctl+0x16/0x20
 [96049.833937]  do_syscall_64+0x60/0x1b0
 [96049.833939]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
 [96049.833941] RIP: 0033:0x7f5c1478ddd7
 [96049.833943] RSP: 002b:00007ffe15b196a8 EFLAGS: 00000202 ORIG_RAX: 0000000000000010
 [96049.833945] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f5c1478ddd7
 [96049.833946] RDX: 00005625ece322d0 RSI: 00000000c0189436 RDI: 0000000000000004
 [96049.833947] RBP: 0000000000000000 R08: 00007f5c14a46f48 R09: 0000000000000040
 [96049.833948] R10: 0000000000000541 R11: 0000000000000202 R12: 0000000000000000
 [96049.833949] R13: 0000000000000000 R14: 0000000000000004 R15: 00005625ece322d0
 [96049.833954] irq event stamp: 6196
 [96049.833956] hardirqs last  enabled at (6195): [<ffffffff91b00663>] console_unlock+0x503/0x640
 [96049.833958] hardirqs last disabled at (6196): [<ffffffff91a037dd>] trace_hardirqs_off_thunk+0x1a/0x1c
 [96049.833959] softirqs last  enabled at (6114): [<ffffffff92600370>] __do_softirq+0x370/0x421
 [96049.833964] softirqs last disabled at (6095): [<ffffffff91a8dd4d>] irq_exit+0xcd/0xe0
 [96049.833965] ---[ end trace db7b05f01b7fa10c ]---
 [96049.935816] R13: 0000000000000000 R14: 00005562e5259240 R15: 00007ffff092b910
 [96049.935822] irq event stamp: 6584
 [96049.935823] hardirqs last  enabled at (6583): [<ffffffff91b00663>] console_unlock+0x503/0x640
 [96049.935825] hardirqs last disabled at (6584): [<ffffffff91a037dd>] trace_hardirqs_off_thunk+0x1a/0x1c
 [96049.935827] softirqs last  enabled at (6328): [<ffffffff92600370>] __do_softirq+0x370/0x421
 [96049.935828] softirqs last disabled at (6313): [<ffffffff91a8dd4d>] irq_exit+0xcd/0xe0
 [96049.935829] ---[ end trace db7b05f01b7fa123 ]---
 [96049.935840] ------------[ cut here ]------------
 [96049.936065] WARNING: CPU: 1 PID: 24463 at fs/btrfs/extent_io.c:436 insert_state+0x101/0x120 [btrfs]
 [96049.936107] CPU: 1 PID: 24463 Comm: umount Tainted: G        W         4.19.0-rc7-btrfs-next-39 #1
 [96049.936108] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.11.2-0-gf9626ccb91-prebuilt.qemu-project.org 04/01/2014
 [96049.936117] RIP: 0010:insert_state+0x101/0x120 [btrfs]
 [96049.936119] RSP: 0018:ffffafd2c3637bc0 EFLAGS: 00010282
 [96049.936120] RAX: 0000000000000000 RBX: 000000000004dfff RCX: 0000000000000006
 [96049.936121] RDX: 0000000000000007 RSI: ffff990445cf88e0 RDI: ffff99047b2968a0
 [96049.936122] RBP: ffff990457851cd0 R08: 0000000000000001 R09: 0000000000000000
 [96049.936123] R10: ffffafd2c3637b88 R11: 0000000000000000 R12: ffff9904574301e8
 [96049.936124] R13: 000000000004e000 R14: ffffafd2c3637c50 R15: ffffafd2c3637c48
 [96049.936125] FS:  00007fe4b87e72c0(0000) GS:ffff99047b280000(0000) knlGS:0000000000000000
 [96049.936126] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 [96049.936128] CR2: 00005562e52618d8 CR3: 00000001151c8005 CR4: 00000000003606e0
 [96049.936129] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
 [96049.936131] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
 [96049.936131] Call Trace:
 [96049.936141]  __set_extent_bit+0x46c/0x6a0 [btrfs]
 [96049.936154]  lock_extent_bits+0x6b/0x210 [btrfs]
 [96049.936167]  btrfs_evict_inode+0x1e1/0x5a0 [btrfs]
 [96049.936172]  evict+0xbf/0x1c0
 [96049.936174]  dispose_list+0x51/0x80
 [96049.936176]  evict_inodes+0x193/0x1c0
 [96049.936180]  generic_shutdown_super+0x3f/0x110
 [96049.936182]  kill_anon_super+0xe/0x30
 [96049.936189]  btrfs_kill_super+0x13/0x100 [btrfs]
 [96049.936191]  deactivate_locked_super+0x3a/0x70
 [96049.936193]  cleanup_mnt+0x3b/0x80
 [96049.936195]  task_work_run+0x93/0xc0
 [96049.936198]  exit_to_usermode_loop+0xfa/0x100
 [96049.936201]  do_syscall_64+0x17f/0x1b0
 [96049.936202]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
 [96049.936204] RIP: 0033:0x7fe4b80cfb37
 [96049.936206] RSP: 002b:00007ffff092b688 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6
 [96049.936207] RAX: 0000000000000000 RBX: 00005562e5259060 RCX: 00007fe4b80cfb37
 [96049.936208] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 00005562e525faa0
 [96049.936209] RBP: 00005562e525faa0 R08: 00005562e525f770 R09: 0000000000000015
 [96049.936210] R10: 00000000000006b4 R11: 0000000000000246 R12: 00007fe4b85d1e64
 [96049.936211] R13: 0000000000000000 R14: 00005562e5259240 R15: 00007ffff092b910
 [96049.936211] R13: 0000000000000000 R14: 00005562e5259240 R15: 00007ffff092b910
 [96049.936216] irq event stamp: 6616
 [96049.936219] hardirqs last  enabled at (6615): [<ffffffff91b00663>] console_unlock+0x503/0x640
 [96049.936219] hardirqs last disabled at (6616): [<ffffffff91a037dd>] trace_hardirqs_off_thunk+0x1a/0x1c
 [96049.936222] softirqs last  enabled at (6328): [<ffffffff92600370>] __do_softirq+0x370/0x421
 [96049.936222] softirqs last disabled at (6313): [<ffffffff91a8dd4d>] irq_exit+0xcd/0xe0
 [96049.936223] ---[ end trace db7b05f01b7fa124 ]---

The second stack trace, from inode eviction, is repeated forever due to
the infinite loop during eviction.

This is the same type of problem fixed way back in 2015 by commit
113e8283869b ("Btrfs: fix inode eviction infinite loop after extent_same
ioctl") and commit ccccf3d67294 ("Btrfs: fix inode eviction infinite loop
after cloning into it").

So fix this by returning immediately if the deduplication range length
gets rounded down to 0 bytes, as there is nothing that needs to be done in
such case.

Example reproducer:

 $ mkfs.btrfs -f /dev/sdb
 $ mount /dev/sdb /mnt

 $ xfs_io -f -c "pwrite -S 0xe6 0 100" /mnt/foo
 $ xfs_io -f -c "pwrite -S 0xe6 0 1M" /mnt/bar

 # Unmount the filesystem and mount it again so that we start without any
 # extent state records when we ask for the deduplication.
 $ umount /mnt
 $ mount /dev/sdb /mnt

 $ xfs_io -c "dedupe /mnt/foo 0 500K 100" /mnt/bar

 # This unmount triggers the infinite loop.
 $ umount /mnt

A test case for fstests will follow soon.

Fixes: de02b9f6bb65 ("Btrfs: fix data corruption when deduplicating between different files")
CC: <stable@vger.kernel.org> # 4.19+
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ioctl.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index a990a90451392..ed3ba55f65ac1 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3488,6 +3488,8 @@ static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen,
 			const u64 sz = BTRFS_I(src)->root->fs_info->sectorsize;
 
 			len = round_down(i_size_read(src), sz) - loff;
+			if (len == 0)
+				return 0;
 			olen = len;
 		}
 	}
-- 
GitLab


From ac765f83f1397646c11092a032d4f62c3d478b81 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Mon, 5 Nov 2018 11:14:17 +0000
Subject: [PATCH 0696/1890] Btrfs: fix data corruption due to cloning of eof
 block

We currently allow cloning a range from a file which includes the last
block of the file even if the file's size is not aligned to the block
size. This is fine and useful when the destination file has the same size,
but when it does not and the range ends somewhere in the middle of the
destination file, it leads to corruption because the bytes between the EOF
and the end of the block have undefined data (when there is support for
discard/trimming they have a value of 0x00).

Example:

 $ mkfs.btrfs -f /dev/sdb
 $ mount /dev/sdb /mnt

 $ export foo_size=$((256 * 1024 + 100))
 $ xfs_io -f -c "pwrite -S 0x3c 0 $foo_size" /mnt/foo
 $ xfs_io -f -c "pwrite -S 0xb5 0 1M" /mnt/bar

 $ xfs_io -c "reflink /mnt/foo 0 512K $foo_size" /mnt/bar

 $ od -A d -t x1 /mnt/bar
 0000000 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5
 *
 0524288 3c 3c 3c 3c 3c 3c 3c 3c 3c 3c 3c 3c 3c 3c 3c 3c
 *
 0786528 3c 3c 3c 3c 00 00 00 00 00 00 00 00 00 00 00 00
 0786544 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
 *
 0790528 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5
 *
 1048576

The bytes in the range from 786532 (512Kb + 256Kb + 100 bytes) to 790527
(512Kb + 256Kb + 4Kb - 1) got corrupted, having now a value of 0x00 instead
of 0xb5.

This is similar to the problem we had for deduplication that got recently
fixed by commit de02b9f6bb65 ("Btrfs: fix data corruption when
deduplicating between different files").

Fix this by not allowing such operations to be performed and return the
errno -EINVAL to user space. This is what XFS is doing as well at the VFS
level. This change however now makes us return -EINVAL instead of
-EOPNOTSUPP for cases where the source range maps to an inline extent and
the destination range's end is smaller then the destination file's size,
since the detection of inline extents is done during the actual process of
dropping file extent items (at __btrfs_drop_extents()). Returning the
-EINVAL error is done early on and solely based on the input parameters
(offsets and length) and destination file's size. This makes us consistent
with XFS and anyone else supporting cloning since this case is now checked
at a higher level in the VFS and is where the -EINVAL will be returned
from starting with kernel 4.20 (the VFS changed was introduced in 4.20-rc1
by commit 07d19dc9fbe9 ("vfs: avoid problematic remapping requests into
partial EOF block"). So this change is more geared towards stable kernels,
as it's unlikely the new VFS checks get removed intentionally.

A test case for fstests follows soon, as well as an update to filter
existing tests that expect -EOPNOTSUPP to accept -EINVAL as well.

CC: <stable@vger.kernel.org> # 4.4+
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ioctl.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index ed3ba55f65ac1..95f9625dccc4a 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -4279,9 +4279,17 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
 		goto out_unlock;
 	if (len == 0)
 		olen = len = src->i_size - off;
-	/* if we extend to eof, continue to block boundary */
-	if (off + len == src->i_size)
+	/*
+	 * If we extend to eof, continue to block boundary if and only if the
+	 * destination end offset matches the destination file's size, otherwise
+	 * we would be corrupting data by placing the eof block into the middle
+	 * of a file.
+	 */
+	if (off + len == src->i_size) {
+		if (!IS_ALIGNED(len, bs) && destoff + len < inode->i_size)
+			goto out_unlock;
 		len = ALIGN(src->i_size, bs) - off;
+	}
 
 	if (len == 0) {
 		ret = 0;
-- 
GitLab


From 19ed3e2dd8549c1a34914e8dad01b64e7837645a Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Mon, 5 Nov 2018 10:18:58 +0800
Subject: [PATCH 0697/1890] iommu/vt-d: Fix NULL pointer dereference in
 prq_event_thread()

When handling page request without pasid event, go to "no_pasid"
branch instead of "bad_req". Otherwise, a NULL pointer deference
will happen there.

Cc: Ashok Raj <ashok.raj@intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Sohil Mehta <sohil.mehta@intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Fixes: a222a7f0bb6c9 'iommu/vt-d: Implement page request handling'
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel-svm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index db301efe126d4..8871509075267 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -595,7 +595,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
 			pr_err("%s: Page request without PASID: %08llx %08llx\n",
 			       iommu->name, ((unsigned long long *)req)[0],
 			       ((unsigned long long *)req)[1]);
-			goto bad_req;
+			goto no_pasid;
 		}
 
 		if (!svm || svm->pasid != req->pasid) {
-- 
GitLab


From 132bf6723749f7219c399831eeb286dbbb985429 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Tue, 6 Nov 2018 07:50:50 -0800
Subject: [PATCH 0698/1890] xfs: Fix error code in 'xfs_ioc_getbmap()'

In this function, once 'buf' has been allocated, we unconditionally
return 0.
However, 'error' is set to some error codes in several error handling
paths.
Before commit 232b51948b99 ("xfs: simplify the xfs_getbmap interface")
this was not an issue because all error paths were returning directly,
but now that some cleanup at the end may be needed, we must propagate the
error code.

Fixes: 232b51948b99 ("xfs: simplify the xfs_getbmap interface")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_ioctl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 6e2c08f30f602..6ecdbb3af7de5 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1608,7 +1608,7 @@ xfs_ioc_getbmap(
 	error = 0;
 out_free_buf:
 	kmem_free(buf);
-	return 0;
+	return error;
 }
 
 struct getfsmap_info {
-- 
GitLab


From bdec055bb9f262964c4f5cb330dab26646c345c6 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 6 Nov 2018 07:50:50 -0800
Subject: [PATCH 0699/1890] xfs: print buffer offsets when dumping corrupt
 buffers

Use DUMP_PREFIX_OFFSET when printing hex dumps of corrupt buffers
because modern Linux now prints a 32-bit hash of our 64-bit pointer when
using DUMP_PREFIX_ADDRESS:

00000000b4bb4297: 00 00 00 00 00 00 00 00 3b ee 00 00 00 00 00 00  ........;.......
00000005ec77e26: 00 00 00 00 02 d0 5a 00 00 00 00 00 00 00 00 00  ......Z.........
000000015938018: 21 98 e8 b4 fd de 4c 07 bc ea 3c e5 ae b4 7c 48  !.....L...<...|H

This is totally worthless for a sequential dump since we probably only
care about tracking the buffer offsets and afaik there's no way to
recover the actual pointer from the hashed value.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/xfs_message.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c
index 576c375ce12a8..6b736ea58d354 100644
--- a/fs/xfs/xfs_message.c
+++ b/fs/xfs/xfs_message.c
@@ -107,5 +107,5 @@ assfail(char *expr, char *file, int line)
 void
 xfs_hex_dump(void *p, int length)
 {
-	print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_ADDRESS, 16, 1, p, length, 1);
+	print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_OFFSET, 16, 1, p, length, 1);
 }
-- 
GitLab


From 837514f7a4ca4aca06aec5caa5ff56d33ef06976 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Tue, 6 Nov 2018 07:50:50 -0800
Subject: [PATCH 0700/1890] xfs: fix overflow in xfs_attr3_leaf_verify

generic/070 on 64k block size filesystems is failing with a verifier
corruption on writeback or an attribute leaf block:

[   94.973083] XFS (pmem0): Metadata corruption detected at xfs_attr3_leaf_verify+0x246/0x260, xfs_attr3_leaf block 0x811480
[   94.975623] XFS (pmem0): Unmount and run xfs_repair
[   94.976720] XFS (pmem0): First 128 bytes of corrupted metadata buffer:
[   94.978270] 000000004b2e7b45: 00 00 00 00 00 00 00 00 3b ee 00 00 00 00 00 00  ........;.......
[   94.980268] 000000006b1db90b: 00 00 00 00 00 81 14 80 00 00 00 00 00 00 00 00  ................
[   94.982251] 00000000433f2407: 22 7b 5c 82 2d 5c 47 4c bb 31 1c 37 fa a9 ce d6  "{\.-\GL.1.7....
[   94.984157] 0000000010dc7dfb: 00 00 00 00 00 81 04 8a 00 0a 18 e8 dd 94 01 00  ................
[   94.986215] 00000000d5a19229: 00 a0 dc f4 fe 98 01 68 f0 d8 07 e0 00 00 00 00  .......h........
[   94.988171] 00000000521df36c: 0c 2d 32 e2 fe 20 01 00 0c 2d 58 65 fe 0c 01 00  .-2.. ...-Xe....
[   94.990162] 000000008477ae06: 0c 2d 5b 66 fe 8c 01 00 0c 2d 71 35 fe 7c 01 00  .-[f.....-q5.|..
[   94.992139] 00000000a4a6bca6: 0c 2d 72 37 fc d4 01 00 0c 2d d8 b8 f0 90 01 00  .-r7.....-......
[   94.994789] XFS (pmem0): xfs_do_force_shutdown(0x8) called from line 1453 of file fs/xfs/xfs_buf.c. Return address = ffffffff815365f3

This is failing this check:

                end = ichdr.freemap[i].base + ichdr.freemap[i].size;
                if (end < ichdr.freemap[i].base)
>>>>>                   return __this_address;
                if (end > mp->m_attr_geo->blksize)
                        return __this_address;

And from the buffer output above, the freemap array is:

	freemap[0].base = 0x00a0
	freemap[0].size = 0xdcf4	end = 0xdd94
	freemap[1].base = 0xfe98
	freemap[1].size = 0x0168	end = 0x10000
	freemap[2].base = 0xf0d8
	freemap[2].size = 0x07e0	end = 0xf8b8

These all look valid - the block size is 0x10000 and so from the
last check in the above verifier fragment we know that the end
of freemap[1] is valid. The problem is that end is declared as:

	uint16_t	end;

And (uint16_t)0x10000 = 0. So we have a verifier bug here, not a
corruption. Fix the verifier to use uint32_t types for the check and
hence avoid the overflow.

Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=201577
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_attr_leaf.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 6fc5425b1474a..2652d00842d6b 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -243,7 +243,7 @@ xfs_attr3_leaf_verify(
 	struct xfs_mount		*mp = bp->b_target->bt_mount;
 	struct xfs_attr_leafblock	*leaf = bp->b_addr;
 	struct xfs_attr_leaf_entry	*entries;
-	uint16_t			end;
+	uint32_t			end;	/* must be 32bit - see below */
 	int				i;
 
 	xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf);
@@ -293,6 +293,11 @@ xfs_attr3_leaf_verify(
 	/*
 	 * Quickly check the freemap information.  Attribute data has to be
 	 * aligned to 4-byte boundaries, and likewise for the free space.
+	 *
+	 * Note that for 64k block size filesystems, the freemap entries cannot
+	 * overflow as they are only be16 fields. However, when checking end
+	 * pointer of the freemap, we have to be careful to detect overflows and
+	 * so use uint32_t for those checks.
 	 */
 	for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
 		if (ichdr.freemap[i].base > mp->m_attr_geo->blksize)
@@ -303,7 +308,9 @@ xfs_attr3_leaf_verify(
 			return __this_address;
 		if (ichdr.freemap[i].size & 0x3)
 			return __this_address;
-		end = ichdr.freemap[i].base + ichdr.freemap[i].size;
+
+		/* be care of 16 bit overflows here */
+		end = (uint32_t)ichdr.freemap[i].base + ichdr.freemap[i].size;
 		if (end < ichdr.freemap[i].base)
 			return __this_address;
 		if (end > mp->m_attr_geo->blksize)
-- 
GitLab


From 9de9aa45e9bd67232e000cca42ceb134b8ae51b6 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Date: Tue, 16 Oct 2018 11:56:26 +0300
Subject: [PATCH 0701/1890] tools/power/cpupower: fix compilation with
 STATIC=true

Rename duplicate sysfs_read_file into cpupower_read_sysfs and fix linking.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Acked-by: Thomas Renninger <trenn@suse.de>
Cc: <stable@vger.kernel.org>
Signed-off-by: Shuah Khan (Samsung OSG) <shuah@kernel.org>
---
 tools/power/cpupower/bench/Makefile        | 2 +-
 tools/power/cpupower/lib/cpufreq.c         | 2 +-
 tools/power/cpupower/lib/cpuidle.c         | 2 +-
 tools/power/cpupower/lib/cpupower.c        | 4 ++--
 tools/power/cpupower/lib/cpupower_intern.h | 2 +-
 5 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/tools/power/cpupower/bench/Makefile b/tools/power/cpupower/bench/Makefile
index d79ab161cc75f..f68b4bc552739 100644
--- a/tools/power/cpupower/bench/Makefile
+++ b/tools/power/cpupower/bench/Makefile
@@ -9,7 +9,7 @@ endif
 ifeq ($(strip $(STATIC)),true)
 LIBS = -L../ -L$(OUTPUT) -lm
 OBJS = $(OUTPUT)main.o $(OUTPUT)parse.o $(OUTPUT)system.o $(OUTPUT)benchmark.o \
-       $(OUTPUT)../lib/cpufreq.o $(OUTPUT)../lib/sysfs.o
+       $(OUTPUT)../lib/cpufreq.o $(OUTPUT)../lib/cpupower.o
 else
 LIBS = -L../ -L$(OUTPUT) -lm -lcpupower
 OBJS = $(OUTPUT)main.o $(OUTPUT)parse.o $(OUTPUT)system.o $(OUTPUT)benchmark.o
diff --git a/tools/power/cpupower/lib/cpufreq.c b/tools/power/cpupower/lib/cpufreq.c
index 1b993fe1ce237..0c0f3e3f0d803 100644
--- a/tools/power/cpupower/lib/cpufreq.c
+++ b/tools/power/cpupower/lib/cpufreq.c
@@ -28,7 +28,7 @@ static unsigned int sysfs_cpufreq_read_file(unsigned int cpu, const char *fname,
 
 	snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpufreq/%s",
 			 cpu, fname);
-	return sysfs_read_file(path, buf, buflen);
+	return cpupower_read_sysfs(path, buf, buflen);
 }
 
 /* helper function to write a new value to a /sys file */
diff --git a/tools/power/cpupower/lib/cpuidle.c b/tools/power/cpupower/lib/cpuidle.c
index 9bd4c7655fdb2..852d25462388c 100644
--- a/tools/power/cpupower/lib/cpuidle.c
+++ b/tools/power/cpupower/lib/cpuidle.c
@@ -319,7 +319,7 @@ static unsigned int sysfs_cpuidle_read_file(const char *fname, char *buf,
 
 	snprintf(path, sizeof(path), PATH_TO_CPU "cpuidle/%s", fname);
 
-	return sysfs_read_file(path, buf, buflen);
+	return cpupower_read_sysfs(path, buf, buflen);
 }
 
 
diff --git a/tools/power/cpupower/lib/cpupower.c b/tools/power/cpupower/lib/cpupower.c
index 9c395ec924def..9711d628b0f44 100644
--- a/tools/power/cpupower/lib/cpupower.c
+++ b/tools/power/cpupower/lib/cpupower.c
@@ -15,7 +15,7 @@
 #include "cpupower.h"
 #include "cpupower_intern.h"
 
-unsigned int sysfs_read_file(const char *path, char *buf, size_t buflen)
+unsigned int cpupower_read_sysfs(const char *path, char *buf, size_t buflen)
 {
 	int fd;
 	ssize_t numread;
@@ -95,7 +95,7 @@ static int sysfs_topology_read_file(unsigned int cpu, const char *fname, int *re
 
 	snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/topology/%s",
 			 cpu, fname);
-	if (sysfs_read_file(path, linebuf, MAX_LINE_LEN) == 0)
+	if (cpupower_read_sysfs(path, linebuf, MAX_LINE_LEN) == 0)
 		return -1;
 	*result = strtol(linebuf, &endp, 0);
 	if (endp == linebuf || errno == ERANGE)
diff --git a/tools/power/cpupower/lib/cpupower_intern.h b/tools/power/cpupower/lib/cpupower_intern.h
index 92affdfbe4174..4887c76d23f86 100644
--- a/tools/power/cpupower/lib/cpupower_intern.h
+++ b/tools/power/cpupower/lib/cpupower_intern.h
@@ -3,4 +3,4 @@
 #define MAX_LINE_LEN 4096
 #define SYSFS_PATH_MAX 255
 
-unsigned int sysfs_read_file(const char *path, char *buf, size_t buflen);
+unsigned int cpupower_read_sysfs(const char *path, char *buf, size_t buflen);
-- 
GitLab


From 4bf3bd0f15a9c81064c0b430d04d221ffcc503cc Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Tue, 16 Oct 2018 17:06:08 +0200
Subject: [PATCH 0702/1890] tools cpupower debug: Allow to use outside build
 flags

Adding CFLAGS and LDFLAGS to be used during the build.

Cc: Thomas Renninger <trenn@suse.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Thomas Renninger <trenn@suse.de>
Signed-off-by: Shuah Khan (Samsung OSG) <shuah@kernel.org>
---
 tools/power/cpupower/debug/x86_64/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/power/cpupower/debug/x86_64/Makefile b/tools/power/cpupower/debug/x86_64/Makefile
index 59af84b8ef455..b1b6c43644e79 100644
--- a/tools/power/cpupower/debug/x86_64/Makefile
+++ b/tools/power/cpupower/debug/x86_64/Makefile
@@ -13,10 +13,10 @@ INSTALL = /usr/bin/install
 default: all
 
 $(OUTPUT)centrino-decode: ../i386/centrino-decode.c
-	$(CC) $(CFLAGS) -o $@ $<
+	$(CC) $(CFLAGS) -o $@ $(LDFLAGS) $<
 
 $(OUTPUT)powernow-k8-decode: ../i386/powernow-k8-decode.c
-	$(CC) $(CFLAGS) -o $@ $<
+	$(CC) $(CFLAGS) -o $@ $(LDFLAGS) $<
 
 all: $(OUTPUT)centrino-decode $(OUTPUT)powernow-k8-decode
 
-- 
GitLab


From dbc4ca339c8dbdd8652ce57c16bf5ef45ee4307e Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Tue, 16 Oct 2018 17:06:09 +0200
Subject: [PATCH 0703/1890] tools cpupower: Override CFLAGS assignments

So user could specify outside CFLAGS values.

Cc: Thomas Renninger <trenn@suse.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Thomas Renninger <trenn@suse.de>
Signed-off-by: Shuah Khan (Samsung OSG) <shuah@kernel.org>
---
 tools/power/cpupower/Makefile | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile
index 1dd5f4fcffd53..db66a952c1739 100644
--- a/tools/power/cpupower/Makefile
+++ b/tools/power/cpupower/Makefile
@@ -129,7 +129,7 @@ WARNINGS += $(call cc-supports,-Wno-pointer-sign)
 WARNINGS += $(call cc-supports,-Wdeclaration-after-statement)
 WARNINGS += -Wshadow
 
-CFLAGS += -DVERSION=\"$(VERSION)\" -DPACKAGE=\"$(PACKAGE)\" \
+override CFLAGS += -DVERSION=\"$(VERSION)\" -DPACKAGE=\"$(PACKAGE)\" \
 		-DPACKAGE_BUGREPORT=\"$(PACKAGE_BUGREPORT)\" -D_GNU_SOURCE
 
 UTIL_OBJS =  utils/helpers/amd.o utils/helpers/msr.o \
@@ -156,12 +156,12 @@ LIB_SRC = 	lib/cpufreq.c lib/cpupower.c lib/cpuidle.c
 LIB_OBJS = 	lib/cpufreq.o lib/cpupower.o lib/cpuidle.o
 LIB_OBJS :=	$(addprefix $(OUTPUT),$(LIB_OBJS))
 
-CFLAGS +=	-pipe
+override CFLAGS +=	-pipe
 
 ifeq ($(strip $(NLS)),true)
 	INSTALL_NLS += install-gmo
 	COMPILE_NLS += create-gmo
-	CFLAGS += -DNLS
+	override CFLAGS += -DNLS
 endif
 
 ifeq ($(strip $(CPUFREQ_BENCH)),true)
@@ -175,7 +175,7 @@ ifeq ($(strip $(STATIC)),true)
         UTIL_SRC += $(LIB_SRC)
 endif
 
-CFLAGS += $(WARNINGS)
+override CFLAGS += $(WARNINGS)
 
 ifeq ($(strip $(V)),false)
 	QUIET=@
@@ -188,10 +188,10 @@ export QUIET ECHO
 
 # if DEBUG is enabled, then we do not strip or optimize
 ifeq ($(strip $(DEBUG)),true)
-	CFLAGS += -O1 -g -DDEBUG
+	override CFLAGS += -O1 -g -DDEBUG
 	STRIPCMD = /bin/true -Since_we_are_debugging
 else
-	CFLAGS += $(OPTIMIZATION) -fomit-frame-pointer
+	override CFLAGS += $(OPTIMIZATION) -fomit-frame-pointer
 	STRIPCMD = $(STRIP) -s --remove-section=.note --remove-section=.comment
 endif
 
-- 
GitLab


From 7f4cedd882f7cae83177066c2b239ef457ce4a42 Mon Sep 17 00:00:00 2001
From: Giulio Benetti <giulio.benetti@micronovasrl.com>
Date: Fri, 5 Oct 2018 23:59:50 +0200
Subject: [PATCH 0704/1890] drm/sun4i: tcon: fix check of tcon->panel null
 pointer

Since tcon->panel is a pointer returned by of_drm_find_panel() need to
check if it is not NULL, hence a valid pointer.
IS_ERR() instead checks return error values, not NULL pointers.

Substitute "if (!IS_ERR(tcon->panel))" with "if (tcon->panel)".

Signed-off-by: Giulio Benetti <giulio.benetti@micronovasrl.com>
Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181005215951.99003-1-giulio.benetti@micronovasrl.com
---
 drivers/gpu/drm/sun4i/sun4i_lvds.c | 4 ++--
 drivers/gpu/drm/sun4i/sun4i_rgb.c  | 4 ++--
 drivers/gpu/drm/sun4i/sun4i_tcon.c | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/sun4i/sun4i_lvds.c b/drivers/gpu/drm/sun4i/sun4i_lvds.c
index af7dcb6da3514..e7eb0d1e17be5 100644
--- a/drivers/gpu/drm/sun4i/sun4i_lvds.c
+++ b/drivers/gpu/drm/sun4i/sun4i_lvds.c
@@ -75,7 +75,7 @@ static void sun4i_lvds_encoder_enable(struct drm_encoder *encoder)
 
 	DRM_DEBUG_DRIVER("Enabling LVDS output\n");
 
-	if (!IS_ERR(tcon->panel)) {
+	if (tcon->panel) {
 		drm_panel_prepare(tcon->panel);
 		drm_panel_enable(tcon->panel);
 	}
@@ -88,7 +88,7 @@ static void sun4i_lvds_encoder_disable(struct drm_encoder *encoder)
 
 	DRM_DEBUG_DRIVER("Disabling LVDS output\n");
 
-	if (!IS_ERR(tcon->panel)) {
+	if (tcon->panel) {
 		drm_panel_disable(tcon->panel);
 		drm_panel_unprepare(tcon->panel);
 	}
diff --git a/drivers/gpu/drm/sun4i/sun4i_rgb.c b/drivers/gpu/drm/sun4i/sun4i_rgb.c
index bf068da6b12e1..f4a22689eb54c 100644
--- a/drivers/gpu/drm/sun4i/sun4i_rgb.c
+++ b/drivers/gpu/drm/sun4i/sun4i_rgb.c
@@ -135,7 +135,7 @@ static void sun4i_rgb_encoder_enable(struct drm_encoder *encoder)
 
 	DRM_DEBUG_DRIVER("Enabling RGB output\n");
 
-	if (!IS_ERR(tcon->panel)) {
+	if (tcon->panel) {
 		drm_panel_prepare(tcon->panel);
 		drm_panel_enable(tcon->panel);
 	}
@@ -148,7 +148,7 @@ static void sun4i_rgb_encoder_disable(struct drm_encoder *encoder)
 
 	DRM_DEBUG_DRIVER("Disabling RGB output\n");
 
-	if (!IS_ERR(tcon->panel)) {
+	if (tcon->panel) {
 		drm_panel_disable(tcon->panel);
 		drm_panel_unprepare(tcon->panel);
 	}
diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c
index c78cd35a1294b..e4b3bd0307ef9 100644
--- a/drivers/gpu/drm/sun4i/sun4i_tcon.c
+++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c
@@ -555,7 +555,7 @@ static void sun4i_tcon0_mode_set_rgb(struct sun4i_tcon *tcon,
 	 * Following code is a way to avoid quirks all around TCON
 	 * and DOTCLOCK drivers.
 	 */
-	if (!IS_ERR(tcon->panel)) {
+	if (tcon->panel) {
 		struct drm_panel *panel = tcon->panel;
 		struct drm_connector *connector = panel->connector;
 		struct drm_display_info display_info = connector->display_info;
-- 
GitLab


From a8939766c75c06b5a0ab691ecbba9347e4e520cf Mon Sep 17 00:00:00 2001
From: Giulio Benetti <giulio.benetti@micronovasrl.com>
Date: Fri, 5 Oct 2018 23:59:51 +0200
Subject: [PATCH 0705/1890] drm/sun4i: tcon: prevent tcon->panel dereference if
 NULL

If tcon->panel pointer is NULL, trying to dereference from it
(i.e. tcon->panel->connector) will cause a null pointer dereference.

Add tcon->panel null pointer check before calling
sun4i_tcon0_mode_set_dithering().

Signed-off-by: Giulio Benetti <giulio.benetti@micronovasrl.com>
Fixes: f11adcecbd5f ("drm/sun4i: tcon: Add dithering support for
                      RGB565/RGB666 LCD panels")
Reviewed-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181005215951.99003-2-giulio.benetti@micronovasrl.com
---
 drivers/gpu/drm/sun4i/sun4i_tcon.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c
index e4b3bd0307ef9..f949287d926cd 100644
--- a/drivers/gpu/drm/sun4i/sun4i_tcon.c
+++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c
@@ -491,7 +491,8 @@ static void sun4i_tcon0_mode_set_rgb(struct sun4i_tcon *tcon,
 	sun4i_tcon0_mode_set_common(tcon, mode);
 
 	/* Set dithering if needed */
-	sun4i_tcon0_mode_set_dithering(tcon, tcon->panel->connector);
+	if (tcon->panel)
+		sun4i_tcon0_mode_set_dithering(tcon, tcon->panel->connector);
 
 	/* Adjust clock delay */
 	clk_delay = sun4i_tcon_get_clk_delay(mode, 0);
-- 
GitLab


From df5e31c204b34e8d9e5ec33f5b28e960c4f25e14 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Thu, 25 Oct 2018 16:05:36 +0300
Subject: [PATCH 0706/1890] drm/i915: Fix ilk+ watermarks when disabling pipes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We're no longer programming any watermarks when we're disabling
a pipe. That means ilk_wm_merge() & co. will keep considering
the any pipe that is getting disabled as still enabled. Thus we
either get no LP1+ watermakrs (ilk-ivb), or we get suboptimal
ones (hsw-bdw).

This seems to have been broken by commit b6b178a77210 ("drm/i915:
Calculate ironlake intermediate watermarks correctly, v2."). Before
that we apparently had some difference between the intermediate
and optimal watermarks and so we would program the optiomal ones.
Now intermediate and optimal are identical for disabled pipes
and so we don't program either.

Fix this by programming the intermediate watermarks even for
disabled pipes. We were already doing that for skl+. We'll
leave out gmch platforms for now since those do the merging
in a different manner and should work as is. We'll want to
unify this eventually, but play it safe for now and just put
in a FIXME.

Cc: stable@vger.kernel.org
Cc: Matt Roper <matthew.d.roper@intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Fixes: b6b178a77210 ("drm/i915: Calculate ironlake intermediate watermarks correctly, v2.")
Signed-off-by: Ville SyrjÃ¤lÃ¤ <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181025130536.29024-1-ville.syrjala@linux.intel.com
Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> #irc
(cherry picked from commit a748faea3bfd7fd1d1485bc1c426c7d460cc6503)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_display.c | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index b8dfdbc9ca1f6..23d8008a93bb6 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -12768,17 +12768,12 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state)
 			intel_check_cpu_fifo_underruns(dev_priv);
 			intel_check_pch_fifo_underruns(dev_priv);
 
-			if (!new_crtc_state->active) {
-				/*
-				 * Make sure we don't call initial_watermarks
-				 * for ILK-style watermark updates.
-				 *
-				 * No clue what this is supposed to achieve.
-				 */
-				if (INTEL_GEN(dev_priv) >= 9)
-					dev_priv->display.initial_watermarks(intel_state,
-									     to_intel_crtc_state(new_crtc_state));
-			}
+			/* FIXME unify this for all platforms */
+			if (!new_crtc_state->active &&
+			    !HAS_GMCH_DISPLAY(dev_priv) &&
+			    dev_priv->display.initial_watermarks)
+				dev_priv->display.initial_watermarks(intel_state,
+								     to_intel_crtc_state(new_crtc_state));
 		}
 	}
 
-- 
GitLab


From f98e8a572bddbf27032114127d2fcc78fa5e6a9d Mon Sep 17 00:00:00 2001
From: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
Date: Thu, 1 Nov 2018 14:15:49 +0100
Subject: [PATCH 0707/1890] clk: fixed-factor: fix of_node_get-put imbalance

When the fixed factor clock is created by devicetree,
of_clk_add_provider is called.  Add a call to
of_clk_del_provider in the remove function to balance
it out.

Reported-by: Alan Tull <atull@kernel.org>
Fixes: 971451b3b15d ("clk: fixed-factor: Convert into a module platform driver")
Signed-off-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 drivers/clk/clk-fixed-factor.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/clk/clk-fixed-factor.c b/drivers/clk/clk-fixed-factor.c
index ef0ca9414f371..ff83e899df71f 100644
--- a/drivers/clk/clk-fixed-factor.c
+++ b/drivers/clk/clk-fixed-factor.c
@@ -210,6 +210,7 @@ static int of_fixed_factor_clk_remove(struct platform_device *pdev)
 {
 	struct clk *clk = platform_get_drvdata(pdev);
 
+	of_clk_del_provider(pdev->dev.of_node);
 	clk_unregister_fixed_factor(clk);
 
 	return 0;
-- 
GitLab


From 98ee3fc7ef8395f8b7a379e6608aee91efc66d48 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Tue, 6 Nov 2018 17:25:37 +0100
Subject: [PATCH 0708/1890] mtd: nand: Fix nanddev_pos_next_page() kernel-doc
 header

Function name is wrong in the kernel-doc header.

Fixes: 9c3736a3de21 ("mtd: nand: Add core infrastructure to deal with NAND devices")
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Reviewed-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 include/linux/mtd/nand.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 78b86dea2f294..7f53ece2c039a 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -568,7 +568,7 @@ static inline void nanddev_pos_next_eraseblock(struct nand_device *nand,
 }
 
 /**
- * nanddev_pos_next_eraseblock() - Move a position to the next page
+ * nanddev_pos_next_page() - Move a position to the next page
  * @nand: NAND device
  * @pos: the position to update
  *
-- 
GitLab


From 461cf036057477805a8a391e5fd0f5264a5e56a8 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 19 Oct 2018 23:08:43 +0300
Subject: [PATCH 0709/1890] ath9k: Fix a locking bug in ath9k_add_interface()

We tried to revert commit d9c52fd17cb4 ("ath9k: fix tx99 with monitor
mode interface") but accidentally missed part of the locking change.

The lock has to be held earlier so that we're holding it when we do
"sc->tx99_vif = vif;" and also there in the current code there is a
stray unlock before we have taken the lock.

Fixes: 6df0580be8bc ("ath9k: add back support for using active monitor interfaces for tx99")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/ath9k/main.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c
index 1e3b5f4a4cf92..f23cb2f3d296a 100644
--- a/drivers/net/wireless/ath/ath9k/main.c
+++ b/drivers/net/wireless/ath/ath9k/main.c
@@ -1251,6 +1251,7 @@ static int ath9k_add_interface(struct ieee80211_hw *hw,
 	struct ath_vif *avp = (void *)vif->drv_priv;
 	struct ath_node *an = &avp->mcast_node;
 
+	mutex_lock(&sc->mutex);
 	if (IS_ENABLED(CONFIG_ATH9K_TX99)) {
 		if (sc->cur_chan->nvifs >= 1) {
 			mutex_unlock(&sc->mutex);
@@ -1259,8 +1260,6 @@ static int ath9k_add_interface(struct ieee80211_hw *hw,
 		sc->tx99_vif = vif;
 	}
 
-	mutex_lock(&sc->mutex);
-
 	ath_dbg(common, CONFIG, "Attach a VIF of type: %d\n", vif->type);
 	sc->cur_chan->nvifs++;
 
-- 
GitLab


From b630806d7ce2051a3306fa6f1cfa31bc870d7c4b Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Thu, 25 Oct 2018 10:57:21 -0700
Subject: [PATCH 0710/1890] wlcore: Fixup "Add support for optional wakeirq"

After commit 3c83dd577c7f ("wlcore: Add support for optional
wakeirq") landed upstream, I started seeing the following oops
on my HiKey board:

[    1.870279] Unable to handle kernel read from unreadable memory at virtual address 0000000000000010
[    1.870283] Mem abort info:
[    1.870287]   ESR = 0x96000005
[    1.870292]   Exception class = DABT (current EL), IL = 32 bits
[    1.870296]   SET = 0, FnV = 0
[    1.870299]   EA = 0, S1PTW = 0
[    1.870302] Data abort info:
[    1.870306]   ISV = 0, ISS = 0x00000005
[    1.870309]   CM = 0, WnR = 0
[    1.870312] [0000000000000010] user address but active_mm is swapper
[    1.870318] Internal error: Oops: 96000005 [#1] PREEMPT SMP
[    1.870327] CPU: 0 PID: 5 Comm: kworker/0:0 Not tainted 4.19.0-05129-gb3d1e8e #48
[    1.870331] Hardware name: HiKey Development Board (DT)
[    1.870350] Workqueue: events_freezable mmc_rescan
[    1.870358] pstate: 60400005 (nZCv daif +PAN -UAO)
[    1.870366] pc : wl1271_probe+0x210/0x350
[    1.870371] lr : wl1271_probe+0x210/0x350
[    1.870374] sp : ffffff80080739b0
[    1.870377] x29: ffffff80080739b0 x28: 0000000000000000
[    1.870384] x27: 0000000000000000 x26: 0000000000000000
[    1.870391] x25: 0000000000000036 x24: ffffffc074ecb598
[    1.870398] x23: ffffffc07ffdce78 x22: ffffffc0744ed808
[    1.870404] x21: ffffffc074ecbb98 x20: ffffff8008ff9000
[    1.870411] x19: ffffffc0744ed800 x18: ffffff8008ff9a48
[    1.870418] x17: 0000000000000000 x16: 0000000000000000
[    1.870425] x15: ffffffc074ecb503 x14: ffffffffffffffff
[    1.870431] x13: ffffffc074ecb502 x12: 0000000000000030
[    1.870438] x11: 0101010101010101 x10: 0000000000000040
[    1.870444] x9 : ffffffc075400248 x8 : ffffffc075400270
[    1.870451] x7 : 0000000000000000 x6 : 0000000000000000
[    1.870457] x5 : 0000000000000000 x4 : 0000000000000000
[    1.870463] x3 : 0000000000000000 x2 : 0000000000000000
[    1.870469] x1 : 0000000000000028 x0 : 0000000000000000
[    1.870477] Process kworker/0:0 (pid: 5, stack limit = 0x(____ptrval____))
[    1.870480] Call trace:
[    1.870485]  wl1271_probe+0x210/0x350
[    1.870491]  sdio_bus_probe+0x100/0x128
[    1.870500]  really_probe+0x1a8/0x2b8
[    1.870506]  driver_probe_device+0x58/0x100
[    1.870511]  __device_attach_driver+0x94/0xd8
[    1.870517]  bus_for_each_drv+0x70/0xc8
[    1.870522]  __device_attach+0xe0/0x140
[    1.870527]  device_initial_probe+0x10/0x18
[    1.870532]  bus_probe_device+0x94/0xa0
[    1.870537]  device_add+0x374/0x5b8
[    1.870542]  sdio_add_func+0x60/0x88
[    1.870546]  mmc_attach_sdio+0x1b0/0x358
[    1.870551]  mmc_rescan+0x2cc/0x390
[    1.870558]  process_one_work+0x12c/0x320
[    1.870563]  worker_thread+0x48/0x458
[    1.870569]  kthread+0xf8/0x128
[    1.870575]  ret_from_fork+0x10/0x18
[    1.870583] Code: 92400c21 b2760021 a90687a2 97e95bf9 (f9400803)
[    1.870587] ---[ end trace 1e15f81d3c139ca9 ]---

It seems since we don't have a wakeirq value in the dts, the wakeirq
value in wl1271_probe() is zero, which then causes trouble in
irqd_get_trigger_type(irq_get_irq_data(wakeirq)).

This patch tries to address this by checking if wakeirq is zero,
and not trying to add it to the resources if that is the case.

Fixes: 3c83dd577c7f ("wlcore: Add support for optional wakeirq")
Cc: Tony Lindgren <tony@atomide.com>
Cc: Kalle Valo <kvalo@codeaurora.org>
Cc: Eyal Reizer <eyalr@ti.com>
Cc: Anders Roxell <anders.roxell@linaro.org>
Cc: linux-wireless@vger.kernel.org
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
Tested-by: Anders Roxell <anders.roxell@linaro.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ti/wlcore/sdio.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/ti/wlcore/sdio.c b/drivers/net/wireless/ti/wlcore/sdio.c
index 4c2154b9e6a3e..bd10165d7eec5 100644
--- a/drivers/net/wireless/ti/wlcore/sdio.c
+++ b/drivers/net/wireless/ti/wlcore/sdio.c
@@ -285,7 +285,7 @@ static int wl1271_probe(struct sdio_func *func,
 	struct resource res[2];
 	mmc_pm_flag_t mmcflags;
 	int ret = -ENOMEM;
-	int irq, wakeirq;
+	int irq, wakeirq, num_irqs;
 	const char *chip_family;
 
 	/* We are only able to handle the wlan function */
@@ -353,12 +353,17 @@ static int wl1271_probe(struct sdio_func *func,
 		       irqd_get_trigger_type(irq_get_irq_data(irq));
 	res[0].name = "irq";
 
-	res[1].start = wakeirq;
-	res[1].flags = IORESOURCE_IRQ |
-		       irqd_get_trigger_type(irq_get_irq_data(wakeirq));
-	res[1].name = "wakeirq";
 
-	ret = platform_device_add_resources(glue->core, res, ARRAY_SIZE(res));
+	if (wakeirq > 0) {
+		res[1].start = wakeirq;
+		res[1].flags = IORESOURCE_IRQ |
+			       irqd_get_trigger_type(irq_get_irq_data(wakeirq));
+		res[1].name = "wakeirq";
+		num_irqs = 2;
+	} else {
+		num_irqs = 1;
+	}
+	ret = platform_device_add_resources(glue->core, res, num_irqs);
 	if (ret) {
 		dev_err(glue->dev, "can't add resources\n");
 		goto out_dev_put;
-- 
GitLab


From 3401d42c7ea2d064d15c66698ff8eb96553179ce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
Date: Fri, 26 Oct 2018 12:50:39 +0200
Subject: [PATCH 0711/1890] brcmutil: really fix decoding channel info for 160
 MHz bandwidth
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previous commit /adding/ support for 160 MHz chanspecs was incomplete.
It didn't set bandwidth info and didn't extract control channel info. As
the result it was also using uninitialized "sb" var.

This change has been tested for two chanspecs found to be reported by
some devices/firmwares:
1) 60/160 (0xee32)
   Before: chnum:50 control_ch_num:36
    After: chnum:50 control_ch_num:60
2) 120/160 (0xed72)
   Before: chnum:114 control_ch_num:100
    After: chnum:114 control_ch_num:120

Fixes: 330994e8e8ec ("brcmfmac: fix for proper support of 160MHz bandwidth")
Signed-off-by: RafaÅ‚ MiÅ‚ecki <rafal@milecki.pl>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/broadcom/brcm80211/brcmutil/d11.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmutil/d11.c b/drivers/net/wireless/broadcom/brcm80211/brcmutil/d11.c
index e7584b842dce4..eb5db94f57453 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmutil/d11.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmutil/d11.c
@@ -193,6 +193,9 @@ static void brcmu_d11ac_decchspec(struct brcmu_chan *ch)
 		}
 		break;
 	case BRCMU_CHSPEC_D11AC_BW_160:
+		ch->bw = BRCMU_CHAN_BW_160;
+		ch->sb = brcmu_maskget16(ch->chspec, BRCMU_CHSPEC_D11AC_SB_MASK,
+					 BRCMU_CHSPEC_D11AC_SB_SHIFT);
 		switch (ch->sb) {
 		case BRCMU_CHAN_SB_LLL:
 			ch->control_ch_num -= CH_70MHZ_APART;
-- 
GitLab


From b374e8686fc35ae124e62dc78725ea656ba1ef8a Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 5 Nov 2018 16:51:47 +0100
Subject: [PATCH 0712/1890] mt76: fix building without CONFIG_LEDS_CLASS

When CONFIG_LEDS_CLASS is disabled, or it is a loadable module while
mt76 is built-in, we run into a link error:

drivers/net/wireless/mediatek/mt76/mac80211.o: In function `mt76_register_device':
mac80211.c:(.text+0xb78): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `devm_of_led_classdev_register'

We don't really need a hard dependency here as the driver can presumably
work just fine without LEDs, so this follows the iwlwifi example and
adds a separate Kconfig option for the LED support, this will be available
whenever it will link, and otherwise the respective code gets left out from
the driver object.

Fixes: 17f1de56df05 ("mt76: add common code shared between multiple chipsets")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/mediatek/mt76/Kconfig           | 6 ++++++
 drivers/net/wireless/mediatek/mt76/mac80211.c        | 8 +++++---
 drivers/net/wireless/mediatek/mt76/mt76x2/pci_init.c | 6 ++++--
 3 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/Kconfig b/drivers/net/wireless/mediatek/mt76/Kconfig
index 0ccbcd7e887d6..c30d8f5bbf2aa 100644
--- a/drivers/net/wireless/mediatek/mt76/Kconfig
+++ b/drivers/net/wireless/mediatek/mt76/Kconfig
@@ -1,6 +1,12 @@
 config MT76_CORE
 	tristate
 
+config MT76_LEDS
+	bool
+	depends on MT76_CORE
+	depends on LEDS_CLASS=y || MT76_CORE=LEDS_CLASS
+	default y
+
 config MT76_USB
 	tristate
 	depends on MT76_CORE
diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c
index 2a699e8b79bfb..7d219ff2d4802 100644
--- a/drivers/net/wireless/mediatek/mt76/mac80211.c
+++ b/drivers/net/wireless/mediatek/mt76/mac80211.c
@@ -345,9 +345,11 @@ int mt76_register_device(struct mt76_dev *dev, bool vht,
 	mt76_check_sband(dev, NL80211_BAND_2GHZ);
 	mt76_check_sband(dev, NL80211_BAND_5GHZ);
 
-	ret = mt76_led_init(dev);
-	if (ret)
-		return ret;
+	if (IS_ENABLED(CONFIG_MT76_LEDS)) {
+		ret = mt76_led_init(dev);
+		if (ret)
+			return ret;
+	}
 
 	return ieee80211_register_hw(hw);
 }
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci_init.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci_init.c
index 3824290b219d5..fd125722d1fb6 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci_init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci_init.c
@@ -507,8 +507,10 @@ int mt76x2_register_device(struct mt76x02_dev *dev)
 	mt76x2_dfs_init_detector(dev);
 
 	/* init led callbacks */
-	dev->mt76.led_cdev.brightness_set = mt76x2_led_set_brightness;
-	dev->mt76.led_cdev.blink_set = mt76x2_led_set_blink;
+	if (IS_ENABLED(CONFIG_MT76_LEDS)) {
+		dev->mt76.led_cdev.brightness_set = mt76x2_led_set_brightness;
+		dev->mt76.led_cdev.blink_set = mt76x2_led_set_blink;
+	}
 
 	ret = mt76_register_device(&dev->mt76, true, mt76x02_rates,
 				   ARRAY_SIZE(mt76x02_rates));
-- 
GitLab


From 9de57ff1566fd36a664c1b04a641e26fed472a9c Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Date: Tue, 6 Nov 2018 11:38:56 +0000
Subject: [PATCH 0713/1890] ASoC: qdsp6: q6asm-dai: Only add routing once.

q6asm routing gets added multiple times as part of dai probe.
Move this to q6routing routes which has those widgets defined, this also
fixes the issue where these are added each time at dai probe.

Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/qcom/qdsp6/q6asm-dai.c | 33 --------------------------------
 sound/soc/qcom/qdsp6/q6routing.c | 19 ++++++++++++++++++
 2 files changed, 19 insertions(+), 33 deletions(-)

diff --git a/sound/soc/qcom/qdsp6/q6asm-dai.c b/sound/soc/qcom/qdsp6/q6asm-dai.c
index a16c71c03058e..86115de5c1b2a 100644
--- a/sound/soc/qcom/qdsp6/q6asm-dai.c
+++ b/sound/soc/qcom/qdsp6/q6asm-dai.c
@@ -122,7 +122,6 @@ static struct snd_pcm_hardware q6asm_dai_hardware_playback = {
 			.rate_max =	48000,				\
 		},							\
 		.name = "MultiMedia"#num,				\
-		.probe = fe_dai_probe,					\
 		.id = MSM_FRONTEND_DAI_MULTIMEDIA##num,			\
 	}
 
@@ -511,38 +510,6 @@ static void q6asm_dai_pcm_free(struct snd_pcm *pcm)
 	}
 }
 
-static const struct snd_soc_dapm_route afe_pcm_routes[] = {
-	{"MM_DL1",  NULL, "MultiMedia1 Playback" },
-	{"MM_DL2",  NULL, "MultiMedia2 Playback" },
-	{"MM_DL3",  NULL, "MultiMedia3 Playback" },
-	{"MM_DL4",  NULL, "MultiMedia4 Playback" },
-	{"MM_DL5",  NULL, "MultiMedia5 Playback" },
-	{"MM_DL6",  NULL, "MultiMedia6 Playback" },
-	{"MM_DL7",  NULL, "MultiMedia7 Playback" },
-	{"MM_DL7",  NULL, "MultiMedia8 Playback" },
-	{"MultiMedia1 Capture", NULL, "MM_UL1"},
-	{"MultiMedia2 Capture", NULL, "MM_UL2"},
-	{"MultiMedia3 Capture", NULL, "MM_UL3"},
-	{"MultiMedia4 Capture", NULL, "MM_UL4"},
-	{"MultiMedia5 Capture", NULL, "MM_UL5"},
-	{"MultiMedia6 Capture", NULL, "MM_UL6"},
-	{"MultiMedia7 Capture", NULL, "MM_UL7"},
-	{"MultiMedia8 Capture", NULL, "MM_UL8"},
-
-};
-
-static int fe_dai_probe(struct snd_soc_dai *dai)
-{
-	struct snd_soc_dapm_context *dapm;
-
-	dapm = snd_soc_component_get_dapm(dai->component);
-	snd_soc_dapm_add_routes(dapm, afe_pcm_routes,
-				ARRAY_SIZE(afe_pcm_routes));
-
-	return 0;
-}
-
-
 static const struct snd_soc_component_driver q6asm_fe_dai_component = {
 	.name		= DRV_NAME,
 	.ops		= &q6asm_dai_ops,
diff --git a/sound/soc/qcom/qdsp6/q6routing.c b/sound/soc/qcom/qdsp6/q6routing.c
index c6b51571be945..d61b8404f7da9 100644
--- a/sound/soc/qcom/qdsp6/q6routing.c
+++ b/sound/soc/qcom/qdsp6/q6routing.c
@@ -909,6 +909,25 @@ static const struct snd_soc_dapm_route intercon[] = {
 	{"MM_UL6", NULL, "MultiMedia6 Mixer"},
 	{"MM_UL7", NULL, "MultiMedia7 Mixer"},
 	{"MM_UL8", NULL, "MultiMedia8 Mixer"},
+
+	{"MM_DL1",  NULL, "MultiMedia1 Playback" },
+	{"MM_DL2",  NULL, "MultiMedia2 Playback" },
+	{"MM_DL3",  NULL, "MultiMedia3 Playback" },
+	{"MM_DL4",  NULL, "MultiMedia4 Playback" },
+	{"MM_DL5",  NULL, "MultiMedia5 Playback" },
+	{"MM_DL6",  NULL, "MultiMedia6 Playback" },
+	{"MM_DL7",  NULL, "MultiMedia7 Playback" },
+	{"MM_DL8",  NULL, "MultiMedia8 Playback" },
+
+	{"MultiMedia1 Capture", NULL, "MM_UL1"},
+	{"MultiMedia2 Capture", NULL, "MM_UL2"},
+	{"MultiMedia3 Capture", NULL, "MM_UL3"},
+	{"MultiMedia4 Capture", NULL, "MM_UL4"},
+	{"MultiMedia5 Capture", NULL, "MM_UL5"},
+	{"MultiMedia6 Capture", NULL, "MM_UL6"},
+	{"MultiMedia7 Capture", NULL, "MM_UL7"},
+	{"MultiMedia8 Capture", NULL, "MM_UL8"},
+
 };
 
 static int routing_hw_params(struct snd_pcm_substream *substream,
-- 
GitLab


From e14856f6cfbb1b96aa45a68f188b147b5bde76b4 Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Date: Tue, 6 Nov 2018 11:38:57 +0000
Subject: [PATCH 0714/1890] ASoC: qdsp6: q6afe-dai: Fix the dai widgets

For some reason the dapm widgets are incorrectly defined from the start,
Not sure how we ended up with such thing. Fix them now!

Without this fix the backend dais are always powered up even if there
is no active stream.

Reported-by: Jimmy Cheng-Yi Chiang <cychiang@google.com>
Reported-by: Rohit kumar <rohitkr@codeaurora.org>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/qcom/qdsp6/q6afe-dai.c | 208 +++++++++++++++----------------
 1 file changed, 104 insertions(+), 104 deletions(-)

diff --git a/sound/soc/qcom/qdsp6/q6afe-dai.c b/sound/soc/qcom/qdsp6/q6afe-dai.c
index 60ff4a2d35774..8f6c8fc073a93 100644
--- a/sound/soc/qcom/qdsp6/q6afe-dai.c
+++ b/sound/soc/qcom/qdsp6/q6afe-dai.c
@@ -1112,204 +1112,204 @@ static int q6afe_of_xlate_dai_name(struct snd_soc_component *component,
 }
 
 static const struct snd_soc_dapm_widget q6afe_dai_widgets[] = {
-	SND_SOC_DAPM_AIF_OUT("HDMI_RX", "HDMI Playback", 0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("SLIMBUS_0_RX", "Slimbus Playback", 0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("SLIMBUS_1_RX", "Slimbus1 Playback", 0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("SLIMBUS_2_RX", "Slimbus2 Playback", 0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("SLIMBUS_3_RX", "Slimbus3 Playback", 0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("SLIMBUS_4_RX", "Slimbus4 Playback", 0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("SLIMBUS_5_RX", "Slimbus5 Playback", 0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("SLIMBUS_6_RX", "Slimbus6 Playback", 0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("SLIMBUS_0_TX", "Slimbus Capture", 0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("SLIMBUS_1_TX", "Slimbus1 Capture", 0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("SLIMBUS_2_TX", "Slimbus2 Capture", 0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("SLIMBUS_3_TX", "Slimbus3 Capture", 0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("SLIMBUS_4_TX", "Slimbus4 Capture", 0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("SLIMBUS_5_TX", "Slimbus5 Capture", 0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("SLIMBUS_6_TX", "Slimbus6 Capture", 0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("QUAT_MI2S_RX", "Quaternary MI2S Playback",
+	SND_SOC_DAPM_AIF_IN("HDMI_RX", NULL, 0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("SLIMBUS_0_RX", NULL, 0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("SLIMBUS_1_RX", NULL, 0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("SLIMBUS_2_RX", NULL, 0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("SLIMBUS_3_RX", NULL, 0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("SLIMBUS_4_RX", NULL, 0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("SLIMBUS_5_RX", NULL, 0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("SLIMBUS_6_RX", NULL, 0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("SLIMBUS_0_TX", NULL, 0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("SLIMBUS_1_TX", NULL, 0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("SLIMBUS_2_TX", NULL, 0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("SLIMBUS_3_TX", NULL, 0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("SLIMBUS_4_TX", NULL, 0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("SLIMBUS_5_TX", NULL, 0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("SLIMBUS_6_TX", NULL, 0, 0, 0, 0),
+	SND_SOC_DAPM_AIF_IN("QUAT_MI2S_RX", NULL,
 						0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("QUAT_MI2S_TX", "Quaternary MI2S Capture",
+	SND_SOC_DAPM_AIF_OUT("QUAT_MI2S_TX", NULL,
 						0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("TERT_MI2S_RX", "Tertiary MI2S Playback",
+	SND_SOC_DAPM_AIF_IN("TERT_MI2S_RX", NULL,
 						0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("TERT_MI2S_TX", "Tertiary MI2S Capture",
+	SND_SOC_DAPM_AIF_OUT("TERT_MI2S_TX", NULL,
 						0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("SEC_MI2S_RX", "Secondary MI2S Playback",
+	SND_SOC_DAPM_AIF_IN("SEC_MI2S_RX", NULL,
 			     0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("SEC_MI2S_TX", "Secondary MI2S Capture",
+	SND_SOC_DAPM_AIF_OUT("SEC_MI2S_TX", NULL,
 						0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("SEC_MI2S_RX_SD1",
+	SND_SOC_DAPM_AIF_IN("SEC_MI2S_RX_SD1",
 			"Secondary MI2S Playback SD1",
 			0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("PRI_MI2S_RX", "Primary MI2S Playback",
+	SND_SOC_DAPM_AIF_IN("PRI_MI2S_RX", NULL,
 			     0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("PRI_MI2S_TX", "Primary MI2S Capture",
+	SND_SOC_DAPM_AIF_OUT("PRI_MI2S_TX", NULL,
 						0, 0, 0, 0),
 
-	SND_SOC_DAPM_AIF_OUT("PRIMARY_TDM_RX_0", "Primary TDM0 Playback",
+	SND_SOC_DAPM_AIF_IN("PRIMARY_TDM_RX_0", NULL,
 			     0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("PRIMARY_TDM_RX_1", "Primary TDM1 Playback",
+	SND_SOC_DAPM_AIF_IN("PRIMARY_TDM_RX_1", NULL,
 			     0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("PRIMARY_TDM_RX_2", "Primary TDM2 Playback",
+	SND_SOC_DAPM_AIF_IN("PRIMARY_TDM_RX_2", NULL,
 			     0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("PRIMARY_TDM_RX_3", "Primary TDM3 Playback",
+	SND_SOC_DAPM_AIF_IN("PRIMARY_TDM_RX_3", NULL,
 			     0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("PRIMARY_TDM_RX_4", "Primary TDM4 Playback",
+	SND_SOC_DAPM_AIF_IN("PRIMARY_TDM_RX_4", NULL,
 			     0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("PRIMARY_TDM_RX_5", "Primary TDM5 Playback",
+	SND_SOC_DAPM_AIF_IN("PRIMARY_TDM_RX_5", NULL,
 			     0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("PRIMARY_TDM_RX_6", "Primary TDM6 Playback",
+	SND_SOC_DAPM_AIF_IN("PRIMARY_TDM_RX_6", NULL,
 			     0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("PRIMARY_TDM_RX_7", "Primary TDM7 Playback",
+	SND_SOC_DAPM_AIF_IN("PRIMARY_TDM_RX_7", NULL,
 			     0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("PRIMARY_TDM_TX_0", "Primary TDM0 Capture",
+	SND_SOC_DAPM_AIF_OUT("PRIMARY_TDM_TX_0", NULL,
 						0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("PRIMARY_TDM_TX_1", "Primary TDM1 Capture",
+	SND_SOC_DAPM_AIF_OUT("PRIMARY_TDM_TX_1", NULL,
 						0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("PRIMARY_TDM_TX_2", "Primary TDM2 Capture",
+	SND_SOC_DAPM_AIF_OUT("PRIMARY_TDM_TX_2", NULL,
 						0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("PRIMARY_TDM_TX_3", "Primary TDM3 Capture",
+	SND_SOC_DAPM_AIF_OUT("PRIMARY_TDM_TX_3", NULL,
 						0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("PRIMARY_TDM_TX_4", "Primary TDM4 Capture",
+	SND_SOC_DAPM_AIF_OUT("PRIMARY_TDM_TX_4", NULL,
 						0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("PRIMARY_TDM_TX_5", "Primary TDM5 Capture",
+	SND_SOC_DAPM_AIF_OUT("PRIMARY_TDM_TX_5", NULL,
 						0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("PRIMARY_TDM_TX_6", "Primary TDM6 Capture",
+	SND_SOC_DAPM_AIF_OUT("PRIMARY_TDM_TX_6", NULL,
 						0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("PRIMARY_TDM_TX_7", "Primary TDM7 Capture",
+	SND_SOC_DAPM_AIF_OUT("PRIMARY_TDM_TX_7", NULL,
 						0, 0, 0, 0),
 
-	SND_SOC_DAPM_AIF_OUT("SEC_TDM_RX_0", "Secondary TDM0 Playback",
+	SND_SOC_DAPM_AIF_IN("SEC_TDM_RX_0", NULL,
 			     0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("SEC_TDM_RX_1", "Secondary TDM1 Playback",
+	SND_SOC_DAPM_AIF_IN("SEC_TDM_RX_1", NULL,
 			     0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("SEC_TDM_RX_2", "Secondary TDM2 Playback",
+	SND_SOC_DAPM_AIF_IN("SEC_TDM_RX_2", NULL,
 			     0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("SEC_TDM_RX_3", "Secondary TDM3 Playback",
+	SND_SOC_DAPM_AIF_IN("SEC_TDM_RX_3", NULL,
 			     0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("SEC_TDM_RX_4", "Secondary TDM4 Playback",
+	SND_SOC_DAPM_AIF_IN("SEC_TDM_RX_4", NULL,
 			     0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("SEC_TDM_RX_5", "Secondary TDM5 Playback",
+	SND_SOC_DAPM_AIF_IN("SEC_TDM_RX_5", NULL,
 			     0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("SEC_TDM_RX_6", "Secondary TDM6 Playback",
+	SND_SOC_DAPM_AIF_IN("SEC_TDM_RX_6", NULL,
 			     0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("SEC_TDM_RX_7", "Secondary TDM7 Playback",
+	SND_SOC_DAPM_AIF_IN("SEC_TDM_RX_7", NULL,
 			     0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("SEC_TDM_TX_0", "Secondary TDM0 Capture",
+	SND_SOC_DAPM_AIF_OUT("SEC_TDM_TX_0", NULL,
 						0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("SEC_TDM_TX_1", "Secondary TDM1 Capture",
+	SND_SOC_DAPM_AIF_OUT("SEC_TDM_TX_1", NULL,
 						0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("SEC_TDM_TX_2", "Secondary TDM2 Capture",
+	SND_SOC_DAPM_AIF_OUT("SEC_TDM_TX_2", NULL,
 						0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("SEC_TDM_TX_3", "Secondary TDM3 Capture",
+	SND_SOC_DAPM_AIF_OUT("SEC_TDM_TX_3", NULL,
 						0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("SEC_TDM_TX_4", "Secondary TDM4 Capture",
+	SND_SOC_DAPM_AIF_OUT("SEC_TDM_TX_4", NULL,
 						0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("SEC_TDM_TX_5", "Secondary TDM5 Capture",
+	SND_SOC_DAPM_AIF_OUT("SEC_TDM_TX_5", NULL,
 						0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("SEC_TDM_TX_6", "Secondary TDM6 Capture",
+	SND_SOC_DAPM_AIF_OUT("SEC_TDM_TX_6", NULL,
 						0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("SEC_TDM_TX_7", "Secondary TDM7 Capture",
+	SND_SOC_DAPM_AIF_OUT("SEC_TDM_TX_7", NULL,
 						0, 0, 0, 0),
 
-	SND_SOC_DAPM_AIF_OUT("TERT_TDM_RX_0", "Tertiary TDM0 Playback",
+	SND_SOC_DAPM_AIF_IN("TERT_TDM_RX_0", NULL,
 			     0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("TERT_TDM_RX_1", "Tertiary TDM1 Playback",
+	SND_SOC_DAPM_AIF_IN("TERT_TDM_RX_1", NULL,
 			     0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("TERT_TDM_RX_2", "Tertiary TDM2 Playback",
+	SND_SOC_DAPM_AIF_IN("TERT_TDM_RX_2", NULL,
 			     0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("TERT_TDM_RX_3", "Tertiary TDM3 Playback",
+	SND_SOC_DAPM_AIF_IN("TERT_TDM_RX_3", NULL,
 			     0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("TERT_TDM_RX_4", "Tertiary TDM4 Playback",
+	SND_SOC_DAPM_AIF_IN("TERT_TDM_RX_4", NULL,
 			     0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("TERT_TDM_RX_5", "Tertiary TDM5 Playback",
+	SND_SOC_DAPM_AIF_IN("TERT_TDM_RX_5", NULL,
 			     0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("TERT_TDM_RX_6", "Tertiary TDM6 Playback",
+	SND_SOC_DAPM_AIF_IN("TERT_TDM_RX_6", NULL,
 			     0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("TERT_TDM_RX_7", "Tertiary TDM7 Playback",
+	SND_SOC_DAPM_AIF_IN("TERT_TDM_RX_7", NULL,
 			     0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("TERT_TDM_TX_0", "Tertiary TDM0 Capture",
+	SND_SOC_DAPM_AIF_OUT("TERT_TDM_TX_0", NULL,
 						0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("TERT_TDM_TX_1", "Tertiary TDM1 Capture",
+	SND_SOC_DAPM_AIF_OUT("TERT_TDM_TX_1", NULL,
 						0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("TERT_TDM_TX_2", "Tertiary TDM2 Capture",
+	SND_SOC_DAPM_AIF_OUT("TERT_TDM_TX_2", NULL,
 						0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("TERT_TDM_TX_3", "Tertiary TDM3 Capture",
+	SND_SOC_DAPM_AIF_OUT("TERT_TDM_TX_3", NULL,
 						0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("TERT_TDM_TX_4", "Tertiary TDM4 Capture",
+	SND_SOC_DAPM_AIF_OUT("TERT_TDM_TX_4", NULL,
 						0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("TERT_TDM_TX_5", "Tertiary TDM5 Capture",
+	SND_SOC_DAPM_AIF_OUT("TERT_TDM_TX_5", NULL,
 						0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("TERT_TDM_TX_6", "Tertiary TDM6 Capture",
+	SND_SOC_DAPM_AIF_OUT("TERT_TDM_TX_6", NULL,
 						0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("TERT_TDM_TX_7", "Tertiary TDM7 Capture",
+	SND_SOC_DAPM_AIF_OUT("TERT_TDM_TX_7", NULL,
 						0, 0, 0, 0),
 
-	SND_SOC_DAPM_AIF_OUT("QUAT_TDM_RX_0", "Quaternary TDM0 Playback",
+	SND_SOC_DAPM_AIF_IN("QUAT_TDM_RX_0", NULL,
 			     0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("QUAT_TDM_RX_1", "Quaternary TDM1 Playback",
+	SND_SOC_DAPM_AIF_IN("QUAT_TDM_RX_1", NULL,
 			     0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("QUAT_TDM_RX_2", "Quaternary TDM2 Playback",
+	SND_SOC_DAPM_AIF_IN("QUAT_TDM_RX_2", NULL,
 			     0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("QUAT_TDM_RX_3", "Quaternary TDM3 Playback",
+	SND_SOC_DAPM_AIF_IN("QUAT_TDM_RX_3", NULL,
 			     0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("QUAT_TDM_RX_4", "Quaternary TDM4 Playback",
+	SND_SOC_DAPM_AIF_IN("QUAT_TDM_RX_4", NULL,
 			     0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("QUAT_TDM_RX_5", "Quaternary TDM5 Playback",
+	SND_SOC_DAPM_AIF_IN("QUAT_TDM_RX_5", NULL,
 			     0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("QUAT_TDM_RX_6", "Quaternary TDM6 Playback",
+	SND_SOC_DAPM_AIF_IN("QUAT_TDM_RX_6", NULL,
 			     0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("QUAT_TDM_RX_7", "Quaternary TDM7 Playback",
+	SND_SOC_DAPM_AIF_IN("QUAT_TDM_RX_7", NULL,
 			     0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("QUAT_TDM_TX_0", "Quaternary TDM0 Capture",
+	SND_SOC_DAPM_AIF_OUT("QUAT_TDM_TX_0", NULL,
 						0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("QUAT_TDM_TX_1", "Quaternary TDM1 Capture",
+	SND_SOC_DAPM_AIF_OUT("QUAT_TDM_TX_1", NULL,
 						0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("QUAT_TDM_TX_2", "Quaternary TDM2 Capture",
+	SND_SOC_DAPM_AIF_OUT("QUAT_TDM_TX_2", NULL,
 						0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("QUAT_TDM_TX_3", "Quaternary TDM3 Capture",
+	SND_SOC_DAPM_AIF_OUT("QUAT_TDM_TX_3", NULL,
 						0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("QUAT_TDM_TX_4", "Quaternary TDM4 Capture",
+	SND_SOC_DAPM_AIF_OUT("QUAT_TDM_TX_4", NULL,
 						0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("QUAT_TDM_TX_5", "Quaternary TDM5 Capture",
+	SND_SOC_DAPM_AIF_OUT("QUAT_TDM_TX_5", NULL,
 						0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("QUAT_TDM_TX_6", "Quaternary TDM6 Capture",
+	SND_SOC_DAPM_AIF_OUT("QUAT_TDM_TX_6", NULL,
 						0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("QUAT_TDM_TX_7", "Quaternary TDM7 Capture",
+	SND_SOC_DAPM_AIF_OUT("QUAT_TDM_TX_7", NULL,
 						0, 0, 0, 0),
 
-	SND_SOC_DAPM_AIF_OUT("QUIN_TDM_RX_0", "Quinary TDM0 Playback",
+	SND_SOC_DAPM_AIF_IN("QUIN_TDM_RX_0", NULL,
 			     0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("QUIN_TDM_RX_1", "Quinary TDM1 Playback",
+	SND_SOC_DAPM_AIF_IN("QUIN_TDM_RX_1", NULL,
 			     0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("QUIN_TDM_RX_2", "Quinary TDM2 Playback",
+	SND_SOC_DAPM_AIF_IN("QUIN_TDM_RX_2", NULL,
 			     0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("QUIN_TDM_RX_3", "Quinary TDM3 Playback",
+	SND_SOC_DAPM_AIF_IN("QUIN_TDM_RX_3", NULL,
 			     0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("QUIN_TDM_RX_4", "Quinary TDM4 Playback",
+	SND_SOC_DAPM_AIF_IN("QUIN_TDM_RX_4", NULL,
 			     0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("QUIN_TDM_RX_5", "Quinary TDM5 Playback",
+	SND_SOC_DAPM_AIF_IN("QUIN_TDM_RX_5", NULL,
 			     0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("QUIN_TDM_RX_6", "Quinary TDM6 Playback",
+	SND_SOC_DAPM_AIF_IN("QUIN_TDM_RX_6", NULL,
 			     0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_OUT("QUIN_TDM_RX_7", "Quinary TDM7 Playback",
+	SND_SOC_DAPM_AIF_IN("QUIN_TDM_RX_7", NULL,
 			     0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("QUIN_TDM_TX_0", "Quinary TDM0 Capture",
+	SND_SOC_DAPM_AIF_OUT("QUIN_TDM_TX_0", NULL,
 						0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("QUIN_TDM_TX_1", "Quinary TDM1 Capture",
+	SND_SOC_DAPM_AIF_OUT("QUIN_TDM_TX_1", NULL,
 						0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("QUIN_TDM_TX_2", "Quinary TDM2 Capture",
+	SND_SOC_DAPM_AIF_OUT("QUIN_TDM_TX_2", NULL,
 						0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("QUIN_TDM_TX_3", "Quinary TDM3 Capture",
+	SND_SOC_DAPM_AIF_OUT("QUIN_TDM_TX_3", NULL,
 						0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("QUIN_TDM_TX_4", "Quinary TDM4 Capture",
+	SND_SOC_DAPM_AIF_OUT("QUIN_TDM_TX_4", NULL,
 						0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("QUIN_TDM_TX_5", "Quinary TDM5 Capture",
+	SND_SOC_DAPM_AIF_OUT("QUIN_TDM_TX_5", NULL,
 						0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("QUIN_TDM_TX_6", "Quinary TDM6 Capture",
+	SND_SOC_DAPM_AIF_OUT("QUIN_TDM_TX_6", NULL,
 						0, 0, 0, 0),
-	SND_SOC_DAPM_AIF_IN("QUIN_TDM_TX_7", "Quinary TDM7 Capture",
+	SND_SOC_DAPM_AIF_OUT("QUIN_TDM_TX_7", NULL,
 						0, 0, 0, 0),
 };
 
-- 
GitLab


From 313a06e636808387822af24c507cba92703568b1 Mon Sep 17 00:00:00 2001
From: Jeremy Linton <jeremy.linton@arm.com>
Date: Mon, 5 Nov 2018 18:14:41 -0600
Subject: [PATCH 0715/1890] lib/raid6: Fix arm64 test build

The lib/raid6/test fails to build the neon objects
on arm64 because the correct machine type is 'aarch64'.

Once this is correctly enabled, the neon recovery objects
need to be added to the build.

Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 lib/raid6/test/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index 5d73f5cb4d8a7..79777645cac9c 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -27,7 +27,7 @@ ifeq ($(ARCH),arm)
         CFLAGS += -I../../../arch/arm/include -mfpu=neon
         HAS_NEON = yes
 endif
-ifeq ($(ARCH),arm64)
+ifeq ($(ARCH),aarch64)
         CFLAGS += -I../../../arch/arm64/include
         HAS_NEON = yes
 endif
@@ -41,7 +41,7 @@ ifeq ($(IS_X86),yes)
 		    gcc -c -x assembler - >&/dev/null &&        \
 		    rm ./-.o && echo -DCONFIG_AS_AVX512=1)
 else ifeq ($(HAS_NEON),yes)
-        OBJS   += neon.o neon1.o neon2.o neon4.o neon8.o
+        OBJS   += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o
         CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1
 else
         HAS_ALTIVEC := $(shell printf '\#include <altivec.h>\nvector int a;\n' |\
-- 
GitLab


From 5d96c9342c23ee1d084802dcf064caa67ecaa45b Mon Sep 17 00:00:00 2001
From: Vishal Verma <vishal.l.verma@intel.com>
Date: Thu, 25 Oct 2018 18:37:28 -0600
Subject: [PATCH 0716/1890] acpi/nfit, x86/mce: Handle only uncorrectable
 machine checks

The MCE handler for nfit devices is called for memory errors on a
Non-Volatile DIMM and adds the error location to a 'badblocks' list.
This list is used by the various NVDIMM drivers to avoid consuming known
poison locations during IO.

The MCE handler gets called for both corrected and uncorrectable errors.
Until now, both kinds of errors have been added to the badblocks list.
However, corrected memory errors indicate that the problem has already
been fixed by hardware, and the resulting interrupt is merely a
notification to Linux.

As far as future accesses to that location are concerned, it is
perfectly fine to use, and thus doesn't need to be included in the above
badblocks list.

Add a check in the nfit MCE handler to filter out corrected mce events,
and only process uncorrectable errors.

Fixes: 6839a6d96f4e ("nfit: do an ARS scrub on hitting a latent media error")
Reported-by: Omar Avelar <omar.avelar@intel.com>
Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
CC: Arnd Bergmann <arnd@arndb.de>
CC: Dan Williams <dan.j.williams@intel.com>
CC: Dave Jiang <dave.jiang@intel.com>
CC: elliott@hpe.com
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Ingo Molnar <mingo@redhat.com>
CC: Len Brown <lenb@kernel.org>
CC: linux-acpi@vger.kernel.org
CC: linux-edac <linux-edac@vger.kernel.org>
CC: linux-nvdimm@lists.01.org
CC: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
CC: "Rafael J. Wysocki" <rjw@rjwysocki.net>
CC: Ross Zwisler <zwisler@kernel.org>
CC: stable <stable@vger.kernel.org>
CC: Thomas Gleixner <tglx@linutronix.de>
CC: Tony Luck <tony.luck@intel.com>
CC: x86-ml <x86@kernel.org>
CC: Yazen Ghannam <yazen.ghannam@amd.com>
Link: http://lkml.kernel.org/r/20181026003729.8420-1-vishal.l.verma@intel.com
---
 arch/x86/include/asm/mce.h       | 1 +
 arch/x86/kernel/cpu/mcheck/mce.c | 3 ++-
 drivers/acpi/nfit/mce.c          | 4 ++--
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 4da9b1c58d287..dbd9fe2f6163d 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -221,6 +221,7 @@ static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c) { return mce_am
 
 int mce_available(struct cpuinfo_x86 *c);
 bool mce_is_memory_error(struct mce *m);
+bool mce_is_correctable(struct mce *m);
 
 DECLARE_PER_CPU(unsigned, mce_exception_count);
 DECLARE_PER_CPU(unsigned, mce_poll_count);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 8c66d2fc8f81d..77527b8ea982a 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -534,7 +534,7 @@ bool mce_is_memory_error(struct mce *m)
 }
 EXPORT_SYMBOL_GPL(mce_is_memory_error);
 
-static bool mce_is_correctable(struct mce *m)
+bool mce_is_correctable(struct mce *m)
 {
 	if (m->cpuvendor == X86_VENDOR_AMD && m->status & MCI_STATUS_DEFERRED)
 		return false;
@@ -547,6 +547,7 @@ static bool mce_is_correctable(struct mce *m)
 
 	return true;
 }
+EXPORT_SYMBOL_GPL(mce_is_correctable);
 
 static bool cec_add_mce(struct mce *m)
 {
diff --git a/drivers/acpi/nfit/mce.c b/drivers/acpi/nfit/mce.c
index e9626bf6ca296..7a51707f87e95 100644
--- a/drivers/acpi/nfit/mce.c
+++ b/drivers/acpi/nfit/mce.c
@@ -25,8 +25,8 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
 	struct acpi_nfit_desc *acpi_desc;
 	struct nfit_spa *nfit_spa;
 
-	/* We only care about memory errors */
-	if (!mce_is_memory_error(mce))
+	/* We only care about uncorrectable memory errors */
+	if (!mce_is_memory_error(mce) || mce_is_correctable(mce))
 		return NOTIFY_DONE;
 
 	/*
-- 
GitLab


From e8a308e5f47e545e0d41d0686c00f5f5217c5f61 Mon Sep 17 00:00:00 2001
From: Vishal Verma <vishal.l.verma@intel.com>
Date: Thu, 25 Oct 2018 18:37:29 -0600
Subject: [PATCH 0717/1890] acpi/nfit, x86/mce: Validate a MCE's address before
 using it

The NFIT machine check handler uses the physical address from the mce
structure, and compares it against information in the ACPI NFIT table
to determine whether that location lies on an NVDIMM. The mce->addr
field however may not always be valid, and this is indicated by the
MCI_STATUS_ADDRV bit in the status field.

Export mce_usable_address() which already performs validation for the
address, and use it in the NFIT handler.

Fixes: 6839a6d96f4e ("nfit: do an ARS scrub on hitting a latent media error")
Reported-by: Robert Elliott <elliott@hpe.com>
Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
CC: Arnd Bergmann <arnd@arndb.de>
Cc: Dan Williams <dan.j.williams@intel.com>
CC: Dave Jiang <dave.jiang@intel.com>
CC: elliott@hpe.com
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Ingo Molnar <mingo@redhat.com>
CC: Len Brown <lenb@kernel.org>
CC: linux-acpi@vger.kernel.org
CC: linux-edac <linux-edac@vger.kernel.org>
CC: linux-nvdimm@lists.01.org
CC: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
CC: "Rafael J. Wysocki" <rjw@rjwysocki.net>
CC: Ross Zwisler <zwisler@kernel.org>
CC: stable <stable@vger.kernel.org>
CC: Thomas Gleixner <tglx@linutronix.de>
CC: Tony Luck <tony.luck@intel.com>
CC: x86-ml <x86@kernel.org>
CC: Yazen Ghannam <yazen.ghannam@amd.com>
Link: http://lkml.kernel.org/r/20181026003729.8420-2-vishal.l.verma@intel.com
---
 arch/x86/include/asm/mce.h       | 1 +
 arch/x86/kernel/cpu/mcheck/mce.c | 3 ++-
 drivers/acpi/nfit/mce.c          | 4 ++++
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index dbd9fe2f6163d..c1a812bd5a27d 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -222,6 +222,7 @@ static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c) { return mce_am
 int mce_available(struct cpuinfo_x86 *c);
 bool mce_is_memory_error(struct mce *m);
 bool mce_is_correctable(struct mce *m);
+int mce_usable_address(struct mce *m);
 
 DECLARE_PER_CPU(unsigned, mce_exception_count);
 DECLARE_PER_CPU(unsigned, mce_poll_count);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 77527b8ea982a..36d2696c9563e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -485,7 +485,7 @@ static void mce_report_event(struct pt_regs *regs)
  * be somewhat complicated (e.g. segment offset would require an instruction
  * parser). So only support physical addresses up to page granuality for now.
  */
-static int mce_usable_address(struct mce *m)
+int mce_usable_address(struct mce *m)
 {
 	if (!(m->status & MCI_STATUS_ADDRV))
 		return 0;
@@ -505,6 +505,7 @@ static int mce_usable_address(struct mce *m)
 
 	return 1;
 }
+EXPORT_SYMBOL_GPL(mce_usable_address);
 
 bool mce_is_memory_error(struct mce *m)
 {
diff --git a/drivers/acpi/nfit/mce.c b/drivers/acpi/nfit/mce.c
index 7a51707f87e95..d6c1b10f6c254 100644
--- a/drivers/acpi/nfit/mce.c
+++ b/drivers/acpi/nfit/mce.c
@@ -29,6 +29,10 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
 	if (!mce_is_memory_error(mce) || mce_is_correctable(mce))
 		return NOTIFY_DONE;
 
+	/* Verify the address reported in the MCE is valid. */
+	if (!mce_usable_address(mce))
+		return NOTIFY_DONE;
+
 	/*
 	 * mce->addr contains the physical addr accessed that caused the
 	 * machine check. We need to walk through the list of NFITs, and see
-- 
GitLab


From 042cb56478152b31c50bea8a784fc826891eb38e Mon Sep 17 00:00:00 2001
From: Tao Ren <taoren@fb.com>
Date: Mon, 5 Nov 2018 14:35:40 -0800
Subject: [PATCH 0718/1890] net: phy: Allow BCM54616S PHY to setup internal
 TX/RX clock delay

This patch allows users to enable/disable internal TX and/or RX clock
delay for BCM54616S PHYs so as to satisfy RGMII timing specifications.

On a particular platform, whether TX and/or RX clock delay is required
depends on how PHY connected to the MAC IP. This requirement can be
specified through "phy-mode" property in the platform device tree.

The patch is inspired by commit 733336262b28 ("net: phy: Allow BCM5481x
PHYs to setup internal TX/RX clock delay").

Signed-off-by: Tao Ren <taoren@fb.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/broadcom.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index e86ea105c8022..7045370104537 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -92,7 +92,7 @@ static int bcm54612e_config_init(struct phy_device *phydev)
 	return 0;
 }
 
-static int bcm5481x_config(struct phy_device *phydev)
+static int bcm54xx_config_clock_delay(struct phy_device *phydev)
 {
 	int rc, val;
 
@@ -429,7 +429,7 @@ static int bcm5481_config_aneg(struct phy_device *phydev)
 	ret = genphy_config_aneg(phydev);
 
 	/* Then we can set up the delay. */
-	bcm5481x_config(phydev);
+	bcm54xx_config_clock_delay(phydev);
 
 	if (of_property_read_bool(np, "enet-phy-lane-swap")) {
 		/* Lane Swap - Undocumented register...magic! */
@@ -442,6 +442,19 @@ static int bcm5481_config_aneg(struct phy_device *phydev)
 	return ret;
 }
 
+static int bcm54616s_config_aneg(struct phy_device *phydev)
+{
+	int ret;
+
+	/* Aneg firsly. */
+	ret = genphy_config_aneg(phydev);
+
+	/* Then we can set up the delay. */
+	bcm54xx_config_clock_delay(phydev);
+
+	return ret;
+}
+
 static int brcm_phy_setbits(struct phy_device *phydev, int reg, int set)
 {
 	int val;
@@ -636,6 +649,7 @@ static struct phy_driver broadcom_drivers[] = {
 	.features	= PHY_GBIT_FEATURES,
 	.flags		= PHY_HAS_INTERRUPT,
 	.config_init	= bcm54xx_config_init,
+	.config_aneg	= bcm54616s_config_aneg,
 	.ack_interrupt	= bcm_phy_ack_intr,
 	.config_intr	= bcm_phy_config_intr,
 }, {
-- 
GitLab


From d52888aa2753e3063a9d3a0c9f72f94aa9809c15 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Fri, 26 Oct 2018 15:28:54 +0300
Subject: [PATCH 0719/1890] x86/mm: Move LDT remap out of KASLR region on
 5-level paging

On 5-level paging the LDT remap area is placed in the middle of the KASLR
randomization region and it can overlap with the direct mapping, the
vmalloc or the vmap area.

The LDT mapping is per mm, so it cannot be moved into the P4D page table
next to the CPU_ENTRY_AREA without complicating PGD table allocation for
5-level paging.

The 4 PGD slot gap just before the direct mapping is reserved for
hypervisors, so it cannot be used.

Move the direct mapping one slot deeper and use the resulting gap for the
LDT remap area. The resulting layout is the same for 4 and 5 level paging.

[ tglx: Massaged changelog ]

Fixes: f55f0501cbf6 ("x86/pti: Put the LDT in its own PGD if PTI is on")
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Andy Lutomirski <luto@kernel.org>
Cc: bp@alien8.de
Cc: hpa@zytor.com
Cc: dave.hansen@linux.intel.com
Cc: peterz@infradead.org
Cc: boris.ostrovsky@oracle.com
Cc: jgross@suse.com
Cc: bhe@redhat.com
Cc: willy@infradead.org
Cc: linux-mm@kvack.org
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20181026122856.66224-2-kirill.shutemov@linux.intel.com
---
 Documentation/x86/x86_64/mm.txt         | 34 +++++++++++++------------
 arch/x86/include/asm/page_64_types.h    | 12 +++++----
 arch/x86/include/asm/pgtable_64_types.h |  4 +--
 arch/x86/xen/mmu_pv.c                   |  6 ++---
 4 files changed, 29 insertions(+), 27 deletions(-)

diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index 73aaaa3da4369..804f9426ed17b 100644
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -34,23 +34,24 @@ __________________|____________|__________________|_________|___________________
 ____________________________________________________________|___________________________________________________________
                   |            |                  |         |
  ffff800000000000 | -128    TB | ffff87ffffffffff |    8 TB | ... guard hole, also reserved for hypervisor
- ffff880000000000 | -120    TB | ffffc7ffffffffff |   64 TB | direct mapping of all physical memory (page_offset_base)
- ffffc80000000000 |  -56    TB | ffffc8ffffffffff |    1 TB | ... unused hole
+ ffff880000000000 | -120    TB | ffff887fffffffff |  0.5 TB | LDT remap for PTI
+ ffff888000000000 | -119.5  TB | ffffc87fffffffff |   64 TB | direct mapping of all physical memory (page_offset_base)
+ ffffc88000000000 |  -55.5  TB | ffffc8ffffffffff |  0.5 TB | ... unused hole
  ffffc90000000000 |  -55    TB | ffffe8ffffffffff |   32 TB | vmalloc/ioremap space (vmalloc_base)
  ffffe90000000000 |  -23    TB | ffffe9ffffffffff |    1 TB | ... unused hole
  ffffea0000000000 |  -22    TB | ffffeaffffffffff |    1 TB | virtual memory map (vmemmap_base)
  ffffeb0000000000 |  -21    TB | ffffebffffffffff |    1 TB | ... unused hole
  ffffec0000000000 |  -20    TB | fffffbffffffffff |   16 TB | KASAN shadow memory
- fffffc0000000000 |   -4    TB | fffffdffffffffff |    2 TB | ... unused hole
-                  |            |                  |         | vaddr_end for KASLR
- fffffe0000000000 |   -2    TB | fffffe7fffffffff |  0.5 TB | cpu_entry_area mapping
- fffffe8000000000 |   -1.5  TB | fffffeffffffffff |  0.5 TB | LDT remap for PTI
- ffffff0000000000 |   -1    TB | ffffff7fffffffff |  0.5 TB | %esp fixup stacks
 __________________|____________|__________________|_________|____________________________________________________________
                                                             |
-                                                            | Identical layout to the 47-bit one from here on:
+                                                            | Identical layout to the 56-bit one from here on:
 ____________________________________________________________|____________________________________________________________
                   |            |                  |         |
+ fffffc0000000000 |   -4    TB | fffffdffffffffff |    2 TB | ... unused hole
+                  |            |                  |         | vaddr_end for KASLR
+ fffffe0000000000 |   -2    TB | fffffe7fffffffff |  0.5 TB | cpu_entry_area mapping
+ fffffe8000000000 |   -1.5  TB | fffffeffffffffff |  0.5 TB | ... unused hole
+ ffffff0000000000 |   -1    TB | ffffff7fffffffff |  0.5 TB | %esp fixup stacks
  ffffff8000000000 | -512    GB | ffffffeeffffffff |  444 GB | ... unused hole
  ffffffef00000000 |  -68    GB | fffffffeffffffff |   64 GB | EFI region mapping space
  ffffffff00000000 |   -4    GB | ffffffff7fffffff |    2 GB | ... unused hole
@@ -83,7 +84,7 @@ Notes:
 __________________|____________|__________________|_________|___________________________________________________________
                   |            |                  |         |
  0000800000000000 |  +64    PB | ffff7fffffffffff | ~16K PB | ... huge, still almost 64 bits wide hole of non-canonical
-                  |            |                  |         |     virtual memory addresses up to the -128 TB
+                  |            |                  |         |     virtual memory addresses up to the -64 PB
                   |            |                  |         |     starting offset of kernel mappings.
 __________________|____________|__________________|_________|___________________________________________________________
                                                             |
@@ -91,23 +92,24 @@ __________________|____________|__________________|_________|___________________
 ____________________________________________________________|___________________________________________________________
                   |            |                  |         |
  ff00000000000000 |  -64    PB | ff0fffffffffffff |    4 PB | ... guard hole, also reserved for hypervisor
- ff10000000000000 |  -60    PB | ff8fffffffffffff |   32 PB | direct mapping of all physical memory (page_offset_base)
- ff90000000000000 |  -28    PB | ff9fffffffffffff |    4 PB | LDT remap for PTI
+ ff10000000000000 |  -60    PB | ff10ffffffffffff | 0.25 PB | LDT remap for PTI
+ ff11000000000000 |  -59.75 PB | ff90ffffffffffff |   32 PB | direct mapping of all physical memory (page_offset_base)
+ ff91000000000000 |  -27.75 PB | ff9fffffffffffff | 3.75 PB | ... unused hole
  ffa0000000000000 |  -24    PB | ffd1ffffffffffff | 12.5 PB | vmalloc/ioremap space (vmalloc_base)
  ffd2000000000000 |  -11.5  PB | ffd3ffffffffffff |  0.5 PB | ... unused hole
  ffd4000000000000 |  -11    PB | ffd5ffffffffffff |  0.5 PB | virtual memory map (vmemmap_base)
  ffd6000000000000 |  -10.5  PB | ffdeffffffffffff | 2.25 PB | ... unused hole
  ffdf000000000000 |   -8.25 PB | fffffdffffffffff |   ~8 PB | KASAN shadow memory
- fffffc0000000000 |   -4    TB | fffffdffffffffff |    2 TB | ... unused hole
-                  |            |                  |         | vaddr_end for KASLR
- fffffe0000000000 |   -2    TB | fffffe7fffffffff |  0.5 TB | cpu_entry_area mapping
- fffffe8000000000 |   -1.5  TB | fffffeffffffffff |  0.5 TB | ... unused hole
- ffffff0000000000 |   -1    TB | ffffff7fffffffff |  0.5 TB | %esp fixup stacks
 __________________|____________|__________________|_________|____________________________________________________________
                                                             |
                                                             | Identical layout to the 47-bit one from here on:
 ____________________________________________________________|____________________________________________________________
                   |            |                  |         |
+ fffffc0000000000 |   -4    TB | fffffdffffffffff |    2 TB | ... unused hole
+                  |            |                  |         | vaddr_end for KASLR
+ fffffe0000000000 |   -2    TB | fffffe7fffffffff |  0.5 TB | cpu_entry_area mapping
+ fffffe8000000000 |   -1.5  TB | fffffeffffffffff |  0.5 TB | ... unused hole
+ ffffff0000000000 |   -1    TB | ffffff7fffffffff |  0.5 TB | %esp fixup stacks
  ffffff8000000000 | -512    GB | ffffffeeffffffff |  444 GB | ... unused hole
  ffffffef00000000 |  -68    GB | fffffffeffffffff |   64 GB | EFI region mapping space
  ffffffff00000000 |   -4    GB | ffffffff7fffffff |    2 GB | ... unused hole
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index cd0cf1c568b4c..8f657286d599a 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -33,12 +33,14 @@
 
 /*
  * Set __PAGE_OFFSET to the most negative possible address +
- * PGDIR_SIZE*16 (pgd slot 272).  The gap is to allow a space for a
- * hypervisor to fit.  Choosing 16 slots here is arbitrary, but it's
- * what Xen requires.
+ * PGDIR_SIZE*17 (pgd slot 273).
+ *
+ * The gap is to allow a space for LDT remap for PTI (1 pgd slot) and space for
+ * a hypervisor (16 slots). Choosing 16 slots for a hypervisor is arbitrary,
+ * but it's what Xen requires.
  */
-#define __PAGE_OFFSET_BASE_L5	_AC(0xff10000000000000, UL)
-#define __PAGE_OFFSET_BASE_L4	_AC(0xffff880000000000, UL)
+#define __PAGE_OFFSET_BASE_L5	_AC(0xff11000000000000, UL)
+#define __PAGE_OFFSET_BASE_L4	_AC(0xffff888000000000, UL)
 
 #ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
 #define __PAGE_OFFSET           page_offset_base
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 04edd2d58211a..84bd9bdc1987f 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -111,9 +111,7 @@ extern unsigned int ptrs_per_p4d;
  */
 #define MAXMEM			(1UL << MAX_PHYSMEM_BITS)
 
-#define LDT_PGD_ENTRY_L4	-3UL
-#define LDT_PGD_ENTRY_L5	-112UL
-#define LDT_PGD_ENTRY		(pgtable_l5_enabled() ? LDT_PGD_ENTRY_L5 : LDT_PGD_ENTRY_L4)
+#define LDT_PGD_ENTRY		-240UL
 #define LDT_BASE_ADDR		(LDT_PGD_ENTRY << PGDIR_SHIFT)
 #define LDT_END_ADDR		(LDT_BASE_ADDR + PGDIR_SIZE)
 
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 0d7b3ae4960bb..a5d7ed1253370 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1905,7 +1905,7 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 	init_top_pgt[0] = __pgd(0);
 
 	/* Pre-constructed entries are in pfn, so convert to mfn */
-	/* L4[272] -> level3_ident_pgt  */
+	/* L4[273] -> level3_ident_pgt  */
 	/* L4[511] -> level3_kernel_pgt */
 	convert_pfn_mfn(init_top_pgt);
 
@@ -1925,8 +1925,8 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 	addr[0] = (unsigned long)pgd;
 	addr[1] = (unsigned long)l3;
 	addr[2] = (unsigned long)l2;
-	/* Graft it onto L4[272][0]. Note that we creating an aliasing problem:
-	 * Both L4[272][0] and L4[511][510] have entries that point to the same
+	/* Graft it onto L4[273][0]. Note that we creating an aliasing problem:
+	 * Both L4[273][0] and L4[511][510] have entries that point to the same
 	 * L2 (PMD) tables. Meaning that if you modify it in __va space
 	 * it will be also modified in the __ka space! (But if you just
 	 * modify the PMD table to point to other PTE's or none, then you
-- 
GitLab


From a0e6e0831c516860fc7f9be1db6c081fe902ebcf Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Fri, 26 Oct 2018 15:28:55 +0300
Subject: [PATCH 0720/1890] x86/ldt: Unmap PTEs for the slot before freeing LDT
 pages

modify_ldt(2) leaves the old LDT mapped after switching over to the new
one. The old LDT gets freed and the pages can be re-used.

Leaving the mapping in place can have security implications. The mapping is
present in the userspace page tables and Meltdown-like attacks can read
these freed and possibly reused pages.

It's relatively simple to fix: unmap the old LDT and flush TLB before
freeing the old LDT memory.

This further allows to avoid flushing the TLB in map_ldt_struct() as the
slot is unmapped and flushed by unmap_ldt_struct() or has never been mapped
at all.

[ tglx: Massaged changelog and removed the needless line breaks ]

Fixes: f55f0501cbf6 ("x86/pti: Put the LDT in its own PGD if PTI is on")
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: bp@alien8.de
Cc: hpa@zytor.com
Cc: dave.hansen@linux.intel.com
Cc: luto@kernel.org
Cc: peterz@infradead.org
Cc: boris.ostrovsky@oracle.com
Cc: jgross@suse.com
Cc: bhe@redhat.com
Cc: willy@infradead.org
Cc: linux-mm@kvack.org
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20181026122856.66224-3-kirill.shutemov@linux.intel.com
---
 arch/x86/kernel/ldt.c | 51 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 38 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index ab18e0884dc6f..18e4525c5933e 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -199,14 +199,6 @@ static void sanity_check_ldt_mapping(struct mm_struct *mm)
 /*
  * If PTI is enabled, this maps the LDT into the kernelmode and
  * usermode tables for the given mm.
- *
- * There is no corresponding unmap function.  Even if the LDT is freed, we
- * leave the PTEs around until the slot is reused or the mm is destroyed.
- * This is harmless: the LDT is always in ordinary memory, and no one will
- * access the freed slot.
- *
- * If we wanted to unmap freed LDTs, we'd also need to do a flush to make
- * it useful, and the flush would slow down modify_ldt().
  */
 static int
 map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
@@ -214,8 +206,8 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
 	unsigned long va;
 	bool is_vmalloc;
 	spinlock_t *ptl;
+	int i, nr_pages;
 	pgd_t *pgd;
-	int i;
 
 	if (!static_cpu_has(X86_FEATURE_PTI))
 		return 0;
@@ -238,7 +230,9 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
 
 	is_vmalloc = is_vmalloc_addr(ldt->entries);
 
-	for (i = 0; i * PAGE_SIZE < ldt->nr_entries * LDT_ENTRY_SIZE; i++) {
+	nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE);
+
+	for (i = 0; i < nr_pages; i++) {
 		unsigned long offset = i << PAGE_SHIFT;
 		const void *src = (char *)ldt->entries + offset;
 		unsigned long pfn;
@@ -272,13 +266,39 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
 	/* Propagate LDT mapping to the user page-table */
 	map_ldt_struct_to_user(mm);
 
-	va = (unsigned long)ldt_slot_va(slot);
-	flush_tlb_mm_range(mm, va, va + LDT_SLOT_STRIDE, PAGE_SHIFT, false);
-
 	ldt->slot = slot;
 	return 0;
 }
 
+static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt)
+{
+	unsigned long va;
+	int i, nr_pages;
+
+	if (!ldt)
+		return;
+
+	/* LDT map/unmap is only required for PTI */
+	if (!static_cpu_has(X86_FEATURE_PTI))
+		return;
+
+	nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE);
+
+	for (i = 0; i < nr_pages; i++) {
+		unsigned long offset = i << PAGE_SHIFT;
+		spinlock_t *ptl;
+		pte_t *ptep;
+
+		va = (unsigned long)ldt_slot_va(ldt->slot) + offset;
+		ptep = get_locked_pte(mm, va, &ptl);
+		pte_clear(mm, va, ptep);
+		pte_unmap_unlock(ptep, ptl);
+	}
+
+	va = (unsigned long)ldt_slot_va(ldt->slot);
+	flush_tlb_mm_range(mm, va, va + nr_pages * PAGE_SIZE, PAGE_SHIFT, false);
+}
+
 #else /* !CONFIG_PAGE_TABLE_ISOLATION */
 
 static int
@@ -286,6 +306,10 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
 {
 	return 0;
 }
+
+static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt)
+{
+}
 #endif /* CONFIG_PAGE_TABLE_ISOLATION */
 
 static void free_ldt_pgtables(struct mm_struct *mm)
@@ -524,6 +548,7 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
 	}
 
 	install_ldt(mm, new_ldt);
+	unmap_ldt_struct(mm, old_ldt);
 	free_ldt_struct(old_ldt);
 	error = 0;
 
-- 
GitLab


From b082f2dd80612015cd6d9d84e52099734ec9a0e1 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Fri, 26 Oct 2018 15:28:56 +0300
Subject: [PATCH 0721/1890] x86/ldt: Remove unused variable in map_ldt_struct()

Splitting out the sanity check in map_ldt_struct() moved page table syncing
into a separate function, which made the pgd variable unused. Remove it.

[ tglx: Massaged changelog ]

Fixes: 9bae3197e15d ("x86/ldt: Split out sanity check in map_ldt_struct()")
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Andy Lutomirski <luto@kernel.org>
Cc: bp@alien8.de
Cc: hpa@zytor.com
Cc: dave.hansen@linux.intel.com
Cc: peterz@infradead.org
Cc: boris.ostrovsky@oracle.com
Cc: jgross@suse.com
Cc: bhe@redhat.com
Cc: willy@infradead.org
Cc: linux-mm@kvack.org
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20181026122856.66224-4-kirill.shutemov@linux.intel.com
---
 arch/x86/kernel/ldt.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 18e4525c5933e..6135ae8ce0364 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -207,7 +207,6 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
 	bool is_vmalloc;
 	spinlock_t *ptl;
 	int i, nr_pages;
-	pgd_t *pgd;
 
 	if (!static_cpu_has(X86_FEATURE_PTI))
 		return 0;
@@ -221,13 +220,6 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
 	/* Check if the current mappings are sane */
 	sanity_check_ldt_mapping(mm);
 
-	/*
-	 * Did we already have the top level entry allocated?  We can't
-	 * use pgd_none() for this because it doens't do anything on
-	 * 4-level page table kernels.
-	 */
-	pgd = pgd_offset(mm, LDT_BASE_ADDR);
-
 	is_vmalloc = is_vmalloc_addr(ldt->entries);
 
 	nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE);
-- 
GitLab


From a48777fdda7d13179979a889e1fb87655a783cc0 Mon Sep 17 00:00:00 2001
From: Eial Czerwacki <eial@scalemp.com>
Date: Mon, 5 Nov 2018 19:31:54 +0200
Subject: [PATCH 0722/1890] x86/vsmp: Remove dependency on pv_irq_ops

vSMP dependency on pv_irq_ops has been removed some years ago, but the code
still deals with pv_irq_ops.

In short, "cap & ctl & (1 << 4)" is always returning 0, so all
PARAVIRT/PARAVIRT_XXL code related to that can be removed.

However, the rest of the code depends on CONFIG_PCI, so fix it accordingly.

Rename set_vsmp_pv_ops to set_vsmp_ctl as the original name does not make
sense anymore.

Signed-off-by: Eial Czerwacki <eial@scalemp.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Shai Fultheim <shai@scalemp.com>
Cc: Juergen Gross <jgross@suse.com>
Link: https://lkml.kernel.org/r/1541439114-28297-1-git-send-email-eial@scalemp.com
---
 arch/x86/Kconfig          |  1 -
 arch/x86/kernel/vsmp_64.c | 84 ++++-----------------------------------
 2 files changed, 7 insertions(+), 78 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ba7e3464ee923..9d734f3c8234d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -525,7 +525,6 @@ config X86_VSMP
 	bool "ScaleMP vSMP"
 	select HYPERVISOR_GUEST
 	select PARAVIRT
-	select PARAVIRT_XXL
 	depends on X86_64 && PCI
 	depends on X86_EXTENDED_PLATFORM
 	depends on SMP
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index 1eae5af491c27..891a75dbc1313 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -26,65 +26,8 @@
 
 #define TOPOLOGY_REGISTER_OFFSET 0x10
 
-#if defined CONFIG_PCI && defined CONFIG_PARAVIRT_XXL
-/*
- * Interrupt control on vSMPowered systems:
- * ~AC is a shadow of IF.  If IF is 'on' AC should be 'off'
- * and vice versa.
- */
-
-asmlinkage __visible unsigned long vsmp_save_fl(void)
-{
-	unsigned long flags = native_save_fl();
-
-	if (!(flags & X86_EFLAGS_IF) || (flags & X86_EFLAGS_AC))
-		flags &= ~X86_EFLAGS_IF;
-	return flags;
-}
-PV_CALLEE_SAVE_REGS_THUNK(vsmp_save_fl);
-
-__visible void vsmp_restore_fl(unsigned long flags)
-{
-	if (flags & X86_EFLAGS_IF)
-		flags &= ~X86_EFLAGS_AC;
-	else
-		flags |= X86_EFLAGS_AC;
-	native_restore_fl(flags);
-}
-PV_CALLEE_SAVE_REGS_THUNK(vsmp_restore_fl);
-
-asmlinkage __visible void vsmp_irq_disable(void)
-{
-	unsigned long flags = native_save_fl();
-
-	native_restore_fl((flags & ~X86_EFLAGS_IF) | X86_EFLAGS_AC);
-}
-PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_disable);
-
-asmlinkage __visible void vsmp_irq_enable(void)
-{
-	unsigned long flags = native_save_fl();
-
-	native_restore_fl((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC));
-}
-PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_enable);
-
-static unsigned __init vsmp_patch(u8 type, void *ibuf,
-				  unsigned long addr, unsigned len)
-{
-	switch (type) {
-	case PARAVIRT_PATCH(irq.irq_enable):
-	case PARAVIRT_PATCH(irq.irq_disable):
-	case PARAVIRT_PATCH(irq.save_fl):
-	case PARAVIRT_PATCH(irq.restore_fl):
-		return paravirt_patch_default(type, ibuf, addr, len);
-	default:
-		return native_patch(type, ibuf, addr, len);
-	}
-
-}
-
-static void __init set_vsmp_pv_ops(void)
+#ifdef CONFIG_PCI
+static void __init set_vsmp_ctl(void)
 {
 	void __iomem *address;
 	unsigned int cap, ctl, cfg;
@@ -109,28 +52,12 @@ static void __init set_vsmp_pv_ops(void)
 	}
 #endif
 
-	if (cap & ctl & (1 << 4)) {
-		/* Setup irq ops and turn on vSMP  IRQ fastpath handling */
-		pv_ops.irq.irq_disable = PV_CALLEE_SAVE(vsmp_irq_disable);
-		pv_ops.irq.irq_enable = PV_CALLEE_SAVE(vsmp_irq_enable);
-		pv_ops.irq.save_fl = PV_CALLEE_SAVE(vsmp_save_fl);
-		pv_ops.irq.restore_fl = PV_CALLEE_SAVE(vsmp_restore_fl);
-		pv_ops.init.patch = vsmp_patch;
-		ctl &= ~(1 << 4);
-	}
 	writel(ctl, address + 4);
 	ctl = readl(address + 4);
 	pr_info("vSMP CTL: control set to:0x%08x\n", ctl);
 
 	early_iounmap(address, 8);
 }
-#else
-static void __init set_vsmp_pv_ops(void)
-{
-}
-#endif
-
-#ifdef CONFIG_PCI
 static int is_vsmp = -1;
 
 static void __init detect_vsmp_box(void)
@@ -164,11 +91,14 @@ static int is_vsmp_box(void)
 {
 	return 0;
 }
+static void __init set_vsmp_ctl(void)
+{
+}
 #endif
 
 static void __init vsmp_cap_cpus(void)
 {
-#if !defined(CONFIG_X86_VSMP) && defined(CONFIG_SMP)
+#if !defined(CONFIG_X86_VSMP) && defined(CONFIG_SMP) && defined(CONFIG_PCI)
 	void __iomem *address;
 	unsigned int cfg, topology, node_shift, maxcpus;
 
@@ -221,6 +151,6 @@ void __init vsmp_init(void)
 
 	vsmp_cap_cpus();
 
-	set_vsmp_pv_ops();
+	set_vsmp_ctl();
 	return;
 }
-- 
GitLab


From 86a484bda787d542d4a968bd7742bcf844c8adb2 Mon Sep 17 00:00:00 2001
From: Leo Li <sunpeng.li@amd.com>
Date: Tue, 30 Oct 2018 15:09:08 -0400
Subject: [PATCH 0723/1890] drm/amd: Update atom_smu_info_v3_3 structure

Mainly adding the WAFL spread spectrum info, for adjusting display
clocks when XGMI is enabled.

Signed-off-by: Leo Li <sunpeng.li@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/include/atomfirmware.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h
index d2e7c0fa96c2f..8eb0bb241210b 100644
--- a/drivers/gpu/drm/amd/include/atomfirmware.h
+++ b/drivers/gpu/drm/amd/include/atomfirmware.h
@@ -1325,7 +1325,7 @@ struct atom_smu_info_v3_3 {
   struct   atom_common_table_header  table_header;
   uint8_t  smuip_min_ver;
   uint8_t  smuip_max_ver;
-  uint8_t  smu_rsd1;
+  uint8_t  waflclk_ss_mode;
   uint8_t  gpuclk_ss_mode;
   uint16_t sclk_ss_percentage;
   uint16_t sclk_ss_rate_10hz;
@@ -1355,7 +1355,10 @@ struct atom_smu_info_v3_3 {
   uint32_t syspll3_1_vco_freq_10khz;
   uint32_t bootup_fclk_10khz;
   uint32_t bootup_waflclk_10khz;
-  uint32_t reserved[3];
+  uint32_t smu_info_caps;
+  uint16_t waflclk_ss_percentage;    // in unit of 0.001%
+  uint16_t smuinitoffset;
+  uint32_t reserved;
 };
 
 /*
-- 
GitLab


From ce317dd9f809c8da9656c88761e30f0a82a8c2e6 Mon Sep 17 00:00:00 2001
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Date: Fri, 26 Oct 2018 10:40:50 -0700
Subject: [PATCH 0724/1890] ice: Set carrier state and start/stop queues in
 rebuild

Set the carrier state post rebuild by querying the link status. Also
start/stop queues based on link status.

Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_main.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 05993451147a0..6d31ffb649405 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -3296,7 +3296,7 @@ static void ice_rebuild(struct ice_pf *pf)
 	struct device *dev = &pf->pdev->dev;
 	struct ice_hw *hw = &pf->hw;
 	enum ice_status ret;
-	int err;
+	int err, i;
 
 	if (test_bit(__ICE_DOWN, pf->state))
 		goto clear_recovery;
@@ -3370,6 +3370,22 @@ static void ice_rebuild(struct ice_pf *pf)
 	}
 
 	ice_reset_all_vfs(pf, true);
+
+	for (i = 0; i < pf->num_alloc_vsi; i++) {
+		bool link_up;
+
+		if (!pf->vsi[i] || pf->vsi[i]->type != ICE_VSI_PF)
+			continue;
+		ice_get_link_status(pf->vsi[i]->port_info, &link_up);
+		if (link_up) {
+			netif_carrier_on(pf->vsi[i]->netdev);
+			netif_tx_wake_all_queues(pf->vsi[i]->netdev);
+		} else {
+			netif_carrier_off(pf->vsi[i]->netdev);
+			netif_tx_stop_all_queues(pf->vsi[i]->netdev);
+		}
+	}
+
 	/* if we get here, reset flow is successful */
 	clear_bit(__ICE_RESET_FAILED, pf->state);
 	return;
-- 
GitLab


From afd9d4ab58db20029a75cf82f23b6a5641cd7d6f Mon Sep 17 00:00:00 2001
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Date: Fri, 26 Oct 2018 10:40:51 -0700
Subject: [PATCH 0725/1890] ice: Check for reset in progress during remove

The remove path does not currently check to see if a
reset is in progress before proceeding.  This can cause
a resource collision resulting in various types of errors.

Check for reset in progress and wait for a reasonable
amount of time before allowing the remove to progress.

Signed-off-by: Dave Ertman <david.m.ertman@intel.com>
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice.h      | 2 ++
 drivers/net/ethernet/intel/ice/ice_main.c | 6 ++++++
 2 files changed, 8 insertions(+)

diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 4c4b5717a627d..e5b37fa608844 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -76,6 +76,8 @@ extern const char ice_drv_ver[];
 #define ICE_MIN_INTR_PER_VF		(ICE_MIN_QS_PER_VF + 1)
 #define ICE_DFLT_INTR_PER_VF		(ICE_DFLT_QS_PER_VF + 1)
 
+#define ICE_MAX_RESET_WAIT		20
+
 #define ICE_VSIQF_HKEY_ARRAY_SIZE	((VSIQF_HKEY_MAX_INDEX + 1) *	4)
 
 #define ICE_DFLT_NETIF_M (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 6d31ffb649405..aee22f11a41aa 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -2182,6 +2182,12 @@ static void ice_remove(struct pci_dev *pdev)
 	if (!pf)
 		return;
 
+	for (i = 0; i < ICE_MAX_RESET_WAIT; i++) {
+		if (!ice_is_reset_in_progress(pf->state))
+			break;
+		msleep(100);
+	}
+
 	set_bit(__ICE_DOWN, pf->state);
 	ice_service_task_stop(pf);
 
-- 
GitLab


From 0f5d4c21a50716f8bd4e220544b82dca7408d113 Mon Sep 17 00:00:00 2001
From: Akeem G Abodunrin <akeem.g.abodunrin@intel.com>
Date: Fri, 26 Oct 2018 10:40:52 -0700
Subject: [PATCH 0726/1890] ice: Fix dead device link issue with flow control

Setting Rx or Tx pause parameter currently results in link loss on the
interface, requiring the platform/host to be cold power cycled. Fix it.

Signed-off-by: Akeem G Abodunrin <akeem.g.abodunrin@intel.com>
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_ethtool.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 96923580f2a6c..648acdb4c644b 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -1517,10 +1517,15 @@ ice_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause)
 	}
 
 	if (!test_bit(__ICE_DOWN, pf->state)) {
-		/* Give it a little more time to try to come back */
+		/* Give it a little more time to try to come back. If still
+		 * down, restart autoneg link or reinitialize the interface.
+		 */
 		msleep(75);
 		if (!test_bit(__ICE_DOWN, pf->state))
 			return ice_nway_reset(netdev);
+
+		ice_down(vsi);
+		ice_up(vsi);
 	}
 
 	return err;
-- 
GitLab


From 33e055fcc26909b1d66b5d1f334aee38356068d7 Mon Sep 17 00:00:00 2001
From: Victor Raj <victor.raj@intel.com>
Date: Fri, 26 Oct 2018 10:40:53 -0700
Subject: [PATCH 0727/1890] ice: Free VSI contexts during for unload

In the unload path, all VSIs are freed. Also free the related VSI
contexts to prevent memory leaks.

Signed-off-by: Victor Raj <victor.raj@intel.com>
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_common.c |  3 +++
 drivers/net/ethernet/intel/ice/ice_switch.c | 12 ++++++++++++
 drivers/net/ethernet/intel/ice/ice_switch.h |  2 ++
 3 files changed, 17 insertions(+)

diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 8cd6a2401fd9f..554fd707a6d69 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -811,6 +811,9 @@ void ice_deinit_hw(struct ice_hw *hw)
 	/* Attempt to disable FW logging before shutting down control queues */
 	ice_cfg_fw_log(hw, false);
 	ice_shutdown_all_ctrlq(hw);
+
+	/* Clear VSI contexts if not already cleared */
+	ice_clear_all_vsi_ctx(hw);
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
index 33403f39f1b3f..40c9c65589568 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.c
+++ b/drivers/net/ethernet/intel/ice/ice_switch.c
@@ -347,6 +347,18 @@ static void ice_clear_vsi_ctx(struct ice_hw *hw, u16 vsi_handle)
 	}
 }
 
+/**
+ * ice_clear_all_vsi_ctx - clear all the VSI context entries
+ * @hw: pointer to the hw struct
+ */
+void ice_clear_all_vsi_ctx(struct ice_hw *hw)
+{
+	u16 i;
+
+	for (i = 0; i < ICE_MAX_VSI; i++)
+		ice_clear_vsi_ctx(hw, i);
+}
+
 /**
  * ice_add_vsi - add VSI context to the hardware and VSI handle list
  * @hw: pointer to the hw struct
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h
index b88d96a1ef693..d5ef0bd58bf97 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.h
+++ b/drivers/net/ethernet/intel/ice/ice_switch.h
@@ -190,6 +190,8 @@ ice_update_vsi(struct ice_hw *hw, u16 vsi_handle, struct ice_vsi_ctx *vsi_ctx,
 	       struct ice_sq_cd *cd);
 bool ice_is_vsi_valid(struct ice_hw *hw, u16 vsi_handle);
 struct ice_vsi_ctx *ice_get_vsi_ctx(struct ice_hw *hw, u16 vsi_handle);
+void ice_clear_all_vsi_ctx(struct ice_hw *hw);
+/* Switch config */
 enum ice_status ice_get_initial_sw_cfg(struct ice_hw *hw);
 
 /* Switch/bridge related commands */
-- 
GitLab


From 9ecd25c26810a61b9c3abc6c73de32dca6da96e1 Mon Sep 17 00:00:00 2001
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Date: Fri, 26 Oct 2018 10:40:54 -0700
Subject: [PATCH 0728/1890] ice: Remove duplicate addition of VLANs in replay
 path

ice_restore_vlan and active_vlans were originally put in place to
reprogram VLAN filters in the replay path. This is now done as part
of the much broader VSI rebuild/replay framework. So remove both
ice_restore_vlan and active_vlans

Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice.h          |  1 -
 drivers/net/ethernet/intel/ice/ice_main.c     | 42 +++----------------
 .../net/ethernet/intel/ice/ice_virtchnl_pf.c  |  2 -
 3 files changed, 6 insertions(+), 39 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index e5b37fa608844..1639e955f1580 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -191,7 +191,6 @@ struct ice_vsi {
 	u64 tx_linearize;
 	DECLARE_BITMAP(state, __ICE_STATE_NBITS);
 	DECLARE_BITMAP(flags, ICE_VSI_FLAG_NBITS);
-	unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
 	unsigned int current_netdev_flags;
 	u32 tx_restart;
 	u32 tx_busy;
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index aee22f11a41aa..338abb1b92338 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -1622,7 +1622,6 @@ static int ice_vlan_rx_add_vid(struct net_device *netdev,
 {
 	struct ice_netdev_priv *np = netdev_priv(netdev);
 	struct ice_vsi *vsi = np->vsi;
-	int ret;
 
 	if (vid >= VLAN_N_VID) {
 		netdev_err(netdev, "VLAN id requested %d is out of range %d\n",
@@ -1635,7 +1634,8 @@ static int ice_vlan_rx_add_vid(struct net_device *netdev,
 
 	/* Enable VLAN pruning when VLAN 0 is added */
 	if (unlikely(!vid)) {
-		ret = ice_cfg_vlan_pruning(vsi, true);
+		int ret = ice_cfg_vlan_pruning(vsi, true);
+
 		if (ret)
 			return ret;
 	}
@@ -1644,12 +1644,7 @@ static int ice_vlan_rx_add_vid(struct net_device *netdev,
 	 * needed to continue allowing all untagged packets since VLAN prune
 	 * list is applied to all packets by the switch
 	 */
-	ret = ice_vsi_add_vlan(vsi, vid);
-
-	if (!ret)
-		set_bit(vid, vsi->active_vlans);
-
-	return ret;
+	return ice_vsi_add_vlan(vsi, vid);
 }
 
 /**
@@ -1677,8 +1672,6 @@ static int ice_vlan_rx_kill_vid(struct net_device *netdev,
 	if (status)
 		return status;
 
-	clear_bit(vid, vsi->active_vlans);
-
 	/* Disable VLAN pruning when VLAN 0 is removed */
 	if (unlikely(!vid))
 		status = ice_cfg_vlan_pruning(vsi, false);
@@ -2515,31 +2508,6 @@ static int ice_vsi_vlan_setup(struct ice_vsi *vsi)
 	return ret;
 }
 
-/**
- * ice_restore_vlan - Reinstate VLANs when vsi/netdev comes back up
- * @vsi: the VSI being brought back up
- */
-static int ice_restore_vlan(struct ice_vsi *vsi)
-{
-	int err;
-	u16 vid;
-
-	if (!vsi->netdev)
-		return -EINVAL;
-
-	err = ice_vsi_vlan_setup(vsi);
-	if (err)
-		return err;
-
-	for_each_set_bit(vid, vsi->active_vlans, VLAN_N_VID) {
-		err = ice_vlan_rx_add_vid(vsi->netdev, htons(ETH_P_8021Q), vid);
-		if (err)
-			break;
-	}
-
-	return err;
-}
-
 /**
  * ice_vsi_cfg - Setup the VSI
  * @vsi: the VSI being configured
@@ -2552,7 +2520,9 @@ static int ice_vsi_cfg(struct ice_vsi *vsi)
 
 	if (vsi->netdev) {
 		ice_set_rx_mode(vsi->netdev);
-		err = ice_restore_vlan(vsi);
+
+		err = ice_vsi_vlan_setup(vsi);
+
 		if (err)
 			return err;
 	}
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index 45f10f8f01dc1..9576b958622b3 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -2171,7 +2171,6 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v)
 
 			if (!ice_vsi_add_vlan(vsi, vid)) {
 				vf->num_vlan++;
-				set_bit(vid, vsi->active_vlans);
 
 				/* Enable VLAN pruning when VLAN 0 is added */
 				if (unlikely(!vid))
@@ -2190,7 +2189,6 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v)
 			 */
 			if (!ice_vsi_kill_vlan(vsi, vid)) {
 				vf->num_vlan--;
-				clear_bit(vid, vsi->active_vlans);
 
 				/* Disable VLAN pruning when removing VLAN 0 */
 				if (unlikely(!vid))
-- 
GitLab


From 58297dd133f64ea028e7d52dd00cd9ff8aa4479f Mon Sep 17 00:00:00 2001
From: Md Fahad Iqbal Polash <md.fahad.iqbal.polash@intel.com>
Date: Fri, 26 Oct 2018 10:40:55 -0700
Subject: [PATCH 0729/1890] ice: Fix flags for port VLAN

According to the spec, whenever insert PVID field is set, the VLAN
driver insertion mode should be set to 01b which isn't done currently.
Fix it.

Signed-off-by: Md Fahad Iqbal Polash <md.fahad.iqbal.polash@intel.com>
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index 9576b958622b3..e71065f9d3918 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -348,7 +348,7 @@ static int ice_vsi_set_pvid(struct ice_vsi *vsi, u16 vid)
 	struct ice_vsi_ctx ctxt = { 0 };
 	enum ice_status status;
 
-	ctxt.info.vlan_flags = ICE_AQ_VSI_VLAN_MODE_TAGGED |
+	ctxt.info.vlan_flags = ICE_AQ_VSI_VLAN_MODE_UNTAGGED |
 			       ICE_AQ_VSI_PVLAN_INSERT_PVID |
 			       ICE_AQ_VSI_VLAN_EMOD_STR;
 	ctxt.info.pvid = cpu_to_le16(vid);
-- 
GitLab


From 31082519c11b01fe1fb6dd512055f252812c1508 Mon Sep 17 00:00:00 2001
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Date: Fri, 26 Oct 2018 10:40:56 -0700
Subject: [PATCH 0730/1890] ice: Fix typo in error message

Print should say "Enabling" instead of "Enaabling"

Signed-off-by: Akeem G Abodunrin <akeem.g.abodunrin@intel.com>
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_lib.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 5bacad01f0c9c..c604a44c8cfbd 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -1997,7 +1997,7 @@ int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena)
 	status = ice_update_vsi(&vsi->back->hw, vsi->idx, ctxt, NULL);
 	if (status) {
 		netdev_err(vsi->netdev, "%sabling VLAN pruning on VSI handle: %d, VSI HW ID: %d failed, err = %d, aq_err = %d\n",
-			   ena ? "Ena" : "Dis", vsi->idx, vsi->vsi_num, status,
+			   ena ? "En" : "Dis", vsi->idx, vsi->vsi_num, status,
 			   vsi->back->hw.adminq.sq_last_status);
 		goto err_out;
 	}
-- 
GitLab


From 25525b69bb44a628841492f44a5a8e74f34724f4 Mon Sep 17 00:00:00 2001
From: Dave Ertman <david.m.ertman@intel.com>
Date: Fri, 26 Oct 2018 10:40:57 -0700
Subject: [PATCH 0731/1890] ice: Fix napi delete calls for remove

In the remove path, the vsi->netdev is being set to NULL before the call
to free vectors. This is causing the netif_napi_del call to never be made.

Add a call to ice_napi_del to the same location as the calls to
unregister_netdev and just prior to them. This will use the reverse flow
as the register and netif_napi_add calls.

Signed-off-by: Dave Ertman <david.m.ertman@intel.com>
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice.h      | 1 +
 drivers/net/ethernet/intel/ice/ice_lib.c  | 1 +
 drivers/net/ethernet/intel/ice/ice_main.c | 2 +-
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 1639e955f1580..b8548370f1c72 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -370,5 +370,6 @@ int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
 int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
 void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size);
 void ice_print_link_msg(struct ice_vsi *vsi, bool isup);
+void ice_napi_del(struct ice_vsi *vsi);
 
 #endif /* _ICE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index c604a44c8cfbd..1041fa2a77678 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -2458,6 +2458,7 @@ int ice_vsi_release(struct ice_vsi *vsi)
 	 * on this wq
 	 */
 	if (vsi->netdev && !ice_is_reset_in_progress(pf->state)) {
+		ice_napi_del(vsi);
 		unregister_netdev(vsi->netdev);
 		free_netdev(vsi->netdev);
 		vsi->netdev = NULL;
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 338abb1b92338..82f49dbd762c6 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -1465,7 +1465,7 @@ static int ice_req_irq_msix_misc(struct ice_pf *pf)
  * ice_napi_del - Remove NAPI handler for the VSI
  * @vsi: VSI for which NAPI handler is to be removed
  */
-static void ice_napi_del(struct ice_vsi *vsi)
+void ice_napi_del(struct ice_vsi *vsi)
 {
 	int v_idx;
 
-- 
GitLab


From c585ea42ec75e8d3afa278b7095d9f0dd6ee515b Mon Sep 17 00:00:00 2001
From: Brett Creeley <brett.creeley@intel.com>
Date: Fri, 26 Oct 2018 10:40:58 -0700
Subject: [PATCH 0732/1890] ice: Fix tx_timeout in PF driver

Prior to this commit the driver was running into tx_timeouts when a
queue was stressed enough. This was happening because the HW tail
and SW tail (NTU) were incorrectly out of sync. Consequently this was
causing the HW head to collide with the HW tail, which to the hardware
means that all descriptors posted for Tx have been processed.

Due to the Tx logic used in the driver SW tail and HW tail are allowed
to be out of sync. This is done as an optimization because it allows the
driver to write HW tail as infrequently as possible, while still
updating the SW tail index to keep track. However, there are situations
where this results in the tail never getting updated, resulting in Tx
timeouts.

Tx HW tail write condition:
	if (netif_xmit_stopped(txring_txq(tx_ring) || !skb->xmit_more)
		writel(sw_tail, tx_ring->tail);

An issue was found in the Tx logic that was causing the afore mentioned
condition for updating HW tail to never happen, causing tx_timeouts.

In ice_xmit_frame_ring we calculate how many descriptors we need for the
Tx transaction based on the skb the kernel hands us. This is then passed
into ice_maybe_stop_tx along with some extra padding to determine if we
have enough descriptors available for this transaction. If we don't then
we return -EBUSY to the stack, otherwise we move on and eventually
prepare the Tx descriptors accordingly in ice_tx_map and set
next_to_watch. In ice_tx_map we make another call to ice_maybe_stop_tx
with a value of MAX_SKB_FRAGS + 4. The key here is that this value is
possibly less than the value we sent in the first call to
ice_maybe_stop_tx in ice_xmit_frame_ring. Now, if the number of unused
descriptors is between MAX_SKB_FRAGS + 4 and the value used in the first
call to ice_maybe_stop_tx in ice_xmit_frame_ring then we do not update
the HW tail because of the "Tx HW tail write condition" above. This is
because in ice_maybe_stop_tx we return success from ice_maybe_stop_tx
instead of calling __ice_maybe_stop_tx and subsequently calling
netif_stop_subqueue, which sets the __QUEUE_STATE_DEV_XOFF bit. This
bit is then checked in the "Tx HW tail write condition" by calling
netif_xmit_stopped and subsequently updating HW tail if the
afore mentioned bit is set.

In ice_clean_tx_irq, if next_to_watch is not NULL, we end up cleaning
the descriptors that HW sets the DD bit on and we have the budget. The
HW head will eventually run into the HW tail in response to the
description in the paragraph above.

The next time through ice_xmit_frame_ring we make the initial call to
ice_maybe_stop_tx with another skb from the stack. This time we do not
have enough descriptors available and we return NETDEV_TX_BUSY to the
stack and end up setting next_to_watch to NULL.

This is where we are stuck. In ice_clean_tx_irq we never clean anything
because next_to_watch is always NULL and in ice_xmit_frame_ring we never
update HW tail because we already return NETDEV_TX_BUSY to the stack and
eventually we hit a tx_timeout.

This issue was fixed by making sure that the second call to
ice_maybe_stop_tx in ice_tx_map is passed a value that is >= the value
that was used on the initial call to ice_maybe_stop_tx in
ice_xmit_frame_ring. This was done by adding the following defines to
make the logic more clear and to reduce the chance of mucking this up
again:

ICE_CACHE_LINE_BYTES		64
ICE_DESCS_PER_CACHE_LINE	(ICE_CACHE_LINE_BYTES / \
				 sizeof(struct ice_tx_desc))
ICE_DESCS_FOR_CTX_DESC		1
ICE_DESCS_FOR_SKB_DATA_PTR	1

The ICE_CACHE_LINE_BYTES being 64 is an assumption being made so we
don't have to figure this out on every pass through the Tx path. Instead
I added a sanity check in ice_probe to verify cache line size and print
a message if it's not 64 Bytes. This will make it easier to file issues
if they are seen when the cache line size is not 64 Bytes when reading
from the GLPCI_CNF2 register.

Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 .../net/ethernet/intel/ice/ice_hw_autogen.h    |  2 ++
 drivers/net/ethernet/intel/ice/ice_main.c      | 18 ++++++++++++++++++
 drivers/net/ethernet/intel/ice/ice_txrx.c      |  9 +++++----
 drivers/net/ethernet/intel/ice/ice_txrx.h      | 17 +++++++++++++++--
 4 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
index 5fdea6ec7675b..596b9fb1c510d 100644
--- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -242,6 +242,8 @@
 #define GLNVM_ULD				0x000B6008
 #define GLNVM_ULD_CORER_DONE_M			BIT(3)
 #define GLNVM_ULD_GLOBR_DONE_M			BIT(4)
+#define GLPCI_CNF2				0x000BE004
+#define GLPCI_CNF2_CACHELINE_SIZE_M		BIT(1)
 #define PF_FUNC_RID				0x0009E880
 #define PF_FUNC_RID_FUNC_NUM_S			0
 #define PF_FUNC_RID_FUNC_NUM_M			ICE_M(0x7, 0)
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 82f49dbd762c6..333312a1d5957 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -1994,6 +1994,22 @@ static int ice_init_interrupt_scheme(struct ice_pf *pf)
 	return 0;
 }
 
+/**
+ * ice_verify_cacheline_size - verify driver's assumption of 64 Byte cache lines
+ * @pf: pointer to the PF structure
+ *
+ * There is no error returned here because the driver should be able to handle
+ * 128 Byte cache lines, so we only print a warning in case issues are seen,
+ * specifically with Tx.
+ */
+static void ice_verify_cacheline_size(struct ice_pf *pf)
+{
+	if (rd32(&pf->hw, GLPCI_CNF2) & GLPCI_CNF2_CACHELINE_SIZE_M)
+		dev_warn(&pf->pdev->dev,
+			 "%d Byte cache line assumption is invalid, driver may have Tx timeouts!\n",
+			 ICE_CACHE_LINE_BYTES);
+}
+
 /**
  * ice_probe - Device initialization routine
  * @pdev: PCI device information struct
@@ -2144,6 +2160,8 @@ static int ice_probe(struct pci_dev *pdev,
 	/* since everything is good, start the service timer */
 	mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period));
 
+	ice_verify_cacheline_size(pf);
+
 	return 0;
 
 err_alloc_sw_unroll:
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 5dae968d853e1..3387c67c848de 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -1556,15 +1556,15 @@ int ice_tso(struct ice_tx_buf *first, struct ice_tx_offload_params *off)
  * magnitude greater than our largest possible GSO size.
  *
  * This would then be implemented as:
- *     return (((size >> 12) * 85) >> 8) + 1;
+ *     return (((size >> 12) * 85) >> 8) + ICE_DESCS_FOR_SKB_DATA_PTR;
  *
  * Since multiplication and division are commutative, we can reorder
  * operations into:
- *     return ((size * 85) >> 20) + 1;
+ *     return ((size * 85) >> 20) + ICE_DESCS_FOR_SKB_DATA_PTR;
  */
 static unsigned int ice_txd_use_count(unsigned int size)
 {
-	return ((size * 85) >> 20) + 1;
+	return ((size * 85) >> 20) + ICE_DESCS_FOR_SKB_DATA_PTR;
 }
 
 /**
@@ -1706,7 +1706,8 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_ring *tx_ring)
 	 *       + 1 desc for context descriptor,
 	 * otherwise try next time
 	 */
-	if (ice_maybe_stop_tx(tx_ring, count + 4 + 1)) {
+	if (ice_maybe_stop_tx(tx_ring, count + ICE_DESCS_PER_CACHE_LINE +
+			      ICE_DESCS_FOR_CTX_DESC)) {
 		tx_ring->tx_stats.tx_busy++;
 		return NETDEV_TX_BUSY;
 	}
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index 1d0f58bd389bd..75d0eaf6c9ddb 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -22,8 +22,21 @@
 #define ICE_RX_BUF_WRITE	16	/* Must be power of 2 */
 #define ICE_MAX_TXQ_PER_TXQG	128
 
-/* Tx Descriptors needed, worst case */
-#define DESC_NEEDED (MAX_SKB_FRAGS + 4)
+/* We are assuming that the cache line is always 64 Bytes here for ice.
+ * In order to make sure that is a correct assumption there is a check in probe
+ * to print a warning if the read from GLPCI_CNF2 tells us that the cache line
+ * size is 128 bytes. We do it this way because we do not want to read the
+ * GLPCI_CNF2 register or a variable containing the value on every pass through
+ * the Tx path.
+ */
+#define ICE_CACHE_LINE_BYTES		64
+#define ICE_DESCS_PER_CACHE_LINE	(ICE_CACHE_LINE_BYTES / \
+					 sizeof(struct ice_tx_desc))
+#define ICE_DESCS_FOR_CTX_DESC		1
+#define ICE_DESCS_FOR_SKB_DATA_PTR	1
+/* Tx descriptors needed, worst case */
+#define DESC_NEEDED (MAX_SKB_FRAGS + ICE_DESCS_FOR_CTX_DESC + \
+		     ICE_DESCS_PER_CACHE_LINE + ICE_DESCS_FOR_SKB_DATA_PTR)
 #define ICE_DESC_UNUSED(R)	\
 	((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \
 	(R)->next_to_clean - (R)->next_to_use - 1)
-- 
GitLab


From d944b46992f8e99b6bdc721e44b02e5ca294fa2b Mon Sep 17 00:00:00 2001
From: Brett Creeley <brett.creeley@intel.com>
Date: Fri, 26 Oct 2018 10:40:59 -0700
Subject: [PATCH 0733/1890] ice: Fix the bytecount sent to netdev_tx_sent_queue

Currently if the driver does a TSO offload the bytecount sent to
netdev_tx_sent_queue will be incorrect. This is because in ice_tso we
overwrite the initial value that we set in ice_tx_map. This creates a
mismatch between the Tx and Tx clean flow. In the Tx clean flow we
calculate the bytecount (called total_bytes) as we clean the
descriptors so the value used in the Tx clean path is correct. Fix this
by using += in ice_tso instead of =. This fixes the mismatch in
bytecount mentioned above.

Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_txrx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 3387c67c848de..fe5bbabbb41ea 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -1520,7 +1520,7 @@ int ice_tso(struct ice_tx_buf *first, struct ice_tx_offload_params *off)
 
 	/* update gso_segs and bytecount */
 	first->gso_segs = skb_shinfo(skb)->gso_segs;
-	first->bytecount = (first->gso_segs - 1) * off->header_len;
+	first->bytecount += (first->gso_segs - 1) * off->header_len;
 
 	cd_tso_len = skb->len - off->header_len;
 	cd_mss = skb_shinfo(skb)->gso_size;
-- 
GitLab


From 4c9b658eeaefedd402a59e858d8ac3bfdf6153e3 Mon Sep 17 00:00:00 2001
From: Miroslav Lichvar <mlichvar@redhat.com>
Date: Fri, 26 Oct 2018 19:13:00 +0200
Subject: [PATCH 0734/1890] igb: shorten maximum PHC timecounter update
 interval

The timecounter needs to be updated at least once per ~550 seconds in
order to avoid a 40-bit SYSTIM timestamp to be misinterpreted as an old
timestamp.

Since commit 500462a9de65 ("timers: Switch to a non-cascading wheel"),
scheduling of delayed work seems to be less accurate and a requested
delay of 540 seconds may actually be longer than 550 seconds. Also, the
PHC may be adjusted to run up to 6% faster than real time and the system
clock up to 10% slower. Shorten the delay to 360 seconds to be sure the
timecounter is updated in time.

This fixes an issue with HW timestamps on 82580/I350/I354 being off by
~1100 seconds for few seconds every ~9 minutes.

Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Miroslav Lichvar <mlichvar@redhat.com>
Acked-by: Jacob Keller <jacob.e.keller@intel.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igb/igb_ptp.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
index 29ced6b74d364..2b95dc9c7a6a8 100644
--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
@@ -53,13 +53,15 @@
  *   2^40 * 10^-9 /  60  = 18.3 minutes.
  *
  * SYSTIM is converted to real time using a timecounter. As
- * timecounter_cyc2time() allows old timestamps, the timecounter
- * needs to be updated at least once per half of the SYSTIM interval.
- * Scheduling of delayed work is not very accurate, so we aim for 8
- * minutes to be sure the actual interval is shorter than 9.16 minutes.
+ * timecounter_cyc2time() allows old timestamps, the timecounter needs
+ * to be updated at least once per half of the SYSTIM interval.
+ * Scheduling of delayed work is not very accurate, and also the NIC
+ * clock can be adjusted to run up to 6% faster and the system clock
+ * up to 10% slower, so we aim for 6 minutes to be sure the actual
+ * interval in the NIC time is shorter than 9.16 minutes.
  */
 
-#define IGB_SYSTIM_OVERFLOW_PERIOD	(HZ * 60 * 8)
+#define IGB_SYSTIM_OVERFLOW_PERIOD	(HZ * 60 * 6)
 #define IGB_PTP_TX_TIMEOUT		(HZ * 15)
 #define INCPERIOD_82576			BIT(E1000_TIMINCA_16NS_SHIFT)
 #define INCVALUE_82576_MASK		GENMASK(E1000_TIMINCA_16NS_SHIFT - 1, 0)
-- 
GitLab


From 81bd415c91eb966118d773dddf254aebf3022411 Mon Sep 17 00:00:00 2001
From: Mathieu Malaterre <malat@debian.org>
Date: Wed, 6 Jun 2018 21:42:32 +0200
Subject: [PATCH 0735/1890] watchdog/core: Add missing prototypes for weak
 functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The split out of the hard lockup detector exposed two new weak functions,
but no prototypes for them, which triggers the build warning:

  kernel/watchdog.c:109:12: warning: no previous prototype for â€˜watchdog_nmi_enableâ€™ [-Wmissing-prototypes]
  kernel/watchdog.c:115:13: warning: no previous prototype for â€˜watchdog_nmi_disableâ€™ [-Wmissing-prototypes]

Add the prototypes.

Fixes: 73ce0511c436 ("kernel/watchdog.c: move hardlockup detector to separate file")
Signed-off-by: Mathieu Malaterre <malat@debian.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Babu Moger <babu.moger@oracle.com>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20180606194232.17653-1-malat@debian.org
---
 include/linux/nmi.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 08f9247e9827e..9003e29cde461 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -119,6 +119,8 @@ static inline int hardlockup_detector_perf_init(void) { return 0; }
 void watchdog_nmi_stop(void);
 void watchdog_nmi_start(void);
 int watchdog_nmi_probe(void);
+int watchdog_nmi_enable(unsigned int cpu);
+void watchdog_nmi_disable(unsigned int cpu);
 
 /**
  * touch_nmi_watchdog - restart NMI watchdog timeout.
-- 
GitLab


From f348e2241fb73515d65b5d77dd9c174128a7fbf2 Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@virtuozzo.com>
Date: Tue, 6 Nov 2018 16:16:01 -0500
Subject: [PATCH 0736/1890] ext4: fix missing cleanup if
 ext4_alloc_flex_bg_array() fails while resizing

Fixes: 117fff10d7f1 ("ext4: grow the s_flex_groups array as needed ...")
Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org # 3.7
---
 fs/ext4/resize.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 3df326ee6d506..5fee65afd58b8 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -2022,7 +2022,7 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
 
 	err = ext4_alloc_flex_bg_array(sb, n_group + 1);
 	if (err)
-		return err;
+		goto out;
 
 	err = ext4_mb_alloc_groupinfo(sb, n_group + 1);
 	if (err)
-- 
GitLab


From db6aee62406d9fbb53315fcddd81f1dc271d49fa Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@virtuozzo.com>
Date: Tue, 6 Nov 2018 16:20:40 -0500
Subject: [PATCH 0737/1890] ext4: fix possible inode leak in the retry loop of
 ext4_resize_fs()

Fixes: 1c6bd7173d66 ("ext4: convert file system to meta_bg if needed ...")
Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org # 3.7
---
 fs/ext4/resize.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 5fee65afd58b8..85158e9de7c23 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -2058,6 +2058,10 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
 		n_blocks_count_retry = 0;
 		free_flex_gd(flex_gd);
 		flex_gd = NULL;
+		if (resize_inode) {
+			iput(resize_inode);
+			resize_inode = NULL;
+		}
 		goto retry;
 	}
 
-- 
GitLab


From 63088da9472854408ae5d7c47bd011daf9e1a81b Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Tue, 6 Nov 2018 16:06:32 -0500
Subject: [PATCH 0738/1890] Revert "drm/amd/display: set backlight level limit
 to 1"

This reverts commit 0cafc82fae41531b0162150f9a97f2c74f97118f.

This breaks some apps that assume 0 is minimum brightness.

Revert for 4.20.  This is fixed properly for drm-next/4.21 in:
"drm/amd: Don't fail on backlight = 0"
However, that patch depends on more extensive changes to the
backlight interface which are too invasive for -fixes.

Fixes: Bugzilla: https://bugs.freedesktop.org/108668
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index b0df6dc9a775f..e224f23e22155 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -1524,13 +1524,6 @@ static int amdgpu_dm_backlight_update_status(struct backlight_device *bd)
 {
 	struct amdgpu_display_manager *dm = bl_get_data(bd);
 
-	/*
-	 * PWM interperts 0 as 100% rather than 0% because of HW
-	 * limitation for level 0.So limiting minimum brightness level
-	 * to 1.
-	 */
-	if (bd->props.brightness < 1)
-		return 1;
 	if (dc_link_set_backlight_level(dm->backlight_link,
 			bd->props.brightness, 0, 0))
 		return 0;
-- 
GitLab


From 8ed4ec32d5b1f04a641978322a38a8d7089553bb Mon Sep 17 00:00:00 2001
From: Shaokun Zhang <zhangshaokun@hisilicon.com>
Date: Mon, 5 Nov 2018 18:33:35 +0800
Subject: [PATCH 0739/1890] drm/amd/display: Fix misleading buffer information

RETIMER_REDRIVER_INFO shows the buffer as a decimal value with a '0x'
prefix, which is somewhat misleading.

Fix it to print hexadecimal, as was intended.

Fixes: 2f14bc89("drm/amd/display: add retimer log for HWQ tuning use.")
Cc: Charlene Liu <charlene.liu@amd.com>
Cc: Dmytro Laktyushkin <Dmytro.Laktyushkin@amd.com>
Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
Reviewed-by: Leo Li <sunpeng.li@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/core/dc_link.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index fb04a4ad141fd..5da2186b3615f 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -1722,7 +1722,7 @@ static void write_i2c_retimer_setting(
 		i2c_success = i2c_write(pipe_ctx, slave_address,
 				buffer, sizeof(buffer));
 		RETIMER_REDRIVER_INFO("retimer write to slave_address = 0x%x,\
-			offset = 0x%d, reg_val = 0x%d, i2c_success = %d\n",
+			offset = 0x%x, reg_val = 0x%x, i2c_success = %d\n",
 			slave_address, buffer[0], buffer[1], i2c_success?1:0);
 		if (!i2c_success)
 			/* Write failure */
@@ -1734,7 +1734,7 @@ static void write_i2c_retimer_setting(
 		i2c_success = i2c_write(pipe_ctx, slave_address,
 				buffer, sizeof(buffer));
 		RETIMER_REDRIVER_INFO("retimer write to slave_address = 0x%x,\
-			offset = 0x%d, reg_val = 0x%d, i2c_success = %d\n",
+			offset = 0x%x, reg_val = 0x%x, i2c_success = %d\n",
 			slave_address, buffer[0], buffer[1], i2c_success?1:0);
 		if (!i2c_success)
 			/* Write failure */
-- 
GitLab


From 02680efbb10be0d2c867fe722ae23d588f6bebef Mon Sep 17 00:00:00 2001
From: Harry Wentland <harry.wentland@amd.com>
Date: Sun, 7 Oct 2018 10:01:23 -0400
Subject: [PATCH 0740/1890] drm/amd/display: Stop leaking planes

[Why]
drm_plane_cleanup does not free the plane.

[How]
Call drm_primary_helper_destroy which will also free the plane.

Signed-off-by: Harry Wentland <harry.wentland@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index e224f23e22155..289ddd52cac48 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -3301,7 +3301,7 @@ void dm_drm_plane_destroy_state(struct drm_plane *plane,
 static const struct drm_plane_funcs dm_plane_funcs = {
 	.update_plane	= drm_atomic_helper_update_plane,
 	.disable_plane	= drm_atomic_helper_disable_plane,
-	.destroy	= drm_plane_cleanup,
+	.destroy	= drm_primary_helper_destroy,
 	.reset = dm_drm_plane_reset,
 	.atomic_duplicate_state = dm_drm_plane_duplicate_state,
 	.atomic_destroy_state = dm_drm_plane_destroy_state,
-- 
GitLab


From 3426d66d3e74ab0ab264a85e0795a17e3dde1e71 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Tue, 6 Nov 2018 11:19:00 -0500
Subject: [PATCH 0741/1890] drm/amdgpu/vega20: add CLK base offset

In case we need to access CLK registers.

Acked-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
index 2d4473557b0d2..d13fc4fcb5179 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
@@ -49,6 +49,7 @@ int vega20_reg_base_init(struct amdgpu_device *adev)
 		adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i]));
 		adev->reg_offset[NBIF_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i]));
 		adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i]));
+		adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i]));
 	}
 	return 0;
 }
-- 
GitLab


From 689e7b34234e29e5168894b27b752a4e16ef08c4 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Fri, 2 Nov 2018 10:51:50 -0500
Subject: [PATCH 0742/1890] drm/amdgpu/display: check if fbc is available in
 set_static_screen_control (v2)

The value is dependent on whether fbc is available.

v2: only check if num_pipes is valid

Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c    | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
index b75ede5f84f76..b459867a05b20 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
@@ -1736,7 +1736,12 @@ static void set_static_screen_control(struct pipe_ctx **pipe_ctx,
 	if (events->force_trigger)
 		value |= 0x1;
 
-	value |= 0x84;
+	if (num_pipes) {
+		struct dc *dc = pipe_ctx[0]->stream->ctx->dc;
+
+		if (dc->fbc_compressor)
+			value |= 0x84;
+	}
 
 	for (i = 0; i < num_pipes; i++)
 		pipe_ctx[i]->stream_res.tg->funcs->
-- 
GitLab


From 7875a22625aa7f11befba84bd1e669201032947d Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Tue, 6 Nov 2018 14:44:29 -0500
Subject: [PATCH 0743/1890] drm/amdgpu: add DC feature mask module parameter

Similar to ppfeaturemask.  Allows you to selectively enable/disable
DC features.

Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h      |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c  | 11 +++++++++++
 drivers/gpu/drm/amd/include/amd_shared.h |  4 ++++
 3 files changed, 16 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index d0102cfc8efbd..104b2e0d893bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -151,6 +151,7 @@ extern int amdgpu_compute_multipipe;
 extern int amdgpu_gpu_recovery;
 extern int amdgpu_emu_mode;
 extern uint amdgpu_smu_memory_pool_size;
+extern uint amdgpu_dc_feature_mask;
 extern struct amdgpu_mgpu_info mgpu_info;
 
 #ifdef CONFIG_DRM_AMDGPU_SI
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 943dbf3c5da12..8de55f7f1a3a3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -127,6 +127,9 @@ int amdgpu_compute_multipipe = -1;
 int amdgpu_gpu_recovery = -1; /* auto */
 int amdgpu_emu_mode = 0;
 uint amdgpu_smu_memory_pool_size = 0;
+/* FBC (bit 0) disabled by default*/
+uint amdgpu_dc_feature_mask = 0;
+
 struct amdgpu_mgpu_info mgpu_info = {
 	.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
 };
@@ -631,6 +634,14 @@ module_param(halt_if_hws_hang, int, 0644);
 MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)");
 #endif
 
+/**
+ * DOC: dcfeaturemask (uint)
+ * Override display features enabled. See enum DC_FEATURE_MASK in drivers/gpu/drm/amd/include/amd_shared.h.
+ * The default is the current set of stable display features.
+ */
+MODULE_PARM_DESC(dcfeaturemask, "all stable DC features enabled (default))");
+module_param_named(dcfeaturemask, amdgpu_dc_feature_mask, uint, 0444);
+
 static const struct pci_device_id pciidlist[] = {
 #ifdef  CONFIG_DRM_AMDGPU_SI
 	{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h
index 2083c308007cd..470d7b89071a4 100644
--- a/drivers/gpu/drm/amd/include/amd_shared.h
+++ b/drivers/gpu/drm/amd/include/amd_shared.h
@@ -133,6 +133,10 @@ enum PP_FEATURE_MASK {
 	PP_AVFS_MASK = 0x40000,
 };
 
+enum DC_FEATURE_MASK {
+	DC_FBC_MASK = 0x1,
+};
+
 /**
  * struct amd_ip_funcs - general hooks for managing amdgpu IP Blocks
  */
-- 
GitLab


From ce2127c462d9d1c0832f088b23158420e87e71a0 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Tue, 6 Nov 2018 15:10:37 -0500
Subject: [PATCH 0744/1890] drm/amdgpu/display/dc: add FBC to dc_config

Add FBC to the list of features that can be enabled from the DM.

Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dc.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index 199527171100b..b57fa61b3034a 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -169,6 +169,7 @@ struct link_training_settings;
 struct dc_config {
 	bool gpu_vm_support;
 	bool disable_disp_pll_sharing;
+	bool fbc_support;
 };
 
 enum visual_confirm {
-- 
GitLab


From 04b94af4e348acc52546b9b19c933575f26589a1 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Tue, 6 Nov 2018 15:19:49 -0500
Subject: [PATCH 0745/1890] drm/amdgpu/display/dm: handle FBC dc feature
 parameter

Set the dc_config properly when the option is enabled.

Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 289ddd52cac48..d3f5cb1795bf8 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -429,6 +429,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
 	    adev->asic_type < CHIP_RAVEN)
 		init_data.flags.gpu_vm_support = true;
 
+	if (amdgpu_dc_feature_mask & DC_FBC_MASK)
+		init_data.flags.fbc_support = true;
+
 	/* Display Core create. */
 	adev->dm.dc = dc_create(&init_data);
 
-- 
GitLab


From 5822e9539dc11721b53d74accd4c091b982011d0 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Fri, 2 Nov 2018 10:54:27 -0500
Subject: [PATCH 0746/1890] drm/amdgpu/display/dce11: only enable FBC when
 selected

Causes a black screen on a Stoney laptop.

Bug: https://bugs.freedesktop.org/show_bug.cgi?id=108577
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
index e3624ca245748..7c9fd9052ee23 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c
@@ -1362,7 +1362,8 @@ static bool construct(
 		pool->base.sw_i2cs[i] = NULL;
 	}
 
-	dc->fbc_compressor = dce110_compressor_create(ctx);
+	if (dc->config.fbc_support)
+		dc->fbc_compressor = dce110_compressor_create(ctx);
 
 	if (!underlay_create(ctx, &pool->base))
 		goto res_create_fail;
-- 
GitLab


From a6758309a005060b8297a538a457c88699cb2520 Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@virtuozzo.com>
Date: Tue, 6 Nov 2018 16:49:50 -0500
Subject: [PATCH 0747/1890] ext4: avoid buffer leak on shutdown in
 ext4_mark_iloc_dirty()

ext4_mark_iloc_dirty() callers expect that it releases iloc->bh
even if it returns an error.

Fixes: 0db1ff222d40 ("ext4: add shutdown bit and check for it")
Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org # 4.11
---
 fs/ext4/inode.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c3d9a42c561ef..55c8fca76daff 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5835,9 +5835,10 @@ int ext4_mark_iloc_dirty(handle_t *handle,
 {
 	int err = 0;
 
-	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) {
+		put_bh(iloc->bh);
 		return -EIO;
-
+	}
 	if (IS_I_VERSION(inode))
 		inode_inc_iversion(inode);
 
-- 
GitLab


From feaf264ce7f8d54582e2f66eb82dd9dd124c94f3 Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@virtuozzo.com>
Date: Tue, 6 Nov 2018 17:01:36 -0500
Subject: [PATCH 0748/1890] ext4: avoid buffer leak in ext4_orphan_add() after
 prior errors

Fixes: d745a8c20c1f ("ext4: reduce contention on s_orphan_lock")
Fixes: 6e3617e579e0 ("ext4: Handle non empty on-disk orphan link")
Cc: Dmitry Monakhov <dmonakhov@gmail.com>
Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org # 2.6.34
---
 fs/ext4/namei.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 67a38532032ae..d388cce72db20 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2811,7 +2811,9 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
 			list_del_init(&EXT4_I(inode)->i_orphan);
 			mutex_unlock(&sbi->s_orphan_lock);
 		}
-	}
+	} else
+		brelse(iloc.bh);
+
 	jbd_debug(4, "superblock will point to %lu\n", inode->i_ino);
 	jbd_debug(4, "orphan inode %lu will point to %d\n",
 			inode->i_ino, NEXT_ORPHAN(inode));
-- 
GitLab


From 4f32c38b4662312dd3c5f113d8bdd459887fb773 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Tue, 6 Nov 2018 17:18:17 -0500
Subject: [PATCH 0749/1890] ext4: avoid possible double brelse() in
 add_new_gdb() on error path

Fixes: b40971426a83 ("ext4: add error checking to calls to ...")
Reported-by: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org # 2.6.38
---
 fs/ext4/resize.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 85158e9de7c23..a5efee34415fe 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -871,6 +871,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 	err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh);
 	if (unlikely(err)) {
 		ext4_std_error(sb, err);
+		iloc.bh = NULL;
 		goto exit_inode;
 	}
 	brelse(dind);
-- 
GitLab


From 1bfc204dc0e7a690ab8440e91bb7d1a324320fdc Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@virtuozzo.com>
Date: Tue, 6 Nov 2018 17:45:02 -0500
Subject: [PATCH 0750/1890] ext4: remove unneeded brelse call in
 ext4_xattr_inode_update_ref()

Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/xattr.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index f36fc5d5b2574..dc1aeab06dbad 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1031,10 +1031,8 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
 	inode_lock(ea_inode);
 
 	ret = ext4_reserve_inode_write(handle, ea_inode, &iloc);
-	if (ret) {
-		iloc.bh = NULL;
+	if (ret)
 		goto out;
-	}
 
 	ref_count = ext4_xattr_inode_get_ref(ea_inode);
 	ref_count += ref_change;
@@ -1080,12 +1078,10 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
 	}
 
 	ret = ext4_mark_iloc_dirty(handle, ea_inode, &iloc);
-	iloc.bh = NULL;
 	if (ret)
 		ext4_warning_inode(ea_inode,
 				   "ext4_mark_iloc_dirty() failed ret=%d", ret);
 out:
-	brelse(iloc.bh);
 	inode_unlock(ea_inode);
 	return ret;
 }
-- 
GitLab


From aca49ee041cbdd329c55d4dbcd6b3d4b9af240e4 Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Mon, 5 Nov 2018 22:49:47 -0500
Subject: [PATCH 0751/1890] Revert "scsi: ufs: Disable blk-mq for now"

This reverts commit d87161bea405e3260377026ca8a704a3f68bd67a.

The issues that forced us to disable blk_mq for ufs have been resolved.

Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/ufs/ufshcd.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 23d7cca36ff03..27db55b0ca7f8 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -8099,13 +8099,6 @@ int ufshcd_alloc_host(struct device *dev, struct ufs_hba **hba_handle)
 		err = -ENOMEM;
 		goto out_error;
 	}
-
-	/*
-	 * Do not use blk-mq at this time because blk-mq does not support
-	 * runtime pm.
-	 */
-	host->use_blk_mq = false;
-
 	hba = shost_priv(host);
 	hba->host = host;
 	hba->dev = dev;
-- 
GitLab


From f635e48e866ee1a47d2d42ce012fdcc07bf55853 Mon Sep 17 00:00:00 2001
From: Quinn Tran <quinn.tran@cavium.com>
Date: Tue, 6 Nov 2018 00:51:21 -0800
Subject: [PATCH 0752/1890] scsi: qla2xxx: Initialize port speed to avoid
 setting lower speed

This patch initializes port speed so that firmware does not set lower
operating speed. Setting lower speed in firmware impacts WRITE perfomance.

Fixes: 726b85487067 ("qla2xxx: Add framework for async fabric discovery")
Cc: <stable@vger.kernel.org>
Signed-off-by: Quinn Tran <quinn.tran@cavium.com>
Signed-off-by: Himanshu Madhani <himanshu.madhani@cavium.com>
Tested-by: Laurence Oberman <loberman@redhat.com>
Reviewed-by: Ewan D. Milne <emilne@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qla2xxx/qla_init.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index 6fe20c27acc16..eb59c796a795d 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -4763,6 +4763,7 @@ qla2x00_alloc_fcport(scsi_qla_host_t *vha, gfp_t flags)
 	fcport->loop_id = FC_NO_LOOP_ID;
 	qla2x00_set_fcport_state(fcport, FCS_UNCONFIGURED);
 	fcport->supported_classes = FC_COS_UNSPECIFIED;
+	fcport->fp_speed = PORT_SPEED_UNKNOWN;
 
 	fcport->ct_desc.ct_sns = dma_alloc_coherent(&vha->hw->pdev->dev,
 		sizeof(struct ct_sns_pkt), &fcport->ct_desc.ct_sns_dma,
-- 
GitLab


From 99b77fef3c6c69bb7664f1ac97a69d5b17968dae Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Wed, 31 Oct 2018 12:20:28 +0200
Subject: [PATCH 0753/1890] net/mlx5: Fix XRC SRQ umem valid bits

Adapt XRC SRQ to the latest HW specification with fixed definition
around umem valid bits. The previous definition relied on a bit which
was taken for other purposes in legacy FW.

Fixes: bd37197554eb ("net/mlx5: Update mlx5_ifc with DEVX UID bits")
Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Reviewed-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index dbff9ff28f2c4..34e17e6f89429 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -2473,14 +2473,15 @@ struct mlx5_ifc_xrc_srqc_bits {
 
 	u8         wq_signature[0x1];
 	u8         cont_srq[0x1];
-	u8         dbr_umem_valid[0x1];
+	u8         reserved_at_22[0x1];
 	u8         rlky[0x1];
 	u8         basic_cyclic_rcv_wqe[0x1];
 	u8         log_rq_stride[0x3];
 	u8         xrcd[0x18];
 
 	u8         page_offset[0x6];
-	u8         reserved_at_46[0x2];
+	u8         reserved_at_46[0x1];
+	u8         dbr_umem_valid[0x1];
 	u8         cqn[0x18];
 
 	u8         reserved_at_60[0x20];
@@ -6689,9 +6690,12 @@ struct mlx5_ifc_create_xrc_srq_in_bits {
 
 	struct mlx5_ifc_xrc_srqc_bits xrc_srq_context_entry;
 
-	u8         reserved_at_280[0x40];
+	u8         reserved_at_280[0x60];
+
 	u8         xrc_srq_umem_valid[0x1];
-	u8         reserved_at_2c1[0x5bf];
+	u8         reserved_at_2e1[0x1f];
+
+	u8         reserved_at_300[0x580];
 
 	u8         pas[0][0x40];
 };
-- 
GitLab


From 28c5bcf74fa07c25d5bd118d1271920f51ce2a98 Mon Sep 17 00:00:00 2001
From: Scott Wood <oss@buserror.net>
Date: Tue, 6 Nov 2018 19:49:34 -0600
Subject: [PATCH 0754/1890] KVM: PPC: Move and undef TRACE_INCLUDE_PATH/FILE

TRACE_INCLUDE_PATH and TRACE_INCLUDE_FILE are used by
<trace/define_trace.h>, so like that #include, they should
be outside #ifdef protection.

They also need to be #undefed before defining, in case multiple trace
headers are included by the same C file.  This became the case on
book3e after commit cf4a6085151a ("powerpc/mm: Add missing tracepoint for
tlbie"), leading to the following build error:

   CC      arch/powerpc/kvm/powerpc.o
In file included from arch/powerpc/kvm/powerpc.c:51:0:
arch/powerpc/kvm/trace.h:9:0: error: "TRACE_INCLUDE_PATH" redefined
[-Werror]
  #define TRACE_INCLUDE_PATH .
  ^
In file included from arch/powerpc/kvm/../mm/mmu_decl.h:25:0,
                  from arch/powerpc/kvm/powerpc.c:48:
./arch/powerpc/include/asm/trace.h:224:0: note: this is the location of
the previous definition
  #define TRACE_INCLUDE_PATH asm
  ^
cc1: all warnings being treated as errors

Reported-by: Christian Zigotzky <chzigotzky@xenosoft.de>
Signed-off-by: Scott Wood <oss@buserror.net>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kvm/trace.h       | 8 ++++++--
 arch/powerpc/kvm/trace_booke.h | 9 +++++++--
 arch/powerpc/kvm/trace_hv.h    | 9 +++++++--
 arch/powerpc/kvm/trace_pr.h    | 9 +++++++--
 4 files changed, 27 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h
index 491b0f715d6bc..ea1d7c8083190 100644
--- a/arch/powerpc/kvm/trace.h
+++ b/arch/powerpc/kvm/trace.h
@@ -6,8 +6,6 @@
 
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM kvm
-#define TRACE_INCLUDE_PATH .
-#define TRACE_INCLUDE_FILE trace
 
 /*
  * Tracepoint for guest mode entry.
@@ -120,4 +118,10 @@ TRACE_EVENT(kvm_check_requests,
 #endif /* _TRACE_KVM_H */
 
 /* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace
+
 #include <trace/define_trace.h>
diff --git a/arch/powerpc/kvm/trace_booke.h b/arch/powerpc/kvm/trace_booke.h
index ac640e81fdc5f..3837842986aa4 100644
--- a/arch/powerpc/kvm/trace_booke.h
+++ b/arch/powerpc/kvm/trace_booke.h
@@ -6,8 +6,6 @@
 
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM kvm_booke
-#define TRACE_INCLUDE_PATH .
-#define TRACE_INCLUDE_FILE trace_booke
 
 #define kvm_trace_symbol_exit \
 	{0, "CRITICAL"}, \
@@ -218,4 +216,11 @@ TRACE_EVENT(kvm_booke_queue_irqprio,
 #endif
 
 /* This part must be outside protection */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_booke
+
 #include <trace/define_trace.h>
diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h
index bcfe8a987f6a9..8a1e3b0047f19 100644
--- a/arch/powerpc/kvm/trace_hv.h
+++ b/arch/powerpc/kvm/trace_hv.h
@@ -9,8 +9,6 @@
 
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM kvm_hv
-#define TRACE_INCLUDE_PATH .
-#define TRACE_INCLUDE_FILE trace_hv
 
 #define kvm_trace_symbol_hcall \
 	{H_REMOVE,			"H_REMOVE"}, \
@@ -497,4 +495,11 @@ TRACE_EVENT(kvmppc_run_vcpu_exit,
 #endif /* _TRACE_KVM_HV_H */
 
 /* This part must be outside protection */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_hv
+
 #include <trace/define_trace.h>
diff --git a/arch/powerpc/kvm/trace_pr.h b/arch/powerpc/kvm/trace_pr.h
index 2f9a8829552b9..46a46d328fbf2 100644
--- a/arch/powerpc/kvm/trace_pr.h
+++ b/arch/powerpc/kvm/trace_pr.h
@@ -8,8 +8,6 @@
 
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM kvm_pr
-#define TRACE_INCLUDE_PATH .
-#define TRACE_INCLUDE_FILE trace_pr
 
 TRACE_EVENT(kvm_book3s_reenter,
 	TP_PROTO(int r, struct kvm_vcpu *vcpu),
@@ -257,4 +255,11 @@ TRACE_EVENT(kvm_exit,
 #endif /* _TRACE_KVM_H */
 
 /* This part must be outside protection */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_pr
+
 #include <trace/define_trace.h>
-- 
GitLab


From 9a1ec4eb6f3722a65a267bb7d14c371d73f66c5b Mon Sep 17 00:00:00 2001
From: "Spencer E. Olson" <olsonse@umich.edu>
Date: Wed, 24 Oct 2018 08:46:59 -0600
Subject: [PATCH 0755/1890] staging: comedi: ni_mio_common: scale ao
 INSN_CONFIG_GET_CMD_TIMING_CONSTRAINTS

Fixes implementation of INSN_CONFIG_GET_CMD_TIMING_CONSTRAINTS for
ni_mio devices.  The previous patch should have used the channel
information passed in to scale the result by the number of channels being
used.

Fixes: 51fd36738383 ("staging: comedi: ni_mio_common: implement INSN_CONFIG_GET_CMD_TIMING_CONSTRAINTS")
Signed-off-by: Spencer E. Olson <olsonse@umich.edu>
Reviewed-by: Ian Abbott <abbotti@mev.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/comedi/drivers/ni_mio_common.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/comedi/drivers/ni_mio_common.c b/drivers/staging/comedi/drivers/ni_mio_common.c
index 2d1e0325d04d1..5edf59ac6706d 100644
--- a/drivers/staging/comedi/drivers/ni_mio_common.c
+++ b/drivers/staging/comedi/drivers/ni_mio_common.c
@@ -2843,7 +2843,8 @@ static int ni_ao_insn_config(struct comedi_device *dev,
 		return ni_ao_arm(dev, s);
 	case INSN_CONFIG_GET_CMD_TIMING_CONSTRAINTS:
 		/* we don't care about actual channels */
-		data[1] = board->ao_speed;
+		/* data[3] : chanlist_len */
+		data[1] = board->ao_speed * data[3];
 		data[2] = 0;
 		return 0;
 	default:
-- 
GitLab


From c3e43d8b958bd6849817393483e805d8638a8ab7 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Wed, 17 Oct 2018 10:15:34 +0200
Subject: [PATCH 0756/1890] staging: rtl8723bs: Fix the return value in case of
 error in 'rtw_wx_read32()'

We return 0 unconditionally in 'rtw_wx_read32()'.
However, 'ret' is set to some error codes in several error handling paths.

Return 'ret' instead to propagate the error code.

Fixes: 554c0a3abf216 ("staging: Add rtl8723bs sdio wifi driver")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8723bs/os_dep/ioctl_linux.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c
index 28bfdbdc6e76a..b8631baf128d6 100644
--- a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c
+++ b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c
@@ -2289,7 +2289,7 @@ static int rtw_wx_read32(struct net_device *dev,
 exit:
 	kfree(ptmp);
 
-	return 0;
+	return ret;
 }
 
 static int rtw_wx_write32(struct net_device *dev,
-- 
GitLab


From b7c56d7bfe834904d9d332e0cfbf929b75976f8a Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 2 Nov 2018 19:01:26 +0000
Subject: [PATCH 0757/1890] drivers: staging: cedrus: find ctx before
 dereferencing it ctx

Currently if count is an invalid value the v4l2_info message will
dereference a null ctx pointer to get the dev information. Fix
this by finding ctx first and then checking for an invalid count,
this way ctxt will be non-null hence avoiding the null pointer
dereference.

Detected by CoverityScan, CID#1475337 ("Explicit null dereferenced")

Fixes: 50e761516f2b ("media: platform: Add Cedrus VPU decoder driver")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Maxime Ripard <maxime.ripard@bootlin.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/media/sunxi/cedrus/cedrus.c | 22 ++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.c b/drivers/staging/media/sunxi/cedrus/cedrus.c
index 82558455384af..699d62dceb6cd 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus.c
+++ b/drivers/staging/media/sunxi/cedrus/cedrus.c
@@ -108,17 +108,6 @@ static int cedrus_request_validate(struct media_request *req)
 	unsigned int count;
 	unsigned int i;
 
-	count = vb2_request_buffer_cnt(req);
-	if (!count) {
-		v4l2_info(&ctx->dev->v4l2_dev,
-			  "No buffer was provided with the request\n");
-		return -ENOENT;
-	} else if (count > 1) {
-		v4l2_info(&ctx->dev->v4l2_dev,
-			  "More than one buffer was provided with the request\n");
-		return -EINVAL;
-	}
-
 	list_for_each_entry(obj, &req->objects, list) {
 		struct vb2_buffer *vb;
 
@@ -133,6 +122,17 @@ static int cedrus_request_validate(struct media_request *req)
 	if (!ctx)
 		return -ENOENT;
 
+	count = vb2_request_buffer_cnt(req);
+	if (!count) {
+		v4l2_info(&ctx->dev->v4l2_dev,
+			  "No buffer was provided with the request\n");
+		return -ENOENT;
+	} else if (count > 1) {
+		v4l2_info(&ctx->dev->v4l2_dev,
+			  "More than one buffer was provided with the request\n");
+		return -EINVAL;
+	}
+
 	parent_hdl = &ctx->hdl;
 
 	hdl = v4l2_ctrl_request_hdl_find(req, parent_hdl);
-- 
GitLab


From 4dc2a3cd27856531cf92762fea2eb9468efe552f Mon Sep 17 00:00:00 2001
From: "Spencer E. Olson" <olsonse@umich.edu>
Date: Wed, 24 Oct 2018 08:33:40 -0600
Subject: [PATCH 0758/1890] staging: comedi: clarify/unify macros for NI
 macro-defined terminals

Uses a single macro to define multiple macros that represent a series of
terminals for NI devices.  This patch also redefines NI_MAX_COUNTERS as the
maximum number of counters possible on NI devices (instead of the maximum
index of the counters).  This was a little confusing and caused a bug in
commit 347e244884c3b ("staging: comedi: tio: implement global tio/ctr routing")
when setting/reading registers for counter terminals.

Fixes: 347e244884c3b ("staging: comedi: tio: implement global tio/ctr routing")
Signed-off-by: Spencer E. Olson <olsonse@umich.edu>
Reviewed-by: Ian Abbott <abbotti@mev.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/comedi/comedi.h | 39 ++++++++++++++++++---------------
 1 file changed, 21 insertions(+), 18 deletions(-)

diff --git a/drivers/staging/comedi/comedi.h b/drivers/staging/comedi/comedi.h
index e90b177752840..09a940066c0ec 100644
--- a/drivers/staging/comedi/comedi.h
+++ b/drivers/staging/comedi/comedi.h
@@ -1005,35 +1005,38 @@ enum i8254_mode {
  * and INSN_DEVICE_CONFIG_GET_ROUTES.
  */
 #define NI_NAMES_BASE	0x8000u
+
+#define _TERM_N(base, n, x)	((base) + ((x) & ((n) - 1)))
+
 /*
  * not necessarily all allowed 64 PFIs are valid--certainly not for all devices
  */
-#define NI_PFI(x)	(NI_NAMES_BASE        + ((x) & 0x3f))
+#define NI_PFI(x)		_TERM_N(NI_NAMES_BASE, 64, x)
 /* 8 trigger lines by standard, Some devices cannot talk to all eight. */
-#define TRIGGER_LINE(x)	(NI_PFI(-1)       + 1 + ((x) & 0x7))
+#define TRIGGER_LINE(x)		_TERM_N(NI_PFI(-1) + 1, 8, x)
 /* 4 RTSI shared MUXes to route signals to/from TRIGGER_LINES on NI hardware */
-#define NI_RTSI_BRD(x)	(TRIGGER_LINE(-1) + 1 + ((x) & 0x3))
+#define NI_RTSI_BRD(x)		_TERM_N(TRIGGER_LINE(-1) + 1, 4, x)
 
 /* *** Counter/timer names : 8 counters max *** */
-#define NI_COUNTER_NAMES_BASE  (NI_RTSI_BRD(-1)  + 1)
-#define NI_MAX_COUNTERS	       7
-#define NI_CtrSource(x)	       (NI_COUNTER_NAMES_BASE + ((x) & NI_MAX_COUNTERS))
+#define NI_MAX_COUNTERS		8
+#define NI_COUNTER_NAMES_BASE	(NI_RTSI_BRD(-1)  + 1)
+#define NI_CtrSource(x)	      _TERM_N(NI_COUNTER_NAMES_BASE, NI_MAX_COUNTERS, x)
 /* Gate, Aux, A,B,Z are all treated, at times as gates */
-#define NI_GATES_NAMES_BASE    (NI_CtrSource(-1) + 1)
-#define NI_CtrGate(x)	       (NI_GATES_NAMES_BASE   + ((x) & NI_MAX_COUNTERS))
-#define NI_CtrAux(x)	       (NI_CtrGate(-1)   + 1  + ((x) & NI_MAX_COUNTERS))
-#define NI_CtrA(x)	       (NI_CtrAux(-1)    + 1  + ((x) & NI_MAX_COUNTERS))
-#define NI_CtrB(x)	       (NI_CtrA(-1)      + 1  + ((x) & NI_MAX_COUNTERS))
-#define NI_CtrZ(x)	       (NI_CtrB(-1)      + 1  + ((x) & NI_MAX_COUNTERS))
-#define NI_GATES_NAMES_MAX     NI_CtrZ(-1)
-#define NI_CtrArmStartTrigger(x) (NI_CtrZ(-1)    + 1  + ((x) & NI_MAX_COUNTERS))
+#define NI_GATES_NAMES_BASE	(NI_CtrSource(-1) + 1)
+#define NI_CtrGate(x)		_TERM_N(NI_GATES_NAMES_BASE, NI_MAX_COUNTERS, x)
+#define NI_CtrAux(x)		_TERM_N(NI_CtrGate(-1)  + 1, NI_MAX_COUNTERS, x)
+#define NI_CtrA(x)		_TERM_N(NI_CtrAux(-1)   + 1, NI_MAX_COUNTERS, x)
+#define NI_CtrB(x)		_TERM_N(NI_CtrA(-1)     + 1, NI_MAX_COUNTERS, x)
+#define NI_CtrZ(x)		_TERM_N(NI_CtrB(-1)     + 1, NI_MAX_COUNTERS, x)
+#define NI_GATES_NAMES_MAX	NI_CtrZ(-1)
+#define NI_CtrArmStartTrigger(x) _TERM_N(NI_CtrZ(-1)    + 1, NI_MAX_COUNTERS, x)
 #define NI_CtrInternalOutput(x) \
-		     (NI_CtrArmStartTrigger(-1)  + 1  + ((x) & NI_MAX_COUNTERS))
+		      _TERM_N(NI_CtrArmStartTrigger(-1) + 1, NI_MAX_COUNTERS, x)
 /** external pin(s) labeled conveniently as Ctr<i>Out. */
-#define NI_CtrOut(x)  (NI_CtrInternalOutput(-1)  + 1  + ((x) & NI_MAX_COUNTERS))
+#define NI_CtrOut(x)   _TERM_N(NI_CtrInternalOutput(-1) + 1, NI_MAX_COUNTERS, x)
 /** For Buffered sampling of ctr -- x series capability. */
-#define NI_CtrSampleClock(x)	(NI_CtrOut(-1)   + 1  + ((x) & NI_MAX_COUNTERS))
-#define NI_COUNTER_NAMES_MAX   NI_CtrSampleClock(-1)
+#define NI_CtrSampleClock(x)	_TERM_N(NI_CtrOut(-1)   + 1, NI_MAX_COUNTERS, x)
+#define NI_COUNTER_NAMES_MAX	NI_CtrSampleClock(-1)
 
 enum ni_common_signal_names {
 	/* PXI_Star: this is a non-NI-specific signal */
-- 
GitLab


From 354e379684fcc70ab8d5450b4d57bd92b5294dfd Mon Sep 17 00:00:00 2001
From: Sergio Paracuellos <sergio.paracuellos@gmail.com>
Date: Sat, 20 Oct 2018 13:31:06 +0200
Subject: [PATCH 0759/1890] staging: mt7621-dma: fix potentially dereferencing
 uninitialized 'tx_desc'

Function 'mtk_hsdma_start_transfer' uses 'tx_desc' pointer which can be
dereferenced before it is initializated. Initializate pointer before
avoiding the problem.

Fixes: 0853c7a53eb3 ("staging: mt7621-dma: ralink: add rt2880 dma engine")
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Sergio Paracuellos <sergio.paracuellos@gmail.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/mt7621-dma/mtk-hsdma.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/mt7621-dma/mtk-hsdma.c b/drivers/staging/mt7621-dma/mtk-hsdma.c
index df6ebf41bdea4..5831f816c17b1 100644
--- a/drivers/staging/mt7621-dma/mtk-hsdma.c
+++ b/drivers/staging/mt7621-dma/mtk-hsdma.c
@@ -335,6 +335,8 @@ static int mtk_hsdma_start_transfer(struct mtk_hsdam_engine *hsdma,
 	/* tx desc */
 	src = sg->src_addr;
 	for (i = 0; i < chan->desc->num_sgs; i++) {
+		tx_desc = &chan->tx_ring[chan->tx_idx];
+
 		if (len > HSDMA_MAX_PLEN)
 			tlen = HSDMA_MAX_PLEN;
 		else
@@ -344,7 +346,6 @@ static int mtk_hsdma_start_transfer(struct mtk_hsdam_engine *hsdma,
 			tx_desc->addr1 = src;
 			tx_desc->flags |= HSDMA_DESC_PLEN1(tlen);
 		} else {
-			tx_desc = &chan->tx_ring[chan->tx_idx];
 			tx_desc->addr0 = src;
 			tx_desc->flags = HSDMA_DESC_PLEN0(tlen);
 
-- 
GitLab


From c948c6915b620f075496846df8d4487ee0c56121 Mon Sep 17 00:00:00 2001
From: Larry Finger <Larry.Finger@lwfinger.net>
Date: Tue, 6 Nov 2018 21:33:14 -0600
Subject: [PATCH 0760/1890] staging: rtl8723bs: Fix incorrect sense of
 ether_addr_equal

In commit b37f9e1c3801 ("staging: rtl8723bs: Fix lines too long in
update_recvframe_attrib()."), the refactoring involved replacing
two memcmp() calls with ether_addr_equal() calls. What the author
missed is that memcmp() returns false when the two strings are equal,
whereas ether_addr_equal() returns true when the two addresses are
equal. One side effect of this error is that the strength of an
unassociated AP was much stronger than the same AP after association.
This bug is reported at bko#201611.

Fixes: b37f9e1c3801 ("staging: rtl8723bs: Fix lines too long in update_recvframe_attrib().")
Cc: Stable <stable@vger.kernel.org>
Cc: youling257 <youling257@gmail.com>
Cc: u.srikant.patnaik@gmail.com
Reported-and-tested-by: youling257 <youling257@gmail.com>
Signed-off-by: Larry Finger <Larry.Finger@lwfinger.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8723bs/hal/rtl8723bs_recv.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/rtl8723bs/hal/rtl8723bs_recv.c b/drivers/staging/rtl8723bs/hal/rtl8723bs_recv.c
index 85077947b9b8d..85aba8a503cd2 100644
--- a/drivers/staging/rtl8723bs/hal/rtl8723bs_recv.c
+++ b/drivers/staging/rtl8723bs/hal/rtl8723bs_recv.c
@@ -109,12 +109,12 @@ static void update_recvframe_phyinfo(union recv_frame *precvframe,
 	rx_bssid = get_hdr_bssid(wlanhdr);
 	pkt_info.bssid_match = ((!IsFrameTypeCtrl(wlanhdr)) &&
 				!pattrib->icv_err && !pattrib->crc_err &&
-				!ether_addr_equal(rx_bssid, my_bssid));
+				ether_addr_equal(rx_bssid, my_bssid));
 
 	rx_ra = get_ra(wlanhdr);
 	my_hwaddr = myid(&padapter->eeprompriv);
 	pkt_info.to_self = pkt_info.bssid_match &&
-		!ether_addr_equal(rx_ra, my_hwaddr);
+		ether_addr_equal(rx_ra, my_hwaddr);
 
 
 	pkt_info.is_beacon = pkt_info.bssid_match &&
-- 
GitLab


From 781f0766cc41a9dd2e5d118ef4b1d5d89430257b Mon Sep 17 00:00:00 2001
From: Kai-Heng Feng <kai.heng.feng@canonical.com>
Date: Fri, 19 Oct 2018 16:14:50 +0800
Subject: [PATCH 0761/1890] USB: Wait for extra delay time after
 USB_PORT_FEAT_RESET for quirky hub

Devices connected under Terminus Technology Inc. Hub (1a40:0101) may
fail to work after the system resumes from suspend:
[  206.063325] usb 3-2.4: reset full-speed USB device number 4 using xhci_hcd
[  206.143691] usb 3-2.4: device descriptor read/64, error -32
[  206.351671] usb 3-2.4: device descriptor read/64, error -32

Info for this hub:
T:  Bus=03 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#=  2 Spd=480 MxCh= 4
D:  Ver= 2.00 Cls=09(hub  ) Sub=00 Prot=01 MxPS=64 #Cfgs=  1
P:  Vendor=1a40 ProdID=0101 Rev=01.11
S:  Product=USB 2.0 Hub
C:  #Ifs= 1 Cfg#= 1 Atr=e0 MxPwr=100mA
I:  If#= 0 Alt= 0 #EPs= 1 Cls=09(hub  ) Sub=00 Prot=00 Driver=hub

Some expirements indicate that the USB devices connected to the hub are
innocent, it's the hub itself is to blame. The hub needs extra delay
time after it resets its port.

Hence wait for extra delay, if the device is connected to this quirky
hub.

Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Cc: stable <stable@vger.kernel.org>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/admin-guide/kernel-parameters.txt |  2 ++
 drivers/usb/core/hub.c                          | 14 +++++++++++---
 drivers/usb/core/quirks.c                       |  6 ++++++
 include/linux/usb/quirks.h                      |  3 +++
 4 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 81d1d5a747280..19f4423e70d91 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4713,6 +4713,8 @@
 					prevent spurious wakeup);
 				n = USB_QUIRK_DELAY_CTRL_MSG (Device needs a
 					pause after every control message);
+				o = USB_QUIRK_HUB_SLOW_RESET (Hub needs extra
+					delay after resetting its port);
 			Example: quirks=0781:5580:bk,0a5c:5834:gij
 
 	usbhid.mousepoll=
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index c6077d582d296..d9bd7576786a7 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -2794,6 +2794,7 @@ static int hub_port_reset(struct usb_hub *hub, int port1,
 	int i, status;
 	u16 portchange, portstatus;
 	struct usb_port *port_dev = hub->ports[port1 - 1];
+	int reset_recovery_time;
 
 	if (!hub_is_superspeed(hub->hdev)) {
 		if (warm) {
@@ -2885,11 +2886,18 @@ static int hub_port_reset(struct usb_hub *hub, int port1,
 
 done:
 	if (status == 0) {
-		/* TRSTRCY = 10 ms; plus some extra */
 		if (port_dev->quirks & USB_PORT_QUIRK_FAST_ENUM)
 			usleep_range(10000, 12000);
-		else
-			msleep(10 + 40);
+		else {
+			/* TRSTRCY = 10 ms; plus some extra */
+			reset_recovery_time = 10 + 40;
+
+			/* Hub needs extra delay after resetting its port. */
+			if (hub->hdev->quirks & USB_QUIRK_HUB_SLOW_RESET)
+				reset_recovery_time += 100;
+
+			msleep(reset_recovery_time);
+		}
 
 		if (udev) {
 			struct usb_hcd *hcd = bus_to_hcd(udev->bus);
diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index 178d6c6063c02..4d7d948eae636 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -128,6 +128,9 @@ static int quirks_param_set(const char *val, const struct kernel_param *kp)
 			case 'n':
 				flags |= USB_QUIRK_DELAY_CTRL_MSG;
 				break;
+			case 'o':
+				flags |= USB_QUIRK_HUB_SLOW_RESET;
+				break;
 			/* Ignore unrecognized flag characters */
 			}
 		}
@@ -380,6 +383,9 @@ static const struct usb_device_id usb_quirk_list[] = {
 	{ USB_DEVICE(0x1a0a, 0x0200), .driver_info =
 			USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL },
 
+	/* Terminus Technology Inc. Hub */
+	{ USB_DEVICE(0x1a40, 0x0101), .driver_info = USB_QUIRK_HUB_SLOW_RESET },
+
 	/* Corsair K70 RGB */
 	{ USB_DEVICE(0x1b1c, 0x1b13), .driver_info = USB_QUIRK_DELAY_INIT },
 
diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h
index b7a99ce56bc9a..a1be64c9940fb 100644
--- a/include/linux/usb/quirks.h
+++ b/include/linux/usb/quirks.h
@@ -66,4 +66,7 @@
 /* Device needs a pause after every control message. */
 #define USB_QUIRK_DELAY_CTRL_MSG		BIT(13)
 
+/* Hub needs extra delay after resetting its port. */
+#define USB_QUIRK_HUB_SLOW_RESET		BIT(14)
+
 #endif /* __LINUX_USB_QUIRKS_H */
-- 
GitLab


From a77112577667cbda7c6292c52d909636aef31fd9 Mon Sep 17 00:00:00 2001
From: Emmanuel Pescosta <emmanuelpescosta099@gmail.com>
Date: Fri, 26 Oct 2018 14:48:09 +0200
Subject: [PATCH 0762/1890] usb: quirks: Add delay-init quirk for Corsair K70
 LUX RGB

Following on from this patch: https://lkml.org/lkml/2017/11/3/516,
Corsair K70 LUX RGB keyboards also require the DELAY_INIT quirk to
start correctly at boot.

Dmesg output:
usb 1-6: string descriptor 0 read error: -110
usb 1-6: New USB device found, idVendor=1b1c, idProduct=1b33
usb 1-6: New USB device strings: Mfr=1, Product=2, SerialNumber=3
usb 1-6: can't set config #1, error -110

Signed-off-by: Emmanuel Pescosta <emmanuelpescosta099@gmail.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/quirks.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index 4d7d948eae636..64df444af2299 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -397,6 +397,9 @@ static const struct usb_device_id usb_quirk_list[] = {
 	{ USB_DEVICE(0x1b1c, 0x1b20), .driver_info = USB_QUIRK_DELAY_INIT |
 	  USB_QUIRK_DELAY_CTRL_MSG },
 
+	/* Corsair K70 LUX RGB */
+	{ USB_DEVICE(0x1b1c, 0x1b33), .driver_info = USB_QUIRK_DELAY_INIT },
+
 	/* Corsair K70 LUX */
 	{ USB_DEVICE(0x1b1c, 0x1b36), .driver_info = USB_QUIRK_DELAY_INIT },
 
-- 
GitLab


From deefd24228a172d1b27d4a9adbfd2cdacd60ae64 Mon Sep 17 00:00:00 2001
From: Kai-Heng Feng <kai.heng.feng@canonical.com>
Date: Fri, 26 Oct 2018 13:33:15 +0800
Subject: [PATCH 0763/1890] USB: quirks: Add no-lpm quirk for Raydium
 touchscreens

Raydium USB touchscreen fails to set config if LPM is enabled:
[    2.030658] usb 1-8: New USB device found, idVendor=2386, idProduct=3119
[    2.030659] usb 1-8: New USB device strings: Mfr=1, Product=2, SerialNumber=0
[    2.030660] usb 1-8: Product: Raydium Touch System
[    2.030661] usb 1-8: Manufacturer: Raydium Corporation
[    7.132209] usb 1-8: can't set config #1, error -110

Same behavior can be observed on 2386:3114.

Raydium claims the touchscreen supports LPM under Windows, so I used
Microsoft USB Test Tools (MUTT) [1] to check its LPM status. MUTT shows
that the LPM doesn't work under Windows, either. So let's just disable LPM
for Raydium touchscreens.

[1] https://docs.microsoft.com/en-us/windows-hardware/drivers/usbcon/usb-test-tools

Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/quirks.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index 64df444af2299..f9ff03e6af93e 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -420,6 +420,11 @@ static const struct usb_device_id usb_quirk_list[] = {
 	{ USB_DEVICE(0x2040, 0x7200), .driver_info =
 			USB_QUIRK_CONFIG_INTF_STRINGS },
 
+	/* Raydium Touchscreen */
+	{ USB_DEVICE(0x2386, 0x3114), .driver_info = USB_QUIRK_NO_LPM },
+
+	{ USB_DEVICE(0x2386, 0x3119), .driver_info = USB_QUIRK_NO_LPM },
+
 	/* DJI CineSSD */
 	{ USB_DEVICE(0x2ca3, 0x0031), .driver_info = USB_QUIRK_NO_LPM },
 
-- 
GitLab


From f6501f49199097b99e4e263644d88c90d1ec1060 Mon Sep 17 00:00:00 2001
From: Mattias Jacobsson <2pi@mok.nu>
Date: Sun, 21 Oct 2018 11:25:37 +0200
Subject: [PATCH 0764/1890] USB: misc: appledisplay: add 20" Apple Cinema
 Display

Add another Apple Cinema Display to the list of supported displays

Signed-off-by: Mattias Jacobsson <2pi@mok.nu>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/misc/appledisplay.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/misc/appledisplay.c b/drivers/usb/misc/appledisplay.c
index bd539f3058bca..85b48c6ddc7e6 100644
--- a/drivers/usb/misc/appledisplay.c
+++ b/drivers/usb/misc/appledisplay.c
@@ -50,6 +50,7 @@ static const struct usb_device_id appledisplay_table[] = {
 	{ APPLEDISPLAY_DEVICE(0x9219) },
 	{ APPLEDISPLAY_DEVICE(0x921c) },
 	{ APPLEDISPLAY_DEVICE(0x921d) },
+	{ APPLEDISPLAY_DEVICE(0x9222) },
 	{ APPLEDISPLAY_DEVICE(0x9236) },
 
 	/* Terminating entry */
-- 
GitLab


From 8d72ee3266f08fd3490011ee7b95ffc31e66fd6d Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Mon, 29 Oct 2018 12:47:40 +0530
Subject: [PATCH 0765/1890] Documentation: cpu-freq: Frequencies aren't always
 sorted

The order in which the frequencies are displayed in cpufreq stats
depends on the order in which the frequencies were sorted in the
frequency table provided to cpufreq core by the cpufreq driver. They can
be completely unsorted as well.

The documentation's claim that the stats will be sorted in descending
order is hence incorrect and here is an attempt to fix it.

Reported-by: Pavel <pavel2000@ngs.ru>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/cpu-freq/cpufreq-stats.txt | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/Documentation/cpu-freq/cpufreq-stats.txt b/Documentation/cpu-freq/cpufreq-stats.txt
index a873855c811d6..14378cecb1723 100644
--- a/Documentation/cpu-freq/cpufreq-stats.txt
+++ b/Documentation/cpu-freq/cpufreq-stats.txt
@@ -86,9 +86,11 @@ transitions.
 This will give a fine grained information about all the CPU frequency
 transitions. The cat output here is a two dimensional matrix, where an entry
 <i,j> (row i, column j) represents the count of number of transitions from 
-Freq_i to Freq_j. Freq_i is in descending order with increasing rows and 
-Freq_j is in descending order with increasing columns. The output here also 
-contains the actual freq values for each row and column for better readability.
+Freq_i to Freq_j. Freq_i rows and Freq_j columns follow the sorting order in
+which the driver has provided the frequency table initially to the cpufreq core
+and so can be sorted (ascending or descending) or unsorted.  The output here
+also contains the actual freq values for each row and column for better
+readability.
 
 If the transition table is bigger than PAGE_SIZE, reading this will
 return an -EFBIG error.
-- 
GitLab


From 6ef28a04d1ccf718eee069b72132ce4aa1e52ab9 Mon Sep 17 00:00:00 2001
From: Anson Huang <anson.huang@nxp.com>
Date: Mon, 5 Nov 2018 00:59:28 +0000
Subject: [PATCH 0766/1890] cpufreq: imx6q: add return value check for voltage
 scale

Add return value check for voltage scale when ARM clock
rate change fail.

Signed-off-by: Anson Huang <Anson.Huang@nxp.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/imx6q-cpufreq.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c
index 8cfee0ab804b4..d8c3595e90236 100644
--- a/drivers/cpufreq/imx6q-cpufreq.c
+++ b/drivers/cpufreq/imx6q-cpufreq.c
@@ -160,8 +160,13 @@ static int imx6q_set_target(struct cpufreq_policy *policy, unsigned int index)
 	/* Ensure the arm clock divider is what we expect */
 	ret = clk_set_rate(clks[ARM].clk, new_freq * 1000);
 	if (ret) {
+		int ret1;
+
 		dev_err(cpu_dev, "failed to set clock rate: %d\n", ret);
-		regulator_set_voltage_tol(arm_reg, volt_old, 0);
+		ret1 = regulator_set_voltage_tol(arm_reg, volt_old, 0);
+		if (ret1)
+			dev_warn(cpu_dev,
+				 "failed to restore vddarm voltage: %d\n", ret1);
 		return ret;
 	}
 
-- 
GitLab


From e531efa1e92b888acce45785b3ae69278fa712c0 Mon Sep 17 00:00:00 2001
From: Zhao Wei Liew <zhaoweiliew@gmail.com>
Date: Thu, 1 Nov 2018 01:32:46 +0000
Subject: [PATCH 0767/1890] Documentation: cpufreq: Correct a typo

Fix a typo in the admin-guide documentation for cpufreq.

Signed-off-by: Zhao Wei Liew <zhaoweiliew@gmail.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/admin-guide/pm/cpufreq.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/pm/cpufreq.rst b/Documentation/admin-guide/pm/cpufreq.rst
index 47153e64dfb53..7eca9026a9ed2 100644
--- a/Documentation/admin-guide/pm/cpufreq.rst
+++ b/Documentation/admin-guide/pm/cpufreq.rst
@@ -150,7 +150,7 @@ data structures necessary to handle the given policy and, possibly, to add
 a governor ``sysfs`` interface to it.  Next, the governor is started by
 invoking its ``->start()`` callback.
 
-That callback it expected to register per-CPU utilization update callbacks for
+That callback is expected to register per-CPU utilization update callbacks for
 all of the online CPUs belonging to the given policy with the CPU scheduler.
 The utilization update callbacks will be invoked by the CPU scheduler on
 important events, like task enqueue and dequeue, on every iteration of the
-- 
GitLab


From 943126417891372d56aa3fe46295cbf53db31370 Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Date: Tue, 16 Oct 2018 10:57:48 +0100
Subject: [PATCH 0768/1890] slimbus: ngd: remove unnecessary check

SLIM_MSG_CLK_PAUSE_SEQ_FLG is never set in any of the slim core,
so performing a check in ngd driver is totally unnecessary.

Also this patch fixes warning about mc field overflow reported
with CoverityScan.

Making clk pause feature optional will be added to slim core in
next development cycle.

Reported-by: Colin Ian King <colin.king@canonical.com>
Fixes: 917809e2280b ("slimbus: ngd: Add qcom SLIMBus NGD driver")
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/slimbus/qcom-ngd-ctrl.c | 3 ---
 drivers/slimbus/slimbus.h       | 6 ------
 2 files changed, 9 deletions(-)

diff --git a/drivers/slimbus/qcom-ngd-ctrl.c b/drivers/slimbus/qcom-ngd-ctrl.c
index 7218fb963d0a1..1382a8df6c75f 100644
--- a/drivers/slimbus/qcom-ngd-ctrl.c
+++ b/drivers/slimbus/qcom-ngd-ctrl.c
@@ -777,9 +777,6 @@ static int qcom_slim_ngd_xfer_msg(struct slim_controller *sctrl,
 	u8 la = txn->la;
 	bool usr_msg = false;
 
-	if (txn->mc & SLIM_MSG_CLK_PAUSE_SEQ_FLG)
-		return -EPROTONOSUPPORT;
-
 	if (txn->mt == SLIM_MSG_MT_CORE &&
 		(txn->mc >= SLIM_MSG_MC_BEGIN_RECONFIGURATION &&
 		 txn->mc <= SLIM_MSG_MC_RECONFIGURE_NOW))
diff --git a/drivers/slimbus/slimbus.h b/drivers/slimbus/slimbus.h
index 4399d1873e2d9..9be41089edde7 100644
--- a/drivers/slimbus/slimbus.h
+++ b/drivers/slimbus/slimbus.h
@@ -61,12 +61,6 @@
 #define SLIM_MSG_MC_NEXT_REMOVE_CHANNEL          0x58
 #define SLIM_MSG_MC_RECONFIGURE_NOW              0x5F
 
-/*
- * Clock pause flag to indicate that the reconfig message
- * corresponds to clock pause sequence
- */
-#define SLIM_MSG_CLK_PAUSE_SEQ_FLG		(1U << 8)
-
 /* Clock pause values per SLIMbus spec */
 #define SLIM_CLK_FAST				0
 #define SLIM_CLK_CONST_PHASE			1
-- 
GitLab


From 9e463084cdb22e0b56b2dfbc50461020409a5fd3 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Wed, 7 Nov 2018 10:32:53 -0500
Subject: [PATCH 0769/1890] ext4: fix possible leak of sbi->s_group_desc_leak
 in error path

Fixes: bfe0a5f47ada ("ext4: add more mount time checks of the superblock")
Reported-by: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org # 4.18
---
 fs/ext4/super.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index a221f1cdf7046..92092b55db1ec 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4075,6 +4075,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	sbi->s_groups_count = blocks_count;
 	sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count,
 			(EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
+	if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) !=
+	    le32_to_cpu(es->s_inodes_count)) {
+		ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu",
+			 le32_to_cpu(es->s_inodes_count),
+			 ((u64)sbi->s_groups_count * sbi->s_inodes_per_group));
+		ret = -EINVAL;
+		goto failed_mount;
+	}
 	db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
 		   EXT4_DESC_PER_BLOCK(sb);
 	if (ext4_has_feature_meta_bg(sb)) {
@@ -4094,14 +4102,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		ret = -ENOMEM;
 		goto failed_mount;
 	}
-	if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) !=
-	    le32_to_cpu(es->s_inodes_count)) {
-		ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu",
-			 le32_to_cpu(es->s_inodes_count),
-			 ((u64)sbi->s_groups_count * sbi->s_inodes_per_group));
-		ret = -EINVAL;
-		goto failed_mount;
-	}
 
 	bgl_lock_init(sbi->s_blockgroup_lock);
 
-- 
GitLab


From f26621e60b35369bca9228bc936dc723b3e421af Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Mon, 5 Nov 2018 10:33:07 +0100
Subject: [PATCH 0770/1890] resource/docs: Complete kernel-doc style function
 documentation

Add the missing kernel-doc style function parameters documentation.

Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: akpm@linux-foundation.org
Cc: linux-tip-commits@vger.kernel.org
Cc: rdunlap@infradead.org
Fixes: b69c2e20f6e4 ("resource: Clean it up a bit")
Link: http://lkml.kernel.org/r/20181105093307.GA12445@zn.tnic
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/resource.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/kernel/resource.c b/kernel/resource.c
index 17bcb189d5306..b0fbf685c77a5 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -318,17 +318,24 @@ int release_resource(struct resource *old)
 
 EXPORT_SYMBOL(release_resource);
 
-/*
- * Finds the lowest iomem resource that covers part of [start..end].  The
- * caller must specify start, end, flags, and desc (which may be
+/**
+ * Finds the lowest iomem resource that covers part of [@start..@end].  The
+ * caller must specify @start, @end, @flags, and @desc (which may be
  * IORES_DESC_NONE).
  *
- * If a resource is found, returns 0 and *res is overwritten with the part
- * of the resource that's within [start..end]; if none is found, returns
- * -1. Returns -EINVAL for other invalid parameters.
+ * If a resource is found, returns 0 and @*res is overwritten with the part
+ * of the resource that's within [@start..@end]; if none is found, returns
+ * -1 or -EINVAL for other invalid parameters.
  *
  * This function walks the whole tree and not just first level children
  * unless @first_lvl is true.
+ *
+ * @start:	start address of the resource searched for
+ * @end:	end address of same resource
+ * @flags:	flags which the resource must have
+ * @desc:	descriptor the resource must have
+ * @first_lvl:	walk only the first level children, if set
+ * @res:	return ptr, if resource found
  */
 static int find_next_iomem_res(resource_size_t start, resource_size_t end,
 			       unsigned long flags, unsigned long desc,
@@ -390,9 +397,7 @@ static int __walk_iomem_res_desc(resource_size_t start, resource_size_t end,
 }
 
 /**
- * walk_iomem_res_desc - walk through iomem resources
- *
- * Walks through iomem resources and calls @func() with matching resource
+ * Walks through iomem resources and calls func() with matching resource
  * ranges. This walks through whole tree and not just first level children.
  * All the memory ranges which overlap start,end and also match flags and
  * desc are valid candidates.
-- 
GitLab


From af18e35bfd01e6d65a5e3ef84ffe8b252d1628c5 Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@virtuozzo.com>
Date: Wed, 7 Nov 2018 10:56:28 -0500
Subject: [PATCH 0771/1890] ext4: fix possible leak of s_journal_flag_rwsem in
 error path

Fixes: c8585c6fcaf2 ("ext4: fix races between changing inode journal ...")
Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org # 4.7
---
 fs/ext4/super.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 92092b55db1ec..53ff6c2a26ed9 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4510,6 +4510,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	percpu_counter_destroy(&sbi->s_freeinodes_counter);
 	percpu_counter_destroy(&sbi->s_dirs_counter);
 	percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
+	percpu_free_rwsem(&sbi->s_journal_flag_rwsem);
 failed_mount5:
 	ext4_ext_release(sb);
 	ext4_release_system_zone(sb);
-- 
GitLab


From ecaaf408478b6fb4d9986f9b6652f3824e374f4c Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@virtuozzo.com>
Date: Wed, 7 Nov 2018 11:01:33 -0500
Subject: [PATCH 0772/1890] ext4: fix buffer leak in ext4_xattr_get_block() on
 error path

Fixes: dec214d00e0d ("ext4: xattr inode deduplication")
Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org # 4.13
---
 fs/ext4/xattr.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index dc1aeab06dbad..07c3a115f7ae9 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -2272,8 +2272,10 @@ static struct buffer_head *ext4_xattr_get_block(struct inode *inode)
 	if (!bh)
 		return ERR_PTR(-EIO);
 	error = ext4_xattr_check_block(inode, bh);
-	if (error)
+	if (error) {
+		brelse(bh);
 		return ERR_PTR(error);
+	}
 	return bh;
 }
 
-- 
GitLab


From 45ae932d246f721e6584430017176cbcadfde610 Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@virtuozzo.com>
Date: Wed, 7 Nov 2018 11:07:01 -0500
Subject: [PATCH 0773/1890] ext4: release bs.bh before re-using in
 ext4_xattr_block_find()

bs.bh was taken in previous ext4_xattr_block_find() call,
it should be released before re-using

Fixes: 7e01c8e5420b ("ext3/4: fix uninitialized bs in ...")
Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org # 2.6.26
---
 fs/ext4/xattr.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 07c3a115f7ae9..07b9a335c8eb4 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -2395,6 +2395,8 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
 			error = ext4_xattr_block_set(handle, inode, &i, &bs);
 		} else if (error == -ENOSPC) {
 			if (EXT4_I(inode)->i_file_acl && !bs.s.base) {
+				brelse(bs.bh);
+				bs.bh = NULL;
 				error = ext4_xattr_block_find(inode, &i, &bs);
 				if (error)
 					goto cleanup;
-- 
GitLab


From 6bdc9977fcdedf47118d2caf7270a19f4b6d8a8f Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@virtuozzo.com>
Date: Wed, 7 Nov 2018 11:10:21 -0500
Subject: [PATCH 0774/1890] ext4: fix buffer leak in ext4_xattr_move_to_block()
 on error path

Fixes: 3f2571c1f91f ("ext4: factor out xattr moving")
Fixes: 6dd4ee7cab7e ("ext4: Expand extra_inodes space per ...")
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org # 2.6.23
---
 fs/ext4/xattr.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 07b9a335c8eb4..5c9bc0d85cc0a 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -2617,6 +2617,8 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
 	kfree(buffer);
 	if (is)
 		brelse(is->iloc.bh);
+	if (bs)
+		brelse(bs->bh);
 	kfree(is);
 	kfree(bs);
 
-- 
GitLab


From fbb974ba693bbfb4e24a62181ef16d4e45febc37 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Tue, 4 Sep 2018 16:51:29 +0200
Subject: [PATCH 0775/1890] rtc: cmos: Do not export alarm rtc_ops when we do
 not support alarms

When there is no IRQ configured for the RTC, the rtc-cmos code does not
support alarms, all alarm rtc_ops fail with -EIO / -EINVAL.

The rtc-core expects a rtc driver which does not support rtc alarms to
not have alarm ops at all. Otherwise the wakealarm sysfs attr will read
as empty rather then returning an error, making it impossible for
userspace to find out beforehand if alarms are supported.

A system without an IRQ for the RTC before this patch:
[root@localhost ~]# cat /sys/class/rtc/rtc0/wakealarm
[root@localhost ~]#

After this patch:
[root@localhost ~]# cat /sys/class/rtc/rtc0/wakealarm
cat: /sys/class/rtc/rtc0/wakealarm: No such file or directory
[root@localhost ~]#

This fixes gnome-session + systemd trying to use suspend-then-hibernate,
which causes systemd to abort the suspend when writing the RTC alarm fails.

BugLink: https://github.com/systemd/systemd/issues/9988
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-cmos.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index df0c5776d49bb..a5a19ff105354 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -257,6 +257,7 @@ static int cmos_read_alarm(struct device *dev, struct rtc_wkalrm *t)
 	struct cmos_rtc	*cmos = dev_get_drvdata(dev);
 	unsigned char	rtc_control;
 
+	/* This not only a rtc_op, but also called directly */
 	if (!is_valid_irq(cmos->irq))
 		return -EIO;
 
@@ -452,6 +453,7 @@ static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t)
 	unsigned char mon, mday, hrs, min, sec, rtc_control;
 	int ret;
 
+	/* This not only a rtc_op, but also called directly */
 	if (!is_valid_irq(cmos->irq))
 		return -EIO;
 
@@ -516,9 +518,6 @@ static int cmos_alarm_irq_enable(struct device *dev, unsigned int enabled)
 	struct cmos_rtc	*cmos = dev_get_drvdata(dev);
 	unsigned long	flags;
 
-	if (!is_valid_irq(cmos->irq))
-		return -EINVAL;
-
 	spin_lock_irqsave(&rtc_lock, flags);
 
 	if (enabled)
@@ -579,6 +578,12 @@ static const struct rtc_class_ops cmos_rtc_ops = {
 	.alarm_irq_enable	= cmos_alarm_irq_enable,
 };
 
+static const struct rtc_class_ops cmos_rtc_ops_no_alarm = {
+	.read_time		= cmos_read_time,
+	.set_time		= cmos_set_time,
+	.proc			= cmos_procfs,
+};
+
 /*----------------------------------------------------------------*/
 
 /*
@@ -855,9 +860,12 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
 			dev_dbg(dev, "IRQ %d is already in use\n", rtc_irq);
 			goto cleanup1;
 		}
+
+		cmos_rtc.rtc->ops = &cmos_rtc_ops;
+	} else {
+		cmos_rtc.rtc->ops = &cmos_rtc_ops_no_alarm;
 	}
 
-	cmos_rtc.rtc->ops = &cmos_rtc_ops;
 	cmos_rtc.rtc->nvram_old_abi = true;
 	retval = rtc_register_device(cmos_rtc.rtc);
 	if (retval)
-- 
GitLab


From 7ce9a992ffde8ce93d5ae5767362a5c7389ae895 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@linux-mips.org>
Date: Mon, 5 Nov 2018 03:48:25 +0000
Subject: [PATCH 0776/1890] rtc: hctosys: Add missing range error reporting

Fix an issue with the 32-bit range error path in `rtc_hctosys' where no
error code is set and consequently the successful preceding call result
from `rtc_read_time' is propagated to `rtc_hctosys_ret'.  This in turn
makes any subsequent call to `hctosys_show' incorrectly report in sysfs
that the system time has been set from this RTC while it has not.

Set the error to ERANGE then if we can't express the result due to an
overflow.

Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
Fixes: b3a5ac42ab18 ("rtc: hctosys: Ensure system time doesn't overflow time_t")
Cc: stable@vger.kernel.org # 4.17+
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/hctosys.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/rtc/hctosys.c b/drivers/rtc/hctosys.c
index e79f2a181ad24..b9ec4a16db1f6 100644
--- a/drivers/rtc/hctosys.c
+++ b/drivers/rtc/hctosys.c
@@ -50,8 +50,10 @@ static int __init rtc_hctosys(void)
 	tv64.tv_sec = rtc_tm_to_time64(&tm);
 
 #if BITS_PER_LONG == 32
-	if (tv64.tv_sec > INT_MAX)
+	if (tv64.tv_sec > INT_MAX) {
+		err = -ERANGE;
 		goto err_read;
+	}
 #endif
 
 	err = do_settimeofday64(&tv64);
-- 
GitLab


From 9bde0afb7a906f1dabdba37162551565740b862d Mon Sep 17 00:00:00 2001
From: Xulin Sun <xulin.sun@windriver.com>
Date: Tue, 6 Nov 2018 16:42:19 +0800
Subject: [PATCH 0777/1890] rtc: pcf2127: fix a kmemleak caused in
 pcf2127_i2c_gather_write

pcf2127_i2c_gather_write() allocates memory as local variable
for i2c_master_send(), after finishing the master transfer,
the allocated memory should be freed. The kmemleak is reported:

unreferenced object 0xffff80231e7dba80 (size 64):
  comm "hwclock", pid 27762, jiffies 4296880075 (age 356.944s)
  hex dump (first 32 bytes):
    03 00 12 03 19 02 11 13 00 80 98 18 00 00 ff ff ................
    00 50 00 00 00 00 00 00 02 00 00 00 00 00 00 00 .P..............
  backtrace:
    [<ffff000008221398>] create_object+0xf8/0x278
    [<ffff000008a96264>] kmemleak_alloc+0x74/0xa0
    [<ffff00000821070c>] __kmalloc+0x1ac/0x348
    [<ffff0000087ed1dc>] pcf2127_i2c_gather_write+0x54/0xf8
    [<ffff0000085fd9d4>] _regmap_raw_write+0x464/0x850
    [<ffff0000085fe3f4>] regmap_bulk_write+0x1a4/0x348
    [<ffff0000087ed32c>] pcf2127_rtc_set_time+0xac/0xe8
    [<ffff0000087eaad8>] rtc_set_time+0x80/0x138
    [<ffff0000087ebfb0>] rtc_dev_ioctl+0x398/0x610
    [<ffff00000823f2c0>] do_vfs_ioctl+0xb0/0x848
    [<ffff00000823fae4>] SyS_ioctl+0x8c/0xa8
    [<ffff000008083ac0>] el0_svc_naked+0x34/0x38
    [<ffffffffffffffff>] 0xffffffffffffffff

Signed-off-by: Xulin Sun <xulin.sun@windriver.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-pcf2127.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/rtc/rtc-pcf2127.c b/drivers/rtc/rtc-pcf2127.c
index 9f99a0966550b..7cb786d76e3c1 100644
--- a/drivers/rtc/rtc-pcf2127.c
+++ b/drivers/rtc/rtc-pcf2127.c
@@ -303,6 +303,9 @@ static int pcf2127_i2c_gather_write(void *context,
 	memcpy(buf + 1, val, val_size);
 
 	ret = i2c_master_send(client, buf, val_size + 1);
+
+	kfree(buf);
+
 	if (ret != val_size + 1)
 		return ret < 0 ? ret : -EIO;
 
-- 
GitLab


From 53692ec074d00589c2cf1d6d17ca76ad0adce6ec Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@virtuozzo.com>
Date: Wed, 7 Nov 2018 11:14:35 -0500
Subject: [PATCH 0778/1890] ext4: fix buffer leak in
 ext4_expand_extra_isize_ea() on error path

Fixes: de05ca852679 ("ext4: move call to ext4_error() into ...")
Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org # 4.17
---
 fs/ext4/xattr.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 5c9bc0d85cc0a..0b96886835262 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -2698,7 +2698,6 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
 			       struct ext4_inode *raw_inode, handle_t *handle)
 {
 	struct ext4_xattr_ibody_header *header;
-	struct buffer_head *bh;
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	static unsigned int mnt_count;
 	size_t min_offs;
@@ -2739,13 +2738,17 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
 	 * EA block can hold new_extra_isize bytes.
 	 */
 	if (EXT4_I(inode)->i_file_acl) {
+		struct buffer_head *bh;
+
 		bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
 		error = -EIO;
 		if (!bh)
 			goto cleanup;
 		error = ext4_xattr_check_block(inode, bh);
-		if (error)
+		if (error) {
+			brelse(bh);
 			goto cleanup;
+		}
 		base = BHDR(bh);
 		end = bh->b_data + bh->b_size;
 		min_offs = end - base;
-- 
GitLab


From ffe498237b36ee42624e139b21efa05da4ff1f48 Mon Sep 17 00:00:00 2001
From: Chinh T Cao <chinh.t.cao@intel.com>
Date: Mon, 5 Nov 2018 12:18:35 -0800
Subject: [PATCH 0779/1890] ice: Change req_speeds to be u16

Since the req_speeds field in struct ice_link_status is a u8,
req_speeds & ICE_AQ_LINK_SPEED_40GB always returns 0. This was caught
by a coverity scan.

Fix this by changing req_speeds to be u16.

Reported-by: Bruce Allan <bruce.w.allan@intel.com>
Signed-off-by: Chinh T Cao <chinh.t.cao@intel.com>
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_type.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index 12f9432abf110..f4dbc81c19886 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -92,12 +92,12 @@ struct ice_link_status {
 	u64 phy_type_low;
 	u16 max_frame_size;
 	u16 link_speed;
+	u16 req_speeds;
 	u8 lse_ena;	/* Link Status Event notification */
 	u8 link_info;
 	u8 an_info;
 	u8 ext_info;
 	u8 pacing;
-	u8 req_speeds;
 	/* Refer to #define from module_type[ICE_MODULE_TYPE_TOTAL_BYTE] of
 	 * ice_aqc_get_phy_caps structure
 	 */
-- 
GitLab


From ba766b8b99c30ad3c55ed8cf224d1185ecff1476 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Mon, 29 Oct 2018 10:52:42 -0700
Subject: [PATCH 0780/1890] i40e: restore NETIF_F_GSO_IPXIP[46] to netdev
 features

Since commit bacd75cfac8a ("i40e/i40evf: Add capability exchange for
outer checksum", 2017-04-06) the i40e driver has not reported support
for IP-in-IP offloads. This likely occurred due to a bad rebase, as the
commit extracts hw_enc_features into its own variable. As part of this
change, it dropped the NETIF_F_FSO_IPXIP flags from the
netdev->hw_enc_features. This was unfortunately not caught during code
review.

Fix this by adding back the missing feature flags.

For reference, NETIF_F_GSO_IPXIP4 was added in commit 7e13318daa4a
("net: define gso types for IPx over IPv4 and IPv6", 2016-05-20),
replacing NETIF_F_GSO_IPIP and NETIF_F_GSO_SIT.

NETIF_F_GSO_IPXIP6 was added in commit bf2d1df39502 ("intel: Add support
for IPv6 IP-in-IP offload", 2016-05-20).

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index bc71a21c1dc2c..3ff5ee49818b0 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -12249,6 +12249,8 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
 			  NETIF_F_GSO_GRE		|
 			  NETIF_F_GSO_GRE_CSUM		|
 			  NETIF_F_GSO_PARTIAL		|
+			  NETIF_F_GSO_IPXIP4		|
+			  NETIF_F_GSO_IPXIP6		|
 			  NETIF_F_GSO_UDP_TUNNEL	|
 			  NETIF_F_GSO_UDP_TUNNEL_CSUM	|
 			  NETIF_F_SCTP_CRC		|
-- 
GitLab


From d5596fd46770550873ab4c02bcb69f83d3d63f63 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Mon, 29 Oct 2018 10:52:43 -0700
Subject: [PATCH 0781/1890] i40e: enable NETIF_F_NTUPLE and NETIF_F_HW_TC at
 driver load

The assignment of the feature flag NETIF_F_NTUPLE and NETIF_F_HW_TC
occurs prior to the initial setup of the local hw_features variable.

This means the features are set as user-changeable, but are not set in
the currently active feature list. This results in the features being
disabled at the driver's initial load.

Move the assignment after the initial assignment of hw_features, and
assign to the local variable. This ensures that NETIF_F_NTUPLE and
NETIF_F_HW_TC are marked as user-changeable, and also enables them by
default when the driver loads.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 3ff5ee49818b0..21c2688d63082 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -12268,13 +12268,13 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
 	/* record features VLANs can make use of */
 	netdev->vlan_features |= hw_enc_features | NETIF_F_TSO_MANGLEID;
 
-	if (!(pf->flags & I40E_FLAG_MFP_ENABLED))
-		netdev->hw_features |= NETIF_F_NTUPLE | NETIF_F_HW_TC;
-
 	hw_features = hw_enc_features		|
 		      NETIF_F_HW_VLAN_CTAG_TX	|
 		      NETIF_F_HW_VLAN_CTAG_RX;
 
+	if (!(pf->flags & I40E_FLAG_MFP_ENABLED))
+		hw_features |= NETIF_F_NTUPLE | NETIF_F_HW_TC;
+
 	netdev->hw_features |= hw_features;
 
 	netdev->features |= hw_features | NETIF_F_HW_VLAN_CTAG_FILTER;
-- 
GitLab


From d6fd0ae25c6495674dc5a41a8d16bc8e0073276d Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Wed, 31 Oct 2018 10:06:08 -0700
Subject: [PATCH 0782/1890] Btrfs: fix missing delayed iputs on unmount

There's a race between close_ctree() and cleaner_kthread().
close_ctree() sets btrfs_fs_closing(), and the cleaner stops when it
sees it set, but this is racy; the cleaner might have already checked
the bit and could be cleaning stuff. In particular, if it deletes unused
block groups, it will create delayed iputs for the free space cache
inodes. As of "btrfs: don't run delayed_iputs in commit", we're no
longer running delayed iputs after a commit. Therefore, if the cleaner
creates more delayed iputs after delayed iputs are run in
btrfs_commit_super(), we will leak inodes on unmount and get a busy
inode crash from the VFS.

Fix it by parking the cleaner before we actually close anything. Then,
any remaining delayed iputs will always be handled in
btrfs_commit_super(). This also ensures that the commit in close_ctree()
is really the last commit, so we can get rid of the commit in
cleaner_kthread().

The fstest/generic/475 followed by 476 can trigger a crash that
manifests as a slab corruption caused by accessing the freed kthread
structure by a wake up function. Sample trace:

[ 5657.077612] BUG: unable to handle kernel NULL pointer dereference at 00000000000000cc
[ 5657.079432] PGD 1c57a067 P4D 1c57a067 PUD da10067 PMD 0
[ 5657.080661] Oops: 0000 [#1] PREEMPT SMP
[ 5657.081592] CPU: 1 PID: 5157 Comm: fsstress Tainted: G        W         4.19.0-rc8-default+ #323
[ 5657.083703] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.11.2-0-gf9626cc-prebuilt.qemu-project.org 04/01/2014
[ 5657.086577] RIP: 0010:shrink_page_list+0x2f9/0xe90
[ 5657.091937] RSP: 0018:ffffb5c745c8f728 EFLAGS: 00010287
[ 5657.092953] RAX: 0000000000000074 RBX: ffffb5c745c8f830 RCX: 0000000000000000
[ 5657.094590] RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffff9a8747fdf3d0
[ 5657.095987] RBP: ffffb5c745c8f9e0 R08: 0000000000000000 R09: 0000000000000000
[ 5657.097159] R10: ffff9a8747fdf5e8 R11: 0000000000000000 R12: ffffb5c745c8f788
[ 5657.098513] R13: ffff9a877f6ff2c0 R14: ffff9a877f6ff2c8 R15: dead000000000200
[ 5657.099689] FS:  00007f948d853b80(0000) GS:ffff9a877d600000(0000) knlGS:0000000000000000
[ 5657.101032] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 5657.101953] CR2: 00000000000000cc CR3: 00000000684bd000 CR4: 00000000000006e0
[ 5657.103159] Call Trace:
[ 5657.103776]  shrink_inactive_list+0x194/0x410
[ 5657.104671]  shrink_node_memcg.constprop.84+0x39a/0x6a0
[ 5657.105750]  shrink_node+0x62/0x1c0
[ 5657.106529]  try_to_free_pages+0x1a4/0x500
[ 5657.107408]  __alloc_pages_slowpath+0x2c9/0xb20
[ 5657.108418]  __alloc_pages_nodemask+0x268/0x2b0
[ 5657.109348]  kmalloc_large_node+0x37/0x90
[ 5657.110205]  __kmalloc_node+0x236/0x310
[ 5657.111014]  kvmalloc_node+0x3e/0x70

Fixes: 30928e9baac2 ("btrfs: don't run delayed_iputs in commit")
Signed-off-by: Omar Sandoval <osandov@fb.com>
Reviewed-by: David Sterba <dsterba@suse.com>
[ add trace ]
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/disk-io.c | 51 ++++++++++++++--------------------------------
 1 file changed, 15 insertions(+), 36 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 00ee5e37e9897..3f0b6d1936e8e 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1664,9 +1664,8 @@ static int cleaner_kthread(void *arg)
 	struct btrfs_root *root = arg;
 	struct btrfs_fs_info *fs_info = root->fs_info;
 	int again;
-	struct btrfs_trans_handle *trans;
 
-	do {
+	while (1) {
 		again = 0;
 
 		/* Make the cleaner go to sleep early. */
@@ -1715,42 +1714,16 @@ static int cleaner_kthread(void *arg)
 		 */
 		btrfs_delete_unused_bgs(fs_info);
 sleep:
+		if (kthread_should_park())
+			kthread_parkme();
+		if (kthread_should_stop())
+			return 0;
 		if (!again) {
 			set_current_state(TASK_INTERRUPTIBLE);
-			if (!kthread_should_stop())
-				schedule();
+			schedule();
 			__set_current_state(TASK_RUNNING);
 		}
-	} while (!kthread_should_stop());
-
-	/*
-	 * Transaction kthread is stopped before us and wakes us up.
-	 * However we might have started a new transaction and COWed some
-	 * tree blocks when deleting unused block groups for example. So
-	 * make sure we commit the transaction we started to have a clean
-	 * shutdown when evicting the btree inode - if it has dirty pages
-	 * when we do the final iput() on it, eviction will trigger a
-	 * writeback for it which will fail with null pointer dereferences
-	 * since work queues and other resources were already released and
-	 * destroyed by the time the iput/eviction/writeback is made.
-	 */
-	trans = btrfs_attach_transaction(root);
-	if (IS_ERR(trans)) {
-		if (PTR_ERR(trans) != -ENOENT)
-			btrfs_err(fs_info,
-				  "cleaner transaction attach returned %ld",
-				  PTR_ERR(trans));
-	} else {
-		int ret;
-
-		ret = btrfs_commit_transaction(trans);
-		if (ret)
-			btrfs_err(fs_info,
-				  "cleaner open transaction commit returned %d",
-				  ret);
 	}
-
-	return 0;
 }
 
 static int transaction_kthread(void *arg)
@@ -3931,6 +3904,13 @@ void close_ctree(struct btrfs_fs_info *fs_info)
 	int ret;
 
 	set_bit(BTRFS_FS_CLOSING_START, &fs_info->flags);
+	/*
+	 * We don't want the cleaner to start new transactions, add more delayed
+	 * iputs, etc. while we're closing. We can't use kthread_stop() yet
+	 * because that frees the task_struct, and the transaction kthread might
+	 * still try to wake up the cleaner.
+	 */
+	kthread_park(fs_info->cleaner_kthread);
 
 	/* wait for the qgroup rescan worker to stop */
 	btrfs_qgroup_wait_for_completion(fs_info, false);
@@ -3958,9 +3938,8 @@ void close_ctree(struct btrfs_fs_info *fs_info)
 
 	if (!sb_rdonly(fs_info->sb)) {
 		/*
-		 * If the cleaner thread is stopped and there are
-		 * block groups queued for removal, the deletion will be
-		 * skipped when we quit the cleaner thread.
+		 * The cleaner kthread is stopped, so do one final pass over
+		 * unused block groups.
 		 */
 		btrfs_delete_unused_bgs(fs_info);
 
-- 
GitLab


From df376b2ed51a2777c3398e038992f62523c0f932 Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <jthumshirn@suse.de>
Date: Wed, 7 Nov 2018 14:58:14 +0100
Subject: [PATCH 0783/1890] block: respect virtual boundary mask in bvecs

With drivers that are settting a virtual boundary constrain, we are
seeing a lot of bio splitting and smaller I/Os being submitted to the
driver.

This happens because the bio gap detection code does not account cases
where PAGE_SIZE - 1 is bigger than queue_virt_boundary() and thus will
split the bio unnecessarily.

Cc: Jan Kara <jack@suse.cz>
Cc: Bart Van Assche <bvanassche@acm.org>
Cc: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Johannes Thumshirn <jthumshirn@suse.de>
Acked-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-merge.c | 2 +-
 block/blk.h       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/blk-merge.c b/block/blk-merge.c
index 6b5ad275ed565..208658a901c65 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -46,7 +46,7 @@ static inline bool bio_will_gap(struct request_queue *q,
 		bio_get_first_bvec(prev_rq->bio, &pb);
 	else
 		bio_get_first_bvec(prev, &pb);
-	if (pb.bv_offset)
+	if (pb.bv_offset & queue_virt_boundary(q))
 		return true;
 
 	/*
diff --git a/block/blk.h b/block/blk.h
index a1841b8ff1296..c85e53f21cdd8 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -169,7 +169,7 @@ static inline bool biovec_phys_mergeable(struct request_queue *q,
 static inline bool __bvec_gap_to_prev(struct request_queue *q,
 		struct bio_vec *bprv, unsigned int offset)
 {
-	return offset ||
+	return (offset & queue_virt_boundary(q)) ||
 		((bprv->bv_offset + bprv->bv_len) & queue_virt_boundary(q));
 }
 
-- 
GitLab


From 3a40cfe8ba3abba57af2c7e26aad2c6dd1681864 Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linaro.org>
Date: Wed, 7 Nov 2018 13:42:58 +0800
Subject: [PATCH 0784/1890] leds: trigger: Fix sleeping function called from
 invalid context

We will meet below issue due to mutex_lock() is called in interrupt context.
The mutex lock is used to protect the pattern trigger data, but before changing
new pattern trigger data (pattern values or repeat value) by users, we always
cancel the timer firstly to clear previous patterns' performance. That means
there is no race in pattern_trig_timer_function(), so we can drop the mutex
lock in pattern_trig_timer_function() to avoid this issue.

Moreover we can move the timer cancelling into mutex protection, since there
is no deadlock risk if we remove the mutex lock in pattern_trig_timer_function().

BUG: sleeping function called from invalid context at kernel/locking/mutex.c:254
in_atomic(): 1, irqs_disabled(): 0, pid: 0, name: swapper/1
CPU: 1 PID: 0 Comm: swapper/1 Not tainted
4.20.0-rc1-koelsch-00841-ga338c8181013c1a9 #171
Hardware name: Generic R-Car Gen2 (Flattened Device Tree)
[<c020f19c>] (unwind_backtrace) from [<c020aecc>] (show_stack+0x10/0x14)
[<c020aecc>] (show_stack) from [<c07affb8>] (dump_stack+0x7c/0x9c)
[<c07affb8>] (dump_stack) from [<c02417d4>] (___might_sleep+0xf4/0x158)
[<c02417d4>] (___might_sleep) from [<c07c92c4>] (mutex_lock+0x18/0x60)
[<c07c92c4>] (mutex_lock) from [<c067b28c>] (pattern_trig_timer_function+0x1c/0x11c)
[<c067b28c>] (pattern_trig_timer_function) from [<c027f6fc>] (call_timer_fn+0x1c/0x90)
[<c027f6fc>] (call_timer_fn) from [<c027f944>] (expire_timers+0x94/0xa4)
[<c027f944>] (expire_timers) from [<c027fc98>] (run_timer_softirq+0x108/0x15c)
[<c027fc98>] (run_timer_softirq) from [<c02021cc>] (__do_softirq+0x1d4/0x258)
[<c02021cc>] (__do_softirq) from [<c0224d24>] (irq_exit+0x64/0xc4)
[<c0224d24>] (irq_exit) from [<c0268dd0>] (__handle_domain_irq+0x80/0xb4)
[<c0268dd0>] (__handle_domain_irq) from [<c045e1b0>] (gic_handle_irq+0x58/0x90)
[<c045e1b0>] (gic_handle_irq) from [<c02019f8>] (__irq_svc+0x58/0x74)
Exception stack(0xeb483f60 to 0xeb483fa8)
3f60: 00000000 00000000 eb9afaa0 c0217e80 00000000 ffffe000 00000000 c0e06408
3f80: 00000002 c0e0647c c0c6a5f0 00000000 c0e04900 eb483fb0 c0207ea8 c0207e98
3fa0: 60020013 ffffffff
[<c02019f8>] (__irq_svc) from [<c0207e98>] (arch_cpu_idle+0x1c/0x38)
[<c0207e98>] (arch_cpu_idle) from [<c0247ca8>] (do_idle+0x138/0x268)
[<c0247ca8>] (do_idle) from [<c0248050>] (cpu_startup_entry+0x18/0x1c)
[<c0248050>] (cpu_startup_entry) from [<402022ec>] (0x402022ec)

Fixes: 5fd752b6b3a2 ("leds: core: Introduce LED pattern trigger")
Signed-off-by: Baolin Wang <baolin.wang@linaro.org>
Reported-by: Geert Uytterhoeven <geert+renesas@glider.be>
Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Jacek Anaszewski <jacek.anaszewski@gmail.com>
---
 drivers/leds/trigger/ledtrig-pattern.c | 20 ++++----------------
 1 file changed, 4 insertions(+), 16 deletions(-)

diff --git a/drivers/leds/trigger/ledtrig-pattern.c b/drivers/leds/trigger/ledtrig-pattern.c
index 174a298f1be02..1870cf87afe1e 100644
--- a/drivers/leds/trigger/ledtrig-pattern.c
+++ b/drivers/leds/trigger/ledtrig-pattern.c
@@ -75,8 +75,6 @@ static void pattern_trig_timer_function(struct timer_list *t)
 {
 	struct pattern_trig_data *data = from_timer(data, t, timer);
 
-	mutex_lock(&data->lock);
-
 	for (;;) {
 		if (!data->is_indefinite && !data->repeat)
 			break;
@@ -117,8 +115,6 @@ static void pattern_trig_timer_function(struct timer_list *t)
 
 		break;
 	}
-
-	mutex_unlock(&data->lock);
 }
 
 static int pattern_trig_start_pattern(struct led_classdev *led_cdev)
@@ -177,14 +173,10 @@ static ssize_t repeat_store(struct device *dev, struct device_attribute *attr,
 	if (res < -1 || res == 0)
 		return -EINVAL;
 
-	/*
-	 * Clear previous patterns' performence firstly, and remove the timer
-	 * without mutex lock to avoid dead lock.
-	 */
-	del_timer_sync(&data->timer);
-
 	mutex_lock(&data->lock);
 
+	del_timer_sync(&data->timer);
+
 	if (data->is_hw_pattern)
 		led_cdev->pattern_clear(led_cdev);
 
@@ -235,14 +227,10 @@ static ssize_t pattern_trig_store_patterns(struct led_classdev *led_cdev,
 	struct pattern_trig_data *data = led_cdev->trigger_data;
 	int ccount, cr, offset = 0, err = 0;
 
-	/*
-	 * Clear previous patterns' performence firstly, and remove the timer
-	 * without mutex lock to avoid dead lock.
-	 */
-	del_timer_sync(&data->timer);
-
 	mutex_lock(&data->lock);
 
+	del_timer_sync(&data->timer);
+
 	if (data->is_hw_pattern)
 		led_cdev->pattern_clear(led_cdev);
 
-- 
GitLab


From 406e7f986b2e4499295351bcfff5c93b0d34022a Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Wed, 7 Nov 2018 14:45:24 +0100
Subject: [PATCH 0785/1890] Documentation: ABI: led-trigger-pattern: Fix typos

  - Spelling s/brigntess/brightness/,
  - Double "use".

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Jacek Anaszewski <jacek.anaszewski@gmail.com>
---
 Documentation/ABI/testing/sysfs-class-led-trigger-pattern | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-class-led-trigger-pattern b/Documentation/ABI/testing/sysfs-class-led-trigger-pattern
index fb3d1e03b8819..1e5d172e06462 100644
--- a/Documentation/ABI/testing/sysfs-class-led-trigger-pattern
+++ b/Documentation/ABI/testing/sysfs-class-led-trigger-pattern
@@ -37,8 +37,8 @@ Description:
 		  0-|   /             \/             \/
 		    +---0----1----2----3----4----5----6------------> time (s)
 
-		2. To make the LED go instantly from one brigntess value to another,
-		we should use use zero-time lengths (the brightness must be same as
+		2. To make the LED go instantly from one brightness value to another,
+		we should use zero-time lengths (the brightness must be same as
 		the previous tuple's). So the format should be:
 		"brightness_1 duration_1 brightness_1 0 brightness_2 duration_2
 		brightness_2 0 ...". For example:
-- 
GitLab


From cf3d02a185edf449d30465dae8d22a4979545829 Mon Sep 17 00:00:00 2001
From: Sean Paul <seanpaul@chromium.org>
Date: Wed, 7 Nov 2018 15:55:33 -0500
Subject: [PATCH 0786/1890] drm: Fix htmldocs warnings in drm_fourcc.c

Add a description for dev and remove the excess one for native. Fixes
the following warnings:

../drivers/gpu/drm/drm_fourcc.c:112: warning: Function parameter or member 'dev' not described in 'drm_driver_legacy_fb_format'
../drivers/gpu/drm/drm_fourcc.c:112: warning: Excess function parameter 'native' description in 'drm_driver_legacy_fb_format'

Fixes: 059b5eb5d955 ("drm: move native byte order quirk to new drm_driver_legacy_fb_format function")
Cc: Gerd Hoffmann <kraxel@redhat.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Sean Paul <seanpaul@chromium.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20181107205546.216088-1-sean@poorly.run
---
 drivers/gpu/drm/drm_fourcc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_fourcc.c b/drivers/gpu/drm/drm_fourcc.c
index 90a1c846fc25a..8aaa5e86a979c 100644
--- a/drivers/gpu/drm/drm_fourcc.c
+++ b/drivers/gpu/drm/drm_fourcc.c
@@ -97,9 +97,9 @@ EXPORT_SYMBOL(drm_mode_legacy_fb_format);
 
 /**
  * drm_driver_legacy_fb_format - compute drm fourcc code from legacy description
+ * @dev: DRM device
  * @bpp: bits per pixels
  * @depth: bit depth per pixel
- * @native: use host native byte order
  *
  * Computes a drm fourcc pixel format code for the given @bpp/@depth values.
  * Unlike drm_mode_legacy_fb_format() this looks at the drivers mode_config,
-- 
GitLab


From 6961cd4d0fde97f1cdb798ba21cf124ecfa0bf95 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 7 Nov 2018 14:34:05 -0700
Subject: [PATCH 0787/1890] ubd: fix missing lock around request issue

We need to hold the device lock (and disable interrupts) while
writing new commands, or we could be interrupted while that
is happening and read invalid requests in the completion path.

Fixes: 4e6da0fe8058 ("um: Convert ubd driver to blk-mq")
Tested-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 arch/um/drivers/ubd_kern.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 74c002ddc0ce7..08831f5d83db0 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -1341,11 +1341,14 @@ static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req,
 static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
 				 const struct blk_mq_queue_data *bd)
 {
+	struct ubd *ubd_dev = hctx->queue->queuedata;
 	struct request *req = bd->rq;
 	int ret = 0;
 
 	blk_mq_start_request(req);
 
+	spin_lock_irq(&ubd_dev->lock);
+
 	if (req_op(req) == REQ_OP_FLUSH) {
 		ret = ubd_queue_one_vec(hctx, req, 0, NULL);
 	} else {
@@ -1361,9 +1364,11 @@ static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
 		}
 	}
 out:
-	if (ret < 0) {
+	spin_unlock_irq(&ubd_dev->lock);
+
+	if (ret < 0)
 		blk_mq_requeue_request(req, true);
-	}
+
 	return BLK_STS_OK;
 }
 
-- 
GitLab


From e31d36b0a453d581fb077fce6883811c5e14874d Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Wed, 7 Nov 2018 10:47:57 +0100
Subject: [PATCH 0788/1890] MAINTAINERS: Fix remaining pointers to obsolete
 libata.git

libata.git no longer exists.  Replace the remaining pointers to it by
pointers to the block tree, which is where all libata development
happens now.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 MAINTAINERS | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 2b928aa857ce4..262ece39cc471 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8343,7 +8343,7 @@ F:	drivers/media/dvb-frontends/lgdt3305.*
 LIBATA PATA ARASAN COMPACT FLASH CONTROLLER
 M:	Viresh Kumar <vireshk@kernel.org>
 L:	linux-ide@vger.kernel.org
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git
 S:	Maintained
 F:	include/linux/pata_arasan_cf_data.h
 F:	drivers/ata/pata_arasan_cf.c
@@ -8360,7 +8360,7 @@ F:	drivers/ata/ata_generic.c
 LIBATA PATA FARADAY FTIDE010 AND GEMINI SATA BRIDGE DRIVERS
 M:	Linus Walleij <linus.walleij@linaro.org>
 L:	linux-ide@vger.kernel.org
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git
 S:	Maintained
 F:	drivers/ata/pata_ftide010.c
 F:	drivers/ata/sata_gemini.c
@@ -8379,7 +8379,7 @@ F:	include/linux/ahci_platform.h
 LIBATA SATA PROMISE TX2/TX4 CONTROLLER DRIVER
 M:	Mikael Pettersson <mikpelinux@gmail.com>
 L:	linux-ide@vger.kernel.org
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git
 S:	Maintained
 F:	drivers/ata/sata_promise.*
 
-- 
GitLab


From f3587d76da05f68098ddb1cb3c98cc6a9e8a402c Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Wed, 7 Nov 2018 07:37:45 -0700
Subject: [PATCH 0789/1890] block: Clear kernel memory before copying to user

If the kernel allocates a bounce buffer for user read data, this memory
needs to be cleared before copying it to the user, otherwise it may leak
kernel memory to user space.

Laurence Oberman <loberman@redhat.com>
Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/block/bio.c b/block/bio.c
index 4a5a036268fb8..9a9c590675213 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1260,6 +1260,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
 		if (ret)
 			goto cleanup;
 	} else {
+		zero_fill_bio(bio);
 		iov_iter_advance(iter, bio->bi_iter.bi_size);
 	}
 
-- 
GitLab


From 3c7eda0b65ad279dd2f26908d2f4bd088da93996 Mon Sep 17 00:00:00 2001
From: Evan Quan <evan.quan@amd.com>
Date: Wed, 24 Oct 2018 12:11:35 +0800
Subject: [PATCH 0790/1890] drm/amd/powerplay: set a default fclk/gfxclk ratio

Otherwise big gap between these two clocks may causes
some hangs.

Signed-off-by: Evan Quan <evan.quan@amd.com>
Reviewed-by: Feifei Xu <Feifei.Xu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c   | 16 ++++++++++++++++
 .../gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.h   |  1 +
 drivers/gpu/drm/amd/powerplay/inc/vega20_ppsmc.h |  3 ++-
 3 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
index 57143d51e3eed..8bf76108cf393 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
@@ -120,6 +120,7 @@ static void vega20_set_default_registry_data(struct pp_hwmgr *hwmgr)
 	data->registry_data.disable_auto_wattman = 1;
 	data->registry_data.auto_wattman_debug = 0;
 	data->registry_data.auto_wattman_sample_period = 100;
+	data->registry_data.fclk_gfxclk_ratio = 0x3F6CCCCD;
 	data->registry_data.auto_wattman_threshold = 50;
 	data->registry_data.gfxoff_controlled_by_driver = 1;
 	data->gfxoff_allowed = false;
@@ -829,6 +830,16 @@ static int vega20_enable_all_smu_features(struct pp_hwmgr *hwmgr)
 	return 0;
 }
 
+static int vega20_send_clock_ratio(struct pp_hwmgr *hwmgr)
+{
+	struct vega20_hwmgr *data =
+			(struct vega20_hwmgr *)(hwmgr->backend);
+
+	return smum_send_msg_to_smc_with_parameter(hwmgr,
+			PPSMC_MSG_SetFclkGfxClkRatio,
+			data->registry_data.fclk_gfxclk_ratio);
+}
+
 static int vega20_disable_all_smu_features(struct pp_hwmgr *hwmgr)
 {
 	struct vega20_hwmgr *data =
@@ -1532,6 +1543,11 @@ static int vega20_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
 			"[EnableDPMTasks] Failed to enable all smu features!",
 			return result);
 
+	result = vega20_send_clock_ratio(hwmgr);
+	PP_ASSERT_WITH_CODE(!result,
+			"[EnableDPMTasks] Failed to send clock ratio!",
+			return result);
+
 	/* Initialize UVD/VCE powergating state */
 	vega20_init_powergate_state(hwmgr);
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.h
index 56fe6a0d42e80..25faaa5c5b10c 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.h
@@ -328,6 +328,7 @@ struct vega20_registry_data {
 	uint8_t   disable_auto_wattman;
 	uint32_t  auto_wattman_debug;
 	uint32_t  auto_wattman_sample_period;
+	uint32_t  fclk_gfxclk_ratio;
 	uint8_t   auto_wattman_threshold;
 	uint8_t   log_avfs_param;
 	uint8_t   enable_enginess;
diff --git a/drivers/gpu/drm/amd/powerplay/inc/vega20_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/vega20_ppsmc.h
index 45d64a81e9453..4f63a736ea0e7 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/vega20_ppsmc.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/vega20_ppsmc.h
@@ -105,7 +105,8 @@
 #define PPSMC_MSG_SetSystemVirtualDramAddrHigh   0x4B
 #define PPSMC_MSG_SetSystemVirtualDramAddrLow    0x4C
 #define PPSMC_MSG_WaflTest                       0x4D
-// Unused ID 0x4E to 0x50
+#define PPSMC_MSG_SetFclkGfxClkRatio             0x4E
+// Unused ID 0x4F to 0x50
 #define PPSMC_MSG_AllowGfxOff                    0x51
 #define PPSMC_MSG_DisallowGfxOff                 0x52
 #define PPSMC_MSG_GetPptLimit                    0x53
-- 
GitLab


From 108110a3ffa3483d743f3bff93917ba64dc8edd0 Mon Sep 17 00:00:00 2001
From: Evan Quan <evan.quan@amd.com>
Date: Wed, 7 Nov 2018 09:16:07 +0800
Subject: [PATCH 0791/1890] drm/amd/powerplay: always use fast UCLK switching
 when UCLK DPM enabled

With UCLK DPM enabled, slow switching is not supported any more.

Signed-off-by: Evan Quan <evan.quan@amd.com>
Reviewed-by: Feifei Xu <Feifei.Xu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../drm/amd/powerplay/hwmgr/vega20_hwmgr.c    | 37 +++++++++----------
 1 file changed, 17 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
index 8bf76108cf393..99861f32b1f95 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
@@ -830,6 +830,18 @@ static int vega20_enable_all_smu_features(struct pp_hwmgr *hwmgr)
 	return 0;
 }
 
+static int vega20_notify_smc_display_change(struct pp_hwmgr *hwmgr)
+{
+	struct vega20_hwmgr *data = (struct vega20_hwmgr *)(hwmgr->backend);
+
+	if (data->smu_features[GNLD_DPM_UCLK].enabled)
+		return smum_send_msg_to_smc_with_parameter(hwmgr,
+			PPSMC_MSG_SetUclkFastSwitch,
+			1);
+
+	return 0;
+}
+
 static int vega20_send_clock_ratio(struct pp_hwmgr *hwmgr)
 {
 	struct vega20_hwmgr *data =
@@ -1543,6 +1555,11 @@ static int vega20_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
 			"[EnableDPMTasks] Failed to enable all smu features!",
 			return result);
 
+	result = vega20_notify_smc_display_change(hwmgr);
+	PP_ASSERT_WITH_CODE(!result,
+			"[EnableDPMTasks] Failed to notify smc display change!",
+			return result);
+
 	result = vega20_send_clock_ratio(hwmgr);
 	PP_ASSERT_WITH_CODE(!result,
 			"[EnableDPMTasks] Failed to send clock ratio!",
@@ -1988,19 +2005,6 @@ static int vega20_read_sensor(struct pp_hwmgr *hwmgr, int idx,
 	return ret;
 }
 
-static int vega20_notify_smc_display_change(struct pp_hwmgr *hwmgr,
-		bool has_disp)
-{
-	struct vega20_hwmgr *data = (struct vega20_hwmgr *)(hwmgr->backend);
-
-	if (data->smu_features[GNLD_DPM_UCLK].enabled)
-		return smum_send_msg_to_smc_with_parameter(hwmgr,
-			PPSMC_MSG_SetUclkFastSwitch,
-			has_disp ? 1 : 0);
-
-	return 0;
-}
-
 int vega20_display_clock_voltage_request(struct pp_hwmgr *hwmgr,
 		struct pp_display_clock_request *clock_req)
 {
@@ -2060,13 +2064,6 @@ static int vega20_notify_smc_display_config_after_ps_adjustment(
 	struct pp_display_clock_request clock_req;
 	int ret = 0;
 
-	if ((hwmgr->display_config->num_display > 1) &&
-	     !hwmgr->display_config->multi_monitor_in_sync &&
-	     !hwmgr->display_config->nb_pstate_switch_disable)
-		vega20_notify_smc_display_change(hwmgr, false);
-	else
-		vega20_notify_smc_display_change(hwmgr, true);
-
 	min_clocks.dcefClock = hwmgr->display_config->min_dcef_set_clk;
 	min_clocks.dcefClockInSR = hwmgr->display_config->min_dcef_deep_sleep_set_clk;
 	min_clocks.memoryClock = hwmgr->display_config->min_mem_set_clock;
-- 
GitLab


From 8be17ac95f8451b51f636622d8d4b8674334f468 Mon Sep 17 00:00:00 2001
From: "Jerry (Fangzhi) Zuo" <Jerry.Zuo@amd.com>
Date: Tue, 30 Oct 2018 14:28:49 -0400
Subject: [PATCH 0792/1890] drm/amd/display: Cleanup MST non-atomic code
 workaround

[why]
It is not correct to touch aconnector within atomic_check.

[How]
It was added as workaround before, and no longer needed.

Signed-off-by: Jerry (Fangzhi) Zuo <Jerry.Zuo@amd.com>
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Reviewed-by: Lyude Paul <lyude@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 15 +++-----
 .../display/amdgpu_dm/amdgpu_dm_mst_types.c   | 34 -------------------
 .../display/amdgpu_dm/amdgpu_dm_mst_types.h   |  1 -
 3 files changed, 4 insertions(+), 46 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index d3f5cb1795bf8..c1262f62cd9f2 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -2703,18 +2703,11 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 	drm_connector = &aconnector->base;
 
 	if (!aconnector->dc_sink) {
-		/*
-		 * Create dc_sink when necessary to MST
-		 * Don't apply fake_sink to MST
-		 */
-		if (aconnector->mst_port) {
-			dm_dp_mst_dc_sink_create(drm_connector);
-			return stream;
+		if (!aconnector->mst_port) {
+			sink = create_fake_sink(aconnector);
+			if (!sink)
+				return stream;
 		}
-
-		sink = create_fake_sink(aconnector);
-		if (!sink)
-			return stream;
 	} else {
 		sink = aconnector->dc_sink;
 	}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index 03601d717fed9..03e98967d6c2f 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -205,40 +205,6 @@ static const struct drm_connector_funcs dm_dp_mst_connector_funcs = {
 	.atomic_get_property = amdgpu_dm_connector_atomic_get_property
 };
 
-void dm_dp_mst_dc_sink_create(struct drm_connector *connector)
-{
-	struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
-	struct dc_sink *dc_sink;
-	struct dc_sink_init_data init_params = {
-			.link = aconnector->dc_link,
-			.sink_signal = SIGNAL_TYPE_DISPLAY_PORT_MST };
-
-	/* FIXME none of this is safe. we shouldn't touch aconnector here in
-	 * atomic_check
-	 */
-
-	/*
-	 * TODO: Need to further figure out why ddc.algo is NULL while MST port exists
-	 */
-	if (!aconnector->port || !aconnector->port->aux.ddc.algo)
-		return;
-
-	ASSERT(aconnector->edid);
-
-	dc_sink = dc_link_add_remote_sink(
-		aconnector->dc_link,
-		(uint8_t *)aconnector->edid,
-		(aconnector->edid->extensions + 1) * EDID_LENGTH,
-		&init_params);
-
-	dc_sink->priv = aconnector;
-	aconnector->dc_sink = dc_sink;
-
-	if (aconnector->dc_sink)
-		amdgpu_dm_update_freesync_caps(
-				connector, aconnector->edid);
-}
-
 static int dm_dp_mst_get_modes(struct drm_connector *connector)
 {
 	struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
index 8cf51da26657e..2da851b40042a 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
@@ -31,6 +31,5 @@ struct amdgpu_dm_connector;
 
 void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm,
 				       struct amdgpu_dm_connector *aconnector);
-void dm_dp_mst_dc_sink_create(struct drm_connector *connector);
 
 #endif
-- 
GitLab


From 0e6613e46fed29316f33acf86e1d1568288638b5 Mon Sep 17 00:00:00 2001
From: "Jerry (Fangzhi) Zuo" <Jerry.Zuo@amd.com>
Date: Tue, 30 Oct 2018 14:37:16 -0400
Subject: [PATCH 0793/1890] drm/amd/display: Drop reusing drm connector for MST

[why]
It is not safe to keep existing connector while entire topology
has been removed. Could lead potential impact to uapi.
Entirely unregister all the connectors on the topology,
and use a new set of connectors when the topology is plugged back
on.

[How]
Remove the drm connector entirely each time when the
corresponding MST topology is gone.
When hotunplug a connector (e.g., DP2)
1. Remove connector from userspace.
2. Drop it's reference.
When hotplug back on:
1. Detect new topology, and create new connectors.
2. Notify userspace with sysfs hotplug event.
3. Reprobe new connectors, and reassign CRTC from old (e.g., DP2)
to new (e.g., DP3) connector.

Signed-off-by: Jerry (Fangzhi) Zuo <Jerry.Zuo@amd.com>
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Reviewed-by: Lyude Paul <lyude@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  2 -
 .../display/amdgpu_dm/amdgpu_dm_mst_types.c   | 40 ++++---------------
 2 files changed, 7 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 978b34a5011ce..924a38a1fc446 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -160,8 +160,6 @@ struct amdgpu_dm_connector {
 	struct mutex hpd_lock;
 
 	bool fake_enable;
-
-	bool mst_connected;
 };
 
 #define to_amdgpu_dm_connector(x) container_of(x, struct amdgpu_dm_connector, base)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index 03e98967d6c2f..31126eb23a740 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -320,25 +320,6 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
 	struct amdgpu_device *adev = dev->dev_private;
 	struct amdgpu_dm_connector *aconnector;
 	struct drm_connector *connector;
-	struct drm_connector_list_iter conn_iter;
-
-	drm_connector_list_iter_begin(dev, &conn_iter);
-	drm_for_each_connector_iter(connector, &conn_iter) {
-		aconnector = to_amdgpu_dm_connector(connector);
-		if (aconnector->mst_port == master
-				&& !aconnector->port) {
-			DRM_INFO("DM_MST: reusing connector: %p [id: %d] [master: %p]\n",
-						aconnector, connector->base.id, aconnector->mst_port);
-
-			aconnector->port = port;
-			drm_connector_set_path_property(connector, pathprop);
-
-			drm_connector_list_iter_end(&conn_iter);
-			aconnector->mst_connected = true;
-			return &aconnector->base;
-		}
-	}
-	drm_connector_list_iter_end(&conn_iter);
 
 	aconnector = kzalloc(sizeof(*aconnector), GFP_KERNEL);
 	if (!aconnector)
@@ -387,8 +368,6 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
 	 */
 	amdgpu_dm_connector_funcs_reset(connector);
 
-	aconnector->mst_connected = true;
-
 	DRM_INFO("DM_MST: added connector: %p [id: %d] [master: %p]\n",
 			aconnector, connector->base.id, aconnector->mst_port);
 
@@ -400,6 +379,9 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
 static void dm_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
 					struct drm_connector *connector)
 {
+	struct amdgpu_dm_connector *master = container_of(mgr, struct amdgpu_dm_connector, mst_mgr);
+	struct drm_device *dev = master->base.dev;
+	struct amdgpu_device *adev = dev->dev_private;
 	struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
 
 	DRM_INFO("DM_MST: Disabling connector: %p [id: %d] [master: %p]\n",
@@ -413,7 +395,10 @@ static void dm_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
 		aconnector->dc_sink = NULL;
 	}
 
-	aconnector->mst_connected = false;
+	drm_connector_unregister(connector);
+	if (adev->mode_info.rfbdev)
+		drm_fb_helper_remove_one_connector(&adev->mode_info.rfbdev->helper, connector);
+	drm_connector_put(connector);
 }
 
 static void dm_dp_mst_hotplug(struct drm_dp_mst_topology_mgr *mgr)
@@ -424,18 +409,10 @@ static void dm_dp_mst_hotplug(struct drm_dp_mst_topology_mgr *mgr)
 	drm_kms_helper_hotplug_event(dev);
 }
 
-static void dm_dp_mst_link_status_reset(struct drm_connector *connector)
-{
-	mutex_lock(&connector->dev->mode_config.mutex);
-	drm_connector_set_link_status_property(connector, DRM_MODE_LINK_STATUS_BAD);
-	mutex_unlock(&connector->dev->mode_config.mutex);
-}
-
 static void dm_dp_mst_register_connector(struct drm_connector *connector)
 {
 	struct drm_device *dev = connector->dev;
 	struct amdgpu_device *adev = dev->dev_private;
-	struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
 
 	if (adev->mode_info.rfbdev)
 		drm_fb_helper_add_one_connector(&adev->mode_info.rfbdev->helper, connector);
@@ -443,9 +420,6 @@ static void dm_dp_mst_register_connector(struct drm_connector *connector)
 		DRM_ERROR("adev->mode_info.rfbdev is NULL\n");
 
 	drm_connector_register(connector);
-
-	if (aconnector->mst_connected)
-		dm_dp_mst_link_status_reset(connector);
 }
 
 static const struct drm_dp_mst_topology_cbs dm_mst_cbs = {
-- 
GitLab


From 63237f8748bdf46dccf79ef8f98f05e9fe799162 Mon Sep 17 00:00:00 2001
From: Lyude Paul <lyude@redhat.com>
Date: Thu, 1 Nov 2018 21:51:49 -0400
Subject: [PATCH 0794/1890] drm/amd/amdgpu/dm: Fix
 dm_dp_create_fake_mst_encoder()

[why]
Removing connector reusage from DM to match the rest of the tree ended
up revealing an issue that was surprisingly subtle. The original amdgpu
code for DC that was submitted appears to have left a chunk in
dm_dp_create_fake_mst_encoder() that tries to find a "master encoder",
the likes of which isn't actually used or stored anywhere. It does so at
the wrong time as well by trying to access parts of the drm_connector
from the encoder init before it's actually been initialized. This
results in a NULL pointer deref on MST hotplugs:

[  160.696613] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
[  160.697234] PGD 0 P4D 0
[  160.697814] Oops: 0010 [#1] SMP PTI
[  160.698430] CPU: 2 PID: 64 Comm: kworker/2:1 Kdump: loaded Tainted: G           O      4.19.0Lyude-Test+ #2
[  160.699020] Hardware name: HP HP ZBook 15 G4/8275, BIOS P70 Ver. 01.22 05/17/2018
[  160.699672] Workqueue: events_long drm_dp_mst_link_probe_work [drm_kms_helper]
[  160.700322] RIP: 0010:          (null)
[  160.700920] Code: Bad RIP value.
[  160.701541] RSP: 0018:ffffc9000029fc78 EFLAGS: 00010206
[  160.702183] RAX: 0000000000000000 RBX: ffff8804440ed468 RCX: ffff8804440e9158
[  160.702778] RDX: 0000000000000000 RSI: ffff8804556c5700 RDI: ffff8804440ed000
[  160.703408] RBP: ffff880458e21800 R08: 0000000000000002 R09: 000000005fca0a25
[  160.704002] R10: ffff88045a077a3d R11: ffff88045a077a3c R12: ffff8804440ed000
[  160.704614] R13: ffff880458e21800 R14: ffff8804440e9000 R15: ffff8804440e9000
[  160.705260] FS:  0000000000000000(0000) GS:ffff88045f280000(0000) knlGS:0000000000000000
[  160.705854] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  160.706478] CR2: ffffffffffffffd6 CR3: 000000000200a001 CR4: 00000000003606e0
[  160.707124] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  160.707724] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[  160.708372] Call Trace:
[  160.708998]  ? dm_dp_add_mst_connector+0xed/0x1d0 [amdgpu]
[  160.709625]  ? drm_dp_add_port+0x2fa/0x470 [drm_kms_helper]
[  160.710284]  ? wake_up_q+0x54/0x70
[  160.710877]  ? __mutex_unlock_slowpath.isra.18+0xb3/0x110
[  160.711512]  ? drm_dp_dpcd_access+0xe7/0x110 [drm_kms_helper]
[  160.712161]  ? drm_dp_send_link_address+0x155/0x1e0 [drm_kms_helper]
[  160.712762]  ? drm_dp_check_and_send_link_address+0xa3/0xd0 [drm_kms_helper]
[  160.713408]  ? drm_dp_mst_link_probe_work+0x4b/0x80 [drm_kms_helper]
[  160.714013]  ? process_one_work+0x1a1/0x3a0
[  160.714667]  ? worker_thread+0x30/0x380
[  160.715326]  ? wq_update_unbound_numa+0x10/0x10
[  160.715939]  ? kthread+0x112/0x130
[  160.716591]  ? kthread_create_worker_on_cpu+0x70/0x70
[  160.717262]  ? ret_from_fork+0x35/0x40
[  160.717886] Modules linked in: amdgpu(O) vfat fat snd_hda_codec_generic joydev i915 chash gpu_sched ttm i2c_algo_bit drm_kms_helper snd_hda_codec_hdmi hp_wmi syscopyarea iTCO_wdt sysfillrect sparse_keymap sysimgblt fb_sys_fops snd_hda_intel usbhid wmi_bmof drm snd_hda_codec btusb snd_hda_core intel_rapl btrtl x86_pkg_temp_thermal btbcm btintel coretemp snd_pcm crc32_pclmul bluetooth psmouse snd_timer snd pcspkr i2c_i801 mei_me i2c_core soundcore mei tpm_tis wmi tpm_tis_core hp_accel ecdh_generic lis3lv02d tpm video rfkill acpi_pad input_polldev hp_wireless pcc_cpufreq crc32c_intel serio_raw tg3 xhci_pci xhci_hcd [last unloaded: amdgpu]
[  160.720141] CR2: 0000000000000000

Somehow the connector reusage DM was using for MST connectors managed to
paper over this issue entirely; hence why this was never caught until
now.

[how]
Since this code isn't used anywhere and seems useless anyway, we can
just drop it entirely. This appears to fix the issue on my HP ZBook with
an AMD WX4150.

Signed-off-by: Lyude Paul <lyude@redhat.com>
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index 31126eb23a740..d02c32a1039c0 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -285,12 +285,7 @@ dm_dp_create_fake_mst_encoder(struct amdgpu_dm_connector *connector)
 	struct amdgpu_device *adev = dev->dev_private;
 	struct amdgpu_encoder *amdgpu_encoder;
 	struct drm_encoder *encoder;
-	const struct drm_connector_helper_funcs *connector_funcs =
-		connector->base.helper_private;
-	struct drm_encoder *enc_master =
-		connector_funcs->best_encoder(&connector->base);
 
-	DRM_DEBUG_KMS("enc master is %p\n", enc_master);
 	amdgpu_encoder = kzalloc(sizeof(*amdgpu_encoder), GFP_KERNEL);
 	if (!amdgpu_encoder)
 		return NULL;
-- 
GitLab


From de59fae0043f07de5d25e02ca360f7d57bfa5866 Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@virtuozzo.com>
Date: Wed, 7 Nov 2018 22:36:23 -0500
Subject: [PATCH 0795/1890] ext4: fix buffer leak in __ext4_read_dirblock() on
 error path

Fixes: dc6982ff4db1 ("ext4: refactor code to read directory blocks ...")
Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org # 3.9
---
 fs/ext4/namei.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index d388cce72db20..6a6b90363ef1b 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -126,6 +126,7 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
 	if (!is_dx_block && type == INDEX) {
 		ext4_error_inode(inode, func, line, block,
 		       "directory leaf block found instead of index block");
+		brelse(bh);
 		return ERR_PTR(-EFSCORRUPTED);
 	}
 	if (!ext4_has_metadata_csum(inode->i_sb) ||
-- 
GitLab


From d2f007dbe7e4c9583eea6eb04d60001e85c6f1bd Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Mon, 5 Nov 2018 20:55:09 +0100
Subject: [PATCH 0796/1890] userns: also map extents in the reverse map to
 kernel IDs

The current logic first clones the extent array and sorts both copies, then
maps the lower IDs of the forward mapping into the lower namespace, but
doesn't map the lower IDs of the reverse mapping.

This means that code in a nested user namespace with >5 extents will see
incorrect IDs. It also breaks some access checks, like
inode_owner_or_capable() and privileged_wrt_inode_uidgid(), so a process
can incorrectly appear to be capable relative to an inode.

To fix it, we have to make sure that the "lower_first" members of extents
in both arrays are translated; and we have to make sure that the reverse
map is sorted *after* the translation (since otherwise the translation can
break the sorting).

This is CVE-2018-18955.

Fixes: 6397fac4915a ("userns: bump idmap limits to 340")
Cc: stable@vger.kernel.org
Signed-off-by: Jann Horn <jannh@google.com>
Tested-by: Eric W. Biederman <ebiederm@xmission.com>
Reviewed-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 kernel/user_namespace.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index e5222b5fb4fe6..923414a246e9e 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -974,10 +974,6 @@ static ssize_t map_write(struct file *file, const char __user *buf,
 	if (!new_idmap_permitted(file, ns, cap_setid, &new_map))
 		goto out;
 
-	ret = sort_idmaps(&new_map);
-	if (ret < 0)
-		goto out;
-
 	ret = -EPERM;
 	/* Map the lower ids from the parent user namespace to the
 	 * kernel global id space.
@@ -1004,6 +1000,14 @@ static ssize_t map_write(struct file *file, const char __user *buf,
 		e->lower_first = lower_first;
 	}
 
+	/*
+	 * If we want to use binary search for lookup, this clones the extent
+	 * array and sorts both copies.
+	 */
+	ret = sort_idmaps(&new_map);
+	if (ret < 0)
+		goto out;
+
 	/* Install the map */
 	if (new_map.nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS) {
 		memcpy(map->extent, new_map.extent,
-- 
GitLab


From 96ed82cc1f515a2329f93506129ab1de35429b89 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@linux-mips.org>
Date: Wed, 7 Nov 2018 12:06:52 +0000
Subject: [PATCH 0797/1890] FDDI: defza: Fix SPDX annotation

The SPDX annotation for this driver does not match the license text,
which specifies GNU GPL 2 or later.  Make the two match by correcting
the SPDX tag.

Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/fddi/defza.c | 2 +-
 drivers/net/fddi/defza.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/fddi/defza.c b/drivers/net/fddi/defza.c
index 3b7f10a5f06a6..79980eeb0e438 100644
--- a/drivers/net/fddi/defza.c
+++ b/drivers/net/fddi/defza.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0+
 /*	FDDI network adapter driver for DEC FDDIcontroller 700/700-C devices.
  *
  *	Copyright (c) 2018  Maciej W. Rozycki
diff --git a/drivers/net/fddi/defza.h b/drivers/net/fddi/defza.h
index b06acf32738ea..ed659ed172151 100644
--- a/drivers/net/fddi/defza.h
+++ b/drivers/net/fddi/defza.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*	FDDI network adapter driver for DEC FDDIcontroller 700/700-C devices.
  *
  *	Copyright (c) 2018  Maciej W. Rozycki
-- 
GitLab


From 5f5fae37dbcf37feae48c40d2580dbd67c5df131 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@linux-mips.org>
Date: Wed, 7 Nov 2018 12:06:59 +0000
Subject: [PATCH 0798/1890] FDDI: defza: Add missing comment closing

Fix:

drivers/net/fddi/defza.h:238:1: warning: "/*" within comment [-Wcomment]

by adding a missing comment closing.

Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/fddi/defza.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/fddi/defza.h b/drivers/net/fddi/defza.h
index ed659ed172151..93bda61be8e38 100644
--- a/drivers/net/fddi/defza.h
+++ b/drivers/net/fddi/defza.h
@@ -235,6 +235,7 @@ struct fza_ring_cmd {
 #define FZA_RING_CMD		0x200400	/* command ring address */
 #define FZA_RING_CMD_SIZE	0x40		/* command descriptor ring
 						 * size
+						 */
 /* Command constants. */
 #define FZA_RING_CMD_MASK	0x7fffffff
 #define FZA_RING_CMD_NOP	0x00000000	/* nop */
-- 
GitLab


From 04453b6b241913f85af013eec187bab776428af5 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@linux-mips.org>
Date: Wed, 7 Nov 2018 12:07:05 +0000
Subject: [PATCH 0799/1890] FDDI: defza: Move SMT Tx data buffer declaration
 next to its skb

Move the temporary data buffer used when tapping into the SMT Tx queue
from the outer function level into the conditional block it's actually
used in and its containing skb is also declared, making the structure of
code better.

Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/fddi/defza.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/fddi/defza.c b/drivers/net/fddi/defza.c
index 79980eeb0e438..bebd0d6d2320a 100644
--- a/drivers/net/fddi/defza.c
+++ b/drivers/net/fddi/defza.c
@@ -784,7 +784,7 @@ static void fza_rx(struct net_device *dev)
 static void fza_tx_smt(struct net_device *dev)
 {
 	struct fza_private *fp = netdev_priv(dev);
-	struct fza_buffer_tx __iomem *smt_tx_ptr, *skb_data_ptr;
+	struct fza_buffer_tx __iomem *smt_tx_ptr;
 	int i, len;
 	u32 own;
 
@@ -799,6 +799,7 @@ static void fza_tx_smt(struct net_device *dev)
 
 		if (!netif_queue_stopped(dev)) {
 			if (dev_nit_active(dev)) {
+				struct fza_buffer_tx *skb_data_ptr;
 				struct sk_buff *skb;
 
 				/* Length must be a multiple of 4 as only word
-- 
GitLab


From 8f5365ebf7b17c35dddbb694b4f0ffd1293a947f Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@linux-mips.org>
Date: Wed, 7 Nov 2018 12:07:10 +0000
Subject: [PATCH 0800/1890] FDDI: defza: Make the driver version string
 constant

The driver version string is obviously not meant to be changed at run
time, so mark it `const'.

Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/fddi/defza.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/fddi/defza.c b/drivers/net/fddi/defza.c
index bebd0d6d2320a..c5cae8e74dc40 100644
--- a/drivers/net/fddi/defza.c
+++ b/drivers/net/fddi/defza.c
@@ -56,7 +56,7 @@
 #define DRV_VERSION "v.1.1.4"
 #define DRV_RELDATE "Oct  6 2018"
 
-static char version[] =
+static const char version[] =
 	DRV_NAME ": " DRV_VERSION "  " DRV_RELDATE "  Maciej W. Rozycki\n";
 
 MODULE_AUTHOR("Maciej W. Rozycki <macro@linux-mips.org>");
-- 
GitLab


From 25d202ed820ee347edec0bf3bf553544556bf64b Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 22 Oct 2018 10:21:38 -0500
Subject: [PATCH 0801/1890] mount: Retest MNT_LOCKED in do_umount

It was recently pointed out that the one instance of testing MNT_LOCKED
outside of the namespace_sem is in ksys_umount.

Fix that by adding a test inside of do_umount with namespace_sem and
the mount_lock held.  As it helps to fail fails the existing test is
maintained with an additional comment pointing out that it may be racy
because the locks are not held.

Cc: stable@vger.kernel.org
Reported-by: Al Viro <viro@ZenIV.linux.org.uk>
Fixes: 5ff9d8a65ce8 ("vfs: Lock in place mounts from more privileged users")
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/namespace.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index 98d27da433047..72f10c40fe3f6 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1540,8 +1540,13 @@ static int do_umount(struct mount *mnt, int flags)
 
 	namespace_lock();
 	lock_mount_hash();
-	event++;
 
+	/* Recheck MNT_LOCKED with the locks held */
+	retval = -EINVAL;
+	if (mnt->mnt.mnt_flags & MNT_LOCKED)
+		goto out;
+
+	event++;
 	if (flags & MNT_DETACH) {
 		if (!list_empty(&mnt->mnt_list))
 			umount_tree(mnt, UMOUNT_PROPAGATE);
@@ -1555,6 +1560,7 @@ static int do_umount(struct mount *mnt, int flags)
 			retval = 0;
 		}
 	}
+out:
 	unlock_mount_hash();
 	namespace_unlock();
 	return retval;
@@ -1645,7 +1651,7 @@ int ksys_umount(char __user *name, int flags)
 		goto dput_and_out;
 	if (!check_mnt(mnt))
 		goto dput_and_out;
-	if (mnt->mnt.mnt_flags & MNT_LOCKED)
+	if (mnt->mnt.mnt_flags & MNT_LOCKED) /* Check optimistically */
 		goto dput_and_out;
 	retval = -EPERM;
 	if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN))
-- 
GitLab


From df7342b240185d58d3d9665c0bbf0a0f5570ec29 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 25 Oct 2018 09:04:18 -0500
Subject: [PATCH 0802/1890] mount: Don't allow copying
 MNT_UNBINDABLE|MNT_LOCKED mounts

Jonathan Calmels from NVIDIA reported that he's able to bypass the
mount visibility security check in place in the Linux kernel by using
a combination of the unbindable property along with the private mount
propagation option to allow a unprivileged user to see a path which
was purposefully hidden by the root user.

Reproducer:
  # Hide a path to all users using a tmpfs
  root@castiana:~# mount -t tmpfs tmpfs /sys/devices/
  root@castiana:~#

  # As an unprivileged user, unshare user namespace and mount namespace
  stgraber@castiana:~$ unshare -U -m -r

  # Confirm the path is still not accessible
  root@castiana:~# ls /sys/devices/

  # Make /sys recursively unbindable and private
  root@castiana:~# mount --make-runbindable /sys
  root@castiana:~# mount --make-private /sys

  # Recursively bind-mount the rest of /sys over to /mnnt
  root@castiana:~# mount --rbind /sys/ /mnt

  # Access our hidden /sys/device as an unprivileged user
  root@castiana:~# ls /mnt/devices/
  breakpoint cpu cstate_core cstate_pkg i915 intel_pt isa kprobe
  LNXSYSTM:00 msr pci0000:00 platform pnp0 power software system
  tracepoint uncore_arb uncore_cbox_0 uncore_cbox_1 uprobe virtual

Solve this by teaching copy_tree to fail if a mount turns out to be
both unbindable and locked.

Cc: stable@vger.kernel.org
Fixes: 5ff9d8a65ce8 ("vfs: Lock in place mounts from more privileged users")
Reported-by: Jonathan Calmels <jcalmels@nvidia.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/namespace.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index 72f10c40fe3f6..e0e0f9cf6c30d 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1734,8 +1734,14 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
 		for (s = r; s; s = next_mnt(s, r)) {
 			if (!(flag & CL_COPY_UNBINDABLE) &&
 			    IS_MNT_UNBINDABLE(s)) {
-				s = skip_mnt_tree(s);
-				continue;
+				if (s->mnt.mnt_flags & MNT_LOCKED) {
+					/* Both unbindable and locked. */
+					q = ERR_PTR(-EPERM);
+					goto out;
+				} else {
+					s = skip_mnt_tree(s);
+					continue;
+				}
 			}
 			if (!(flag & CL_COPY_MNT_NS_FILE) &&
 			    is_mnt_ns_file(s->mnt.mnt_root)) {
-- 
GitLab


From e84b47941e15e6666afb8ee8b21d1c3fc1a013af Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= <mirq-linux@rere.qmqm.pl>
Date: Wed, 7 Nov 2018 17:50:52 +0100
Subject: [PATCH 0803/1890] ibmvnic: fix accelerated VLAN handling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Don't request tag insertion when it isn't present in outgoing skb.

Signed-off-by: MichaÅ‚ MirosÅ‚aw <mirq-linux@rere.qmqm.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 7893beffcc714..c9d5d0a7fbf17 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1545,7 +1545,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 	tx_crq.v1.sge_len = cpu_to_be32(skb->len);
 	tx_crq.v1.ioba = cpu_to_be64(data_dma_addr);
 
-	if (adapter->vlan_header_insertion) {
+	if (adapter->vlan_header_insertion && skb_vlan_tag_present(skb)) {
 		tx_crq.v1.flags2 |= IBMVNIC_TX_VLAN_INSERT;
 		tx_crq.v1.vlan_id = cpu_to_be16(skb->vlan_tci);
 	}
-- 
GitLab


From b25ddb00bc1b96613edcb525f19203a7d1405fce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= <mirq-linux@rere.qmqm.pl>
Date: Wed, 7 Nov 2018 17:50:53 +0100
Subject: [PATCH 0804/1890] qlcnic: remove assumption that vlan_tci != 0
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

VLAN.TCI == 0 is perfectly valid (802.1p), so allow it to be accelerated.

Signed-off-by: MichaÅ‚ MirosÅ‚aw <mirq-linux@rere.qmqm.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
index 9647578cbe6a8..14f26bf3b388b 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
@@ -459,7 +459,7 @@ static int qlcnic_tx_pkt(struct qlcnic_adapter *adapter,
 			 struct cmd_desc_type0 *first_desc, struct sk_buff *skb,
 			 struct qlcnic_host_tx_ring *tx_ring)
 {
-	u8 l4proto, opcode = 0, hdr_len = 0;
+	u8 l4proto, opcode = 0, hdr_len = 0, tag_vlan = 0;
 	u16 flags = 0, vlan_tci = 0;
 	int copied, offset, copy_len, size;
 	struct cmd_desc_type0 *hwdesc;
@@ -472,14 +472,16 @@ static int qlcnic_tx_pkt(struct qlcnic_adapter *adapter,
 		flags = QLCNIC_FLAGS_VLAN_TAGGED;
 		vlan_tci = ntohs(vh->h_vlan_TCI);
 		protocol = ntohs(vh->h_vlan_encapsulated_proto);
+		tag_vlan = 1;
 	} else if (skb_vlan_tag_present(skb)) {
 		flags = QLCNIC_FLAGS_VLAN_OOB;
 		vlan_tci = skb_vlan_tag_get(skb);
+		tag_vlan = 1;
 	}
 	if (unlikely(adapter->tx_pvid)) {
-		if (vlan_tci && !(adapter->flags & QLCNIC_TAGGING_ENABLED))
+		if (tag_vlan && !(adapter->flags & QLCNIC_TAGGING_ENABLED))
 			return -EIO;
-		if (vlan_tci && (adapter->flags & QLCNIC_TAGGING_ENABLED))
+		if (tag_vlan && (adapter->flags & QLCNIC_TAGGING_ENABLED))
 			goto set_flags;
 
 		flags = QLCNIC_FLAGS_VLAN_OOB;
-- 
GitLab


From e6a2d72c10405b30ddba5af2e44a9d3d925a56d3 Mon Sep 17 00:00:00 2001
From: Juri Lelli <juri.lelli@redhat.com>
Date: Wed, 7 Nov 2018 12:10:32 +0100
Subject: [PATCH 0805/1890] posix-cpu-timers: Remove useless call to
 check_dl_overrun()

check_dl_overrun() is used to send a SIGXCPU to users that asked to be
informed when a SCHED_DEADLINE runtime overruns occur.

The function is called by check_thread_timers() already, so the call in
check_process_timers() is redundant/wrong (even though harmless).

Remove it.

Fixes: 34be39305a77 ("sched/deadline: Implement "runtime overrun signal" support")
Signed-off-by: Juri Lelli <juri.lelli@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Daniel Bristot de Oliveira <bristot@redhat.com>
Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: linux-rt-users@vger.kernel.org
Cc: mtk.manpages@gmail.com
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Luca Abeni <luca.abeni@santannapisa.it>
Cc: Claudio Scordino <claudio@evidence.eu.com>
Link: https://lkml.kernel.org/r/20181107111032.32291-1-juri.lelli@redhat.com
---
 kernel/time/posix-cpu-timers.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index ce32cf741b250..8f0644af40be7 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -917,9 +917,6 @@ static void check_process_timers(struct task_struct *tsk,
 	struct task_cputime cputime;
 	unsigned long soft;
 
-	if (dl_task(tsk))
-		check_dl_overrun(tsk);
-
 	/*
 	 * If cputimer is not running, then there are no active
 	 * process wide timers (POSIX 1.b, itimers, RLIMIT_CPU).
-- 
GitLab


From 0bb2ae1b26e1fb7543ec7474cdd374ac4b88c4da Mon Sep 17 00:00:00 2001
From: Thomas Richter <tmricht@linux.ibm.com>
Date: Mon, 29 Oct 2018 08:11:33 +0000
Subject: [PATCH 0806/1890] s390/perf: Change CPUM_CF return code in event init
 function

The function perf_init_event() creates a new event and
assignes it to a PMU. This a done in a loop over all existing
PMUs. For each listed PMU the event init function is called
and if this function does return any other error than -ENOENT,
the loop is terminated the creation of the event fails.

If the event is invalid, return -ENOENT to try other PMUs.

Signed-off-by: Thomas Richter <tmricht@linux.ibm.com>
Reviewed-by: Hendrik Brueckner <brueckner@linux.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/perf_cpum_cf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index cc085e2d2ce99..74091fd3101e9 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -373,7 +373,7 @@ static int __hw_perf_event_init(struct perf_event *event)
 		return -ENOENT;
 
 	if (ev > PERF_CPUM_CF_MAX_CTR)
-		return -EINVAL;
+		return -ENOENT;
 
 	/* Obtain the counter set to which the specified counter belongs */
 	set = get_counter_set(ev);
-- 
GitLab


From 9c8e0a1b683525464a2abe9fb4b54404a50ed2b4 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 25 Oct 2018 12:05:11 -0500
Subject: [PATCH 0807/1890] mount: Prevent MNT_DETACH from disconnecting locked
 mounts

Timothy Baldwin <timbaldwin@fastmail.co.uk> wrote:
> As per mount_namespaces(7) unprivileged users should not be able to look under mount points:
>
>   Mounts that come as a single unit from more privileged mount are locked
>   together and may not be separated in a less privileged mount namespace.
>
> However they can:
>
> 1. Create a mount namespace.
> 2. In the mount namespace open a file descriptor to the parent of a mount point.
> 3. Destroy the mount namespace.
> 4. Use the file descriptor to look under the mount point.
>
> I have reproduced this with Linux 4.16.18 and Linux 4.18-rc8.
>
> The setup:
>
> $ sudo sysctl kernel.unprivileged_userns_clone=1
> kernel.unprivileged_userns_clone = 1
> $ mkdir -p A/B/Secret
> $ sudo mount -t tmpfs hide A/B
>
>
> "Secret" is indeed hidden as expected:
>
> $ ls -lR A
> A:
> total 0
> drwxrwxrwt 2 root root 40 Feb 12 21:08 B
>
> A/B:
> total 0
>
>
> The attack revealing "Secret":
>
> $ unshare -Umr sh -c "exec unshare -m ls -lR /proc/self/fd/4/ 4<A"
> /proc/self/fd/4/:
> total 0
> drwxr-xr-x 3 root root 60 Feb 12 21:08 B
>
> /proc/self/fd/4/B:
> total 0
> drwxr-xr-x 2 root root 40 Feb 12 21:08 Secret
>
> /proc/self/fd/4/B/Secret:
> total 0

I tracked this down to put_mnt_ns running passing UMOUNT_SYNC and
disconnecting all of the mounts in a mount namespace.  Fix this by
factoring drop_mounts out of drop_collected_mounts and passing
0 instead of UMOUNT_SYNC.

There are two possible behavior differences that result from this.
- No longer setting UMOUNT_SYNC will no longer set MNT_SYNC_UMOUNT on
  the vfsmounts being unmounted.  This effects the lazy rcu walk by
  kicking the walk out of rcu mode and forcing it to be a non-lazy
  walk.
- No longer disconnecting locked mounts will keep some mounts around
  longer as they stay because the are locked to other mounts.

There are only two users of drop_collected mounts: audit_tree.c and
put_mnt_ns.

In audit_tree.c the mounts are private and there are no rcu lazy walks
only calls to iterate_mounts. So the changes should have no effect
except for a small timing effect as the connected mounts are disconnected.

In put_mnt_ns there may be references from process outside the mount
namespace to the mounts.  So the mounts remaining connected will
be the bug fix that is needed.  That rcu walks are allowed to continue
appears not to be a problem especially as the rcu walk change was about
an implementation detail not about semantics.

Cc: stable@vger.kernel.org
Fixes: 5ff9d8a65ce8 ("vfs: Lock in place mounts from more privileged users")
Reported-by: Timothy Baldwin <timbaldwin@fastmail.co.uk>
Tested-by: Timothy Baldwin <timbaldwin@fastmail.co.uk>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/namespace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index e0e0f9cf6c30d..74f64294a4108 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1794,7 +1794,7 @@ void drop_collected_mounts(struct vfsmount *mnt)
 {
 	namespace_lock();
 	lock_mount_hash();
-	umount_tree(real_mount(mnt), UMOUNT_SYNC);
+	umount_tree(real_mount(mnt), 0);
 	unlock_mount_hash();
 	namespace_unlock();
 }
-- 
GitLab


From e5b78f2e349eef5d4fca5dc1cf5a3b4b2cc27abd Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Wed, 7 Nov 2018 14:18:50 +0100
Subject: [PATCH 0808/1890] iommu/ipmmu-vmsa: Fix crash on early domain free

If iommu_ops.add_device() fails, iommu_ops.domain_free() is still
called, leading to a crash, as the domain was only partially
initialized:

    ipmmu-vmsa e67b0000.mmu: Cannot accommodate DMA translation for IOMMU page tables
    sata_rcar ee300000.sata: Unable to initialize IPMMU context
    iommu: Failed to add device ee300000.sata to group 0: -22
    Unable to handle kernel NULL pointer dereference at virtual address 0000000000000038
    ...
    Call trace:
     ipmmu_domain_free+0x1c/0xa0
     iommu_group_release+0x48/0x68
     kobject_put+0x74/0xe8
     kobject_del.part.0+0x3c/0x50
     kobject_put+0x60/0xe8
     iommu_group_get_for_dev+0xa8/0x1f0
     ipmmu_add_device+0x1c/0x40
     of_iommu_configure+0x118/0x190

Fix this by checking if the domain's context already exists, before
trying to destroy it.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Fixes: d25a2a16f0889 ('iommu: Add driver for Renesas VMSA-compatible IPMMU')
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/ipmmu-vmsa.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index b98a031895803..ddf3a492e1d59 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -498,6 +498,9 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain)
 
 static void ipmmu_domain_destroy_context(struct ipmmu_vmsa_domain *domain)
 {
+	if (!domain->mmu)
+		return;
+
 	/*
 	 * Disable the context. Flush the TLB as required when modifying the
 	 * context registers.
-- 
GitLab


From 24efee412c75843755da0ddd7bbc2db2ce9129f5 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
Date: Tue, 6 Nov 2018 22:52:11 +0100
Subject: [PATCH 0809/1890] Compiler Attributes: improve explanation of header

Explain better what "optional" attributes are, and avoid calling
them so to avoid confusion. Simply retain "Optional" as a word
to look for in the comments.

Moreover, add a couple sentences to explain a bit more the intention
and the documentation links.

Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
---
 include/linux/compiler_attributes.h | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h
index 6b28c1b7310c0..f8c400ba1929e 100644
--- a/include/linux/compiler_attributes.h
+++ b/include/linux/compiler_attributes.h
@@ -4,22 +4,26 @@
 
 /*
  * The attributes in this file are unconditionally defined and they directly
- * map to compiler attribute(s) -- except those that are optional.
+ * map to compiler attribute(s), unless one of the compilers does not support
+ * the attribute. In that case, __has_attribute is used to check for support
+ * and the reason is stated in its comment ("Optional: ...").
  *
  * Any other "attributes" (i.e. those that depend on a configuration option,
  * on a compiler, on an architecture, on plugins, on other attributes...)
  * should be defined elsewhere (e.g. compiler_types.h or compiler-*.h).
+ * The intention is to keep this file as simple as possible, as well as
+ * compiler- and version-agnostic (e.g. avoiding GCC_VERSION checks).
  *
  * This file is meant to be sorted (by actual attribute name,
  * not by #define identifier). Use the __attribute__((__name__)) syntax
  * (i.e. with underscores) to avoid future collisions with other macros.
- * If an attribute is optional, state the reason in the comment.
+ * Provide links to the documentation of each supported compiler, if it exists.
  */
 
 /*
- * To check for optional attributes, we use __has_attribute, which is supported
- * on gcc >= 5, clang >= 2.9 and icc >= 17. In the meantime, to support
- * 4.6 <= gcc < 5, we implement __has_attribute by hand.
+ * __has_attribute is supported on gcc >= 5, clang >= 2.9 and icc >= 17.
+ * In the meantime, to support 4.6 <= gcc < 5, we implement __has_attribute
+ * by hand.
  *
  * sparse does not support __has_attribute (yet) and defines __GNUC_MINOR__
  * depending on the compiler used to build it; however, these attributes have
-- 
GitLab


From e46daee53bb50bde38805f1823a182979724c229 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 30 Oct 2018 22:12:56 +0100
Subject: [PATCH 0810/1890] ARM: 8806/1: kprobes: Fix false positive with
 FORTIFY_SOURCE

The arm compiler internally interprets an inline assembly label
as an unsigned long value, not a pointer. As a result, under
CONFIG_FORTIFY_SOURCE, the address of a label has a size of 4 bytes,
which was tripping the runtime checks. Instead, we can just cast the label
(as done with the size calculations earlier).

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1639397

Reported-by: William Cohen <wcohen@redhat.com>
Fixes: 6974f0c4555e ("include/linux/string.h: add the option of fortified string.h functions")
Cc: stable@vger.kernel.org
Acked-by: Laura Abbott <labbott@redhat.com>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Tested-by: William Cohen <wcohen@redhat.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/probes/kprobes/opt-arm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/probes/kprobes/opt-arm.c b/arch/arm/probes/kprobes/opt-arm.c
index b2aa9b32bff2b..2c118a6ab3587 100644
--- a/arch/arm/probes/kprobes/opt-arm.c
+++ b/arch/arm/probes/kprobes/opt-arm.c
@@ -247,7 +247,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *or
 	}
 
 	/* Copy arch-dep-instance from template. */
-	memcpy(code, &optprobe_template_entry,
+	memcpy(code, (unsigned char *)optprobe_template_entry,
 			TMPL_END_IDX * sizeof(kprobe_opcode_t));
 
 	/* Adjust buffer according to instruction. */
-- 
GitLab


From 943210ba807ec50aafa2fa7b13bd6d36a478969b Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Tue, 23 Oct 2018 11:28:28 -0400
Subject: [PATCH 0811/1890] vt: fix broken display when running aptitude

If you run aptitude on framebuffer console, the display is corrupted. The
corruption is caused by the commit d8ae7242. The patch adds "offset" to
"start" when calling scr_memsetw, but it forgets to do the same addition
on a subsequent call to do_update_region.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Fixes: d8ae72427187 ("vt: preserve unicode values corresponding to screen characters")
Reviewed-by: Nicolas Pitre <nico@linaro.org>
Cc: stable@vger.kernel.org	# 4.19
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/vt/vt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index 5f1183b0b89df..476ec4b1b86cd 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -1551,7 +1551,7 @@ static void csi_K(struct vc_data *vc, int vpar)
 	scr_memsetw(start + offset, vc->vc_video_erase_char, 2 * count);
 	vc->vc_need_wrap = 0;
 	if (con_should_update(vc))
-		do_update_region(vc, (unsigned long) start, count);
+		do_update_region(vc, (unsigned long)(start + offset), count);
 }
 
 static void csi_X(struct vc_data *vc, int vpar) /* erase the following vpar positions */
-- 
GitLab


From 991a25194097006ec1e0d2e0814ff920e59e3465 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Mon, 22 Oct 2018 09:19:04 -0700
Subject: [PATCH 0812/1890] termios, tty/tty_baudrate.c: fix buffer overrun

On architectures with CBAUDEX == 0 (Alpha and PowerPC), the code in tty_baudrate.c does
not do any limit checking on the tty_baudrate[] array, and in fact a
buffer overrun is possible on both architectures. Add a limit check to
prevent that situation.

This will be followed by a much bigger cleanup/simplification patch.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Requested-by: Cc: Johan Hovold <johan@kernel.org>
Cc: Jiri Slaby <jslaby@suse.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Kate Stewart <kstewart@linuxfoundation.org>
Cc: Philippe Ombredanne <pombredanne@nexb.com>
Cc: Eugene Syromiatnikov <esyr@redhat.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/tty_baudrate.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/tty_baudrate.c b/drivers/tty/tty_baudrate.c
index 7576ceace5715..f438eaa682463 100644
--- a/drivers/tty/tty_baudrate.c
+++ b/drivers/tty/tty_baudrate.c
@@ -77,7 +77,7 @@ speed_t tty_termios_baud_rate(struct ktermios *termios)
 		else
 			cbaud += 15;
 	}
-	return baud_table[cbaud];
+	return cbaud >= n_baud_table ? 0 : baud_table[cbaud];
 }
 EXPORT_SYMBOL(tty_termios_baud_rate);
 
@@ -113,7 +113,7 @@ speed_t tty_termios_input_baud_rate(struct ktermios *termios)
 		else
 			cbaud += 15;
 	}
-	return baud_table[cbaud];
+	return cbaud >= n_baud_table ? 0 : baud_table[cbaud];
 #else	/* IBSHIFT */
 	return tty_termios_baud_rate(termios);
 #endif	/* IBSHIFT */
-- 
GitLab


From d0ffb805b729322626639336986bc83fc2e60871 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin (Intel)" <hpa@zytor.com>
Date: Mon, 22 Oct 2018 09:19:05 -0700
Subject: [PATCH 0813/1890] arch/alpha, termios: implement BOTHER, IBSHIFT and
 termios2

Alpha has had c_ispeed and c_ospeed, but still set speeds in c_cflags
using arbitrary flags. Because BOTHER is not defined, the general
Linux code doesn't allow setting arbitrary baud rates, and because
CBAUDEX == 0, we can have an array overrun of the baud_rate[] table in
drivers/tty/tty_baudrate.c if (c_cflags & CBAUD) == 037.

Resolve both problems by #defining BOTHER to 037 on Alpha.

However, userspace still needs to know if setting BOTHER is actually
safe given legacy kernels (does anyone actually care about that on
Alpha anymore?), so enable the TCGETS2/TCSETS*2 ioctls on Alpha, even
though they use the same structure. Define struct termios2 just for
compatibility; it is the exact same structure as struct termios. In a
future patchset, this will be cleaned up so the uapi headers are
usable from libc.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Cc: Jiri Slaby <jslaby@suse.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Kate Stewart <kstewart@linuxfoundation.org>
Cc: Philippe Ombredanne <pombredanne@nexb.com>
Cc: Eugene Syromiatnikov <esyr@redhat.com>
Cc: <linux-alpha@vger.kernel.org>
Cc: <linux-serial@vger.kernel.org>
Cc: Johan Hovold <johan@kernel.org>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/alpha/include/asm/termios.h       |  8 +++++++-
 arch/alpha/include/uapi/asm/ioctls.h   |  5 +++++
 arch/alpha/include/uapi/asm/termbits.h | 17 +++++++++++++++++
 3 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/arch/alpha/include/asm/termios.h b/arch/alpha/include/asm/termios.h
index 6a8c53dec57e6..b7c77bb1bfd20 100644
--- a/arch/alpha/include/asm/termios.h
+++ b/arch/alpha/include/asm/termios.h
@@ -73,9 +73,15 @@
 })
 
 #define user_termios_to_kernel_termios(k, u) \
-	copy_from_user(k, u, sizeof(struct termios))
+	copy_from_user(k, u, sizeof(struct termios2))
 
 #define kernel_termios_to_user_termios(u, k) \
+	copy_to_user(u, k, sizeof(struct termios2))
+
+#define user_termios_to_kernel_termios_1(k, u) \
+	copy_from_user(k, u, sizeof(struct termios))
+
+#define kernel_termios_to_user_termios_1(u, k) \
 	copy_to_user(u, k, sizeof(struct termios))
 
 #endif	/* _ALPHA_TERMIOS_H */
diff --git a/arch/alpha/include/uapi/asm/ioctls.h b/arch/alpha/include/uapi/asm/ioctls.h
index 3729d92d3fa85..dc8c20ac7191f 100644
--- a/arch/alpha/include/uapi/asm/ioctls.h
+++ b/arch/alpha/include/uapi/asm/ioctls.h
@@ -32,6 +32,11 @@
 #define TCXONC		_IO('t', 30)
 #define TCFLSH		_IO('t', 31)
 
+#define TCGETS2		_IOR('T', 42, struct termios2)
+#define TCSETS2		_IOW('T', 43, struct termios2)
+#define TCSETSW2	_IOW('T', 44, struct termios2)
+#define TCSETSF2	_IOW('T', 45, struct termios2)
+
 #define TIOCSWINSZ	_IOW('t', 103, struct winsize)
 #define TIOCGWINSZ	_IOR('t', 104, struct winsize)
 #define	TIOCSTART	_IO('t', 110)		/* start output, like ^Q */
diff --git a/arch/alpha/include/uapi/asm/termbits.h b/arch/alpha/include/uapi/asm/termbits.h
index de6c8360fbe36..4575ba34a0eae 100644
--- a/arch/alpha/include/uapi/asm/termbits.h
+++ b/arch/alpha/include/uapi/asm/termbits.h
@@ -26,6 +26,19 @@ struct termios {
 	speed_t c_ospeed;		/* output speed */
 };
 
+/* Alpha has identical termios and termios2 */
+
+struct termios2 {
+	tcflag_t c_iflag;		/* input mode flags */
+	tcflag_t c_oflag;		/* output mode flags */
+	tcflag_t c_cflag;		/* control mode flags */
+	tcflag_t c_lflag;		/* local mode flags */
+	cc_t c_cc[NCCS];		/* control characters */
+	cc_t c_line;			/* line discipline (== c_cc[19]) */
+	speed_t c_ispeed;		/* input speed */
+	speed_t c_ospeed;		/* output speed */
+};
+
 /* Alpha has matching termios and ktermios */
 
 struct ktermios {
@@ -152,6 +165,7 @@ struct ktermios {
 #define B3000000  00034
 #define B3500000  00035
 #define B4000000  00036
+#define BOTHER    00037
 
 #define CSIZE	00001400
 #define   CS5	00000000
@@ -169,6 +183,9 @@ struct ktermios {
 #define CMSPAR	  010000000000		/* mark or space (stick) parity */
 #define CRTSCTS	  020000000000		/* flow control */
 
+#define CIBAUD	07600000
+#define IBSHIFT	16
+
 /* c_lflag bits */
 #define ISIG	0x00000080
 #define ICANON	0x00000100
-- 
GitLab


From 0033dfd92a5646a78025e86f8df4d5b18181ba2c Mon Sep 17 00:00:00 2001
From: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Date: Thu, 8 Nov 2018 13:07:23 +0000
Subject: [PATCH 0814/1890] ubd: fix missing initialization of io_req

The SYNC path doesn't initialize io_req->error, which can cause
random errors. Before the conversion to blk-mq, we always
completed requests with BLK_STS_OK status, but now we actually
look at the error field and this issue becomes apparent.

Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>

[axboe: fixed up commit message to explain what is actually going on]

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 arch/um/drivers/ubd_kern.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 08831f5d83db0..28c40624bcb6f 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -1305,6 +1305,7 @@ static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req,
 		io_req->fds[0] = dev->cow.fd;
 	else
 		io_req->fds[0] = dev->fd;
+	io_req->error = 0;
 
 	if (req_op(req) == REQ_OP_FLUSH) {
 		io_req->op = UBD_FLUSH;
@@ -1313,9 +1314,7 @@ static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req,
 		io_req->cow_offset = -1;
 		io_req->offset = off;
 		io_req->length = bvec->bv_len;
-		io_req->error = 0;
 		io_req->sector_mask = 0;
-
 		io_req->op = rq_data_dir(req) == READ ? UBD_READ : UBD_WRITE;
 		io_req->offsets[0] = 0;
 		io_req->offsets[1] = dev->cow.data_offset;
-- 
GitLab


From bb39ba6a8deab70752b836a36c62205b1c65b559 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Thu, 8 Nov 2018 06:34:34 +0000
Subject: [PATCH 0815/1890] sata_rcar: convert to SPDX identifiers

This patch updates license to use SPDX-License-Identifier
instead of verbose license text.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/ata/sata_rcar.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/ata/sata_rcar.c b/drivers/ata/sata_rcar.c
index 10ecb232245db..4b1ff5bc256a3 100644
--- a/drivers/ata/sata_rcar.c
+++ b/drivers/ata/sata_rcar.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * Renesas R-Car SATA driver
  *
  * Author: Vladimir Barinov <source@cogentembedded.com>
  * Copyright (C) 2013-2015 Cogent Embedded, Inc.
  * Copyright (C) 2013-2015 Renesas Solutions Corp.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
  */
 
 #include <linux/kernel.h>
-- 
GitLab


From b469e7e47c8a075cc08bcd1e85d4365134bdcdd5 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Tue, 30 Oct 2018 20:29:53 +0200
Subject: [PATCH 0816/1890] fanotify: fix handling of events on child
 sub-directory

When an event is reported on a sub-directory and the parent inode has
a mark mask with FS_EVENT_ON_CHILD|FS_ISDIR, the event will be sent to
fsnotify() even if the event type is not in the parent mark mask
(e.g. FS_OPEN).

Further more, if that event happened on a mount or a filesystem with
a mount/sb mark that does have that event type in their mask, the "on
child" event will be reported on the mount/sb mark.  That is not
desired, because user will get a duplicate event for the same action.

Note that the event reported on the victim inode is never merged with
the event reported on the parent inode, because of the check in
should_merge(): old_fsn->inode == new_fsn->inode.

Fix this by looking for a match of an actual event type (i.e. not just
FS_ISDIR) in parent's inode mark mask and by not reporting an "on child"
event to group if event type is only found on mount/sb marks.

[backport hint: The bug seems to have always been in fanotify, but this
                patch will only apply cleanly to v4.19.y]

Cc: <stable@vger.kernel.org> # v4.19
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/notify/fanotify/fanotify.c | 10 +++++-----
 fs/notify/fsnotify.c          |  7 +++++--
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 5769cf3ff035a..e08a6647267b1 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -115,12 +115,12 @@ static bool fanotify_should_send_event(struct fsnotify_iter_info *iter_info,
 			continue;
 		mark = iter_info->marks[type];
 		/*
-		 * if the event is for a child and this inode doesn't care about
-		 * events on the child, don't send it!
+		 * If the event is for a child and this mark doesn't care about
+		 * events on a child, don't send it!
 		 */
-		if (type == FSNOTIFY_OBJ_TYPE_INODE &&
-		    (event_mask & FS_EVENT_ON_CHILD) &&
-		    !(mark->mask & FS_EVENT_ON_CHILD))
+		if (event_mask & FS_EVENT_ON_CHILD &&
+		    (type != FSNOTIFY_OBJ_TYPE_INODE ||
+		     !(mark->mask & FS_EVENT_ON_CHILD)))
 			continue;
 
 		marks_mask |= mark->mask;
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 2172ba516c61d..d2c34900ae05d 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -167,9 +167,9 @@ int __fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask
 	parent = dget_parent(dentry);
 	p_inode = parent->d_inode;
 
-	if (unlikely(!fsnotify_inode_watches_children(p_inode)))
+	if (unlikely(!fsnotify_inode_watches_children(p_inode))) {
 		__fsnotify_update_child_dentry_flags(p_inode);
-	else if (p_inode->i_fsnotify_mask & mask) {
+	} else if (p_inode->i_fsnotify_mask & mask & ALL_FSNOTIFY_EVENTS) {
 		struct name_snapshot name;
 
 		/* we are notifying a parent so come up with the new mask which
@@ -339,6 +339,9 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
 		sb = mnt->mnt.mnt_sb;
 		mnt_or_sb_mask = mnt->mnt_fsnotify_mask | sb->s_fsnotify_mask;
 	}
+	/* An event "on child" is not intended for a mount/sb mark */
+	if (mask & FS_EVENT_ON_CHILD)
+		mnt_or_sb_mask = 0;
 
 	/*
 	 * Optimization: srcu_read_lock() has a memory barrier which can
-- 
GitLab


From c2c6d3ce0d9a1fae4472fc10755372d022a487a4 Mon Sep 17 00:00:00 2001
From: Luis Henriques <lhenriques@suse.com>
Date: Tue, 23 Oct 2018 16:53:14 +0000
Subject: [PATCH 0817/1890] ceph: add destination file data sync before doing
 any remote copy
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If we try to copy into a file that was just written, any data that is
remote copied will be overwritten by our buffered writes once they are
flushed. Â When this happens, the call to invalidate_inode_pages2_range
will also return a -EBUSY error.

This patch fixes this by also sync'ing the destination file before
starting any copy.

Fixes: 503f82a9932d ("ceph: support copy_file_range file operation")
Signed-off-by: Luis Henriques <lhenriques@suse.com>
Reviewed-by: "Yan, Zheng" <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/file.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 27cad84dab23a..189df668b6a0c 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1931,10 +1931,17 @@ static ssize_t ceph_copy_file_range(struct file *src_file, loff_t src_off,
 	if (!prealloc_cf)
 		return -ENOMEM;
 
-	/* Start by sync'ing the source file */
+	/* Start by sync'ing the source and destination files */
 	ret = file_write_and_wait_range(src_file, src_off, (src_off + len));
-	if (ret < 0)
+	if (ret < 0) {
+		dout("failed to write src file (%zd)\n", ret);
+		goto out;
+	}
+	ret = file_write_and_wait_range(dst_file, dst_off, (dst_off + len));
+	if (ret < 0) {
+		dout("failed to write dst file (%zd)\n", ret);
 		goto out;
+	}
 
 	/*
 	 * We need FILE_WR caps for dst_ci and FILE_RD for src_ci as other
-- 
GitLab


From 71f2cc64d027d712f29bf8d09d3e123302d5f245 Mon Sep 17 00:00:00 2001
From: Luis Henriques <lhenriques@suse.com>
Date: Mon, 5 Nov 2018 19:00:52 +0000
Subject: [PATCH 0818/1890] ceph: quota: fix null pointer dereference in quota
 check
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch fixes a possible null pointer dereference in
check_quota_exceeded, detected by the static checker smatch, with the
following warning:

Â Â Â fs/ceph/quota.c:240 check_quota_exceeded()
Â Â Â Â error: we previously assumed 'realm' could be null (see line 188)

Fixes: b7a2921765cf ("ceph: quota: support for ceph.quota.max_files")
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Luis Henriques <lhenriques@suse.com>
Reviewed-by: "Yan, Zheng" <zyan@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/quota.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c
index 32d4f13784ba5..03f4d24db8fe0 100644
--- a/fs/ceph/quota.c
+++ b/fs/ceph/quota.c
@@ -237,7 +237,8 @@ static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op,
 		ceph_put_snap_realm(mdsc, realm);
 		realm = next;
 	}
-	ceph_put_snap_realm(mdsc, realm);
+	if (realm)
+		ceph_put_snap_realm(mdsc, realm);
 	up_read(&mdsc->snap_rwsem);
 
 	return exceeded;
-- 
GitLab


From 23c625ce3065e40c933a4239efb9b11f1194a343 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Thu, 8 Nov 2018 14:55:21 +0100
Subject: [PATCH 0819/1890] libceph: assume argonaut on the server side

No one is running pre-argonaut.  In addition one of the argonaut
features (NOSRCADDR) has been required since day one (and a half,
2.6.34 vs 2.6.35) of the kernel client.

Allow for the possibility of reusing these feature bits later.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Sage Weil <sage@redhat.com>
---
 fs/ceph/mds_client.c               | 12 +++---------
 include/linux/ceph/ceph_features.h |  8 +-------
 2 files changed, 4 insertions(+), 16 deletions(-)

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 67a9aeb2f4ecd..bd13a3267ae03 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -80,12 +80,8 @@ static int parse_reply_info_in(void **p, void *end,
 	info->symlink = *p;
 	*p += info->symlink_len;
 
-	if (features & CEPH_FEATURE_DIRLAYOUTHASH)
-		ceph_decode_copy_safe(p, end, &info->dir_layout,
-				      sizeof(info->dir_layout), bad);
-	else
-		memset(&info->dir_layout, 0, sizeof(info->dir_layout));
-
+	ceph_decode_copy_safe(p, end, &info->dir_layout,
+			      sizeof(info->dir_layout), bad);
 	ceph_decode_32_safe(p, end, info->xattr_len, bad);
 	ceph_decode_need(p, end, info->xattr_len, bad);
 	info->xattr_data = *p;
@@ -3182,10 +3178,8 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
 	recon_state.pagelist = pagelist;
 	if (session->s_con.peer_features & CEPH_FEATURE_MDSENC)
 		recon_state.msg_version = 3;
-	else if (session->s_con.peer_features & CEPH_FEATURE_FLOCK)
-		recon_state.msg_version = 2;
 	else
-		recon_state.msg_version = 1;
+		recon_state.msg_version = 2;
 	err = iterate_session_caps(session, encode_caps_cb, &recon_state);
 	if (err < 0)
 		goto fail;
diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index 6b92b3395fa99..65a38c4a02a18 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -213,12 +213,6 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin
 	 CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING |	\
 	 CEPH_FEATURE_CEPHX_V2)
 
-#define CEPH_FEATURES_REQUIRED_DEFAULT   \
-	(CEPH_FEATURE_NOSRCADDR |	 \
-	 CEPH_FEATURE_SUBSCRIBE2 |	 \
-	 CEPH_FEATURE_RECONNECT_SEQ |	 \
-	 CEPH_FEATURE_PGID64 |		 \
-	 CEPH_FEATURE_PGPOOL3 |		 \
-	 CEPH_FEATURE_OSDENC)
+#define CEPH_FEATURES_REQUIRED_DEFAULT	0
 
 #endif
-- 
GitLab


From 01310bb7c9c98752cc763b36532fab028e0f8f81 Mon Sep 17 00:00:00 2001
From: Scott Mayhew <smayhew@redhat.com>
Date: Thu, 8 Nov 2018 11:11:36 -0500
Subject: [PATCH 0820/1890] nfsd: COPY and CLONE operations require the saved
 filehandle to be set

Make sure we have a saved filehandle, otherwise we'll oops with a null
pointer dereference in nfs4_preprocess_stateid_op().

Signed-off-by: Scott Mayhew <smayhew@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4proc.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index edff074d38c75..d505990dac7c9 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1038,6 +1038,9 @@ nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 {
 	__be32 status;
 
+	if (!cstate->save_fh.fh_dentry)
+		return nfserr_nofilehandle;
+
 	status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh,
 					    src_stateid, RD_STATE, src, NULL);
 	if (status) {
-- 
GitLab


From 025911a5f4e36955498ed50806ad1b02f0f76288 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Thu, 8 Nov 2018 02:04:57 +0000
Subject: [PATCH 0821/1890] SUNRPC: drop pointless static qualifier in
 xdr_get_next_encode_buffer()

There is no need to have the '__be32 *p' variable static since new value
always be assigned before use it.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Cc: stable@vger.kernel.org
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 net/sunrpc/xdr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 5cfb9e0a18dcb..f302c6eb87790 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -546,7 +546,7 @@ EXPORT_SYMBOL_GPL(xdr_commit_encode);
 static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
 		size_t nbytes)
 {
-	static __be32 *p;
+	__be32 *p;
 	int space_left;
 	int frag1bytes, frag2bytes;
 
-- 
GitLab


From e7f4ffffa972db4820c23ff9831a6a4f3f6d8cc5 Mon Sep 17 00:00:00 2001
From: Adam Ford <aford173@gmail.com>
Date: Sun, 28 Oct 2018 15:13:48 -0500
Subject: [PATCH 0822/1890] ARM: dts: am3517: Fix pinmuxing for CD on MMC1

The MMC1 is active low, not active high.  For some reason,
this worked with different combination of U-Boot and kernels,
but it's supposed to be active low and is currently broken.

Fixes: cfaa856a2510 ("ARM: dts: am3517: Add pinmuxing, CD and
WP for MMC1") #kernel 4.18+

Signed-off-by: Adam Ford <aford173@gmail.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/boot/dts/am3517-evm.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/am3517-evm.dts b/arch/arm/boot/dts/am3517-evm.dts
index d4d33cd7adad7..1e2bb68231ad4 100644
--- a/arch/arm/boot/dts/am3517-evm.dts
+++ b/arch/arm/boot/dts/am3517-evm.dts
@@ -228,7 +228,7 @@ &mmc1 {
 	vmmc-supply = <&vmmc_fixed>;
 	bus-width = <4>;
 	wp-gpios = <&gpio4 30 GPIO_ACTIVE_HIGH>; /* gpio_126 */
-	cd-gpios = <&gpio4 31 GPIO_ACTIVE_HIGH>; /* gpio_127 */
+	cd-gpios = <&gpio4 31 GPIO_ACTIVE_LOW>; /* gpio_127 */
 };
 
 &mmc3 {
-- 
GitLab


From 6809564d64ff1301d44c13334cc0e8369e825a20 Mon Sep 17 00:00:00 2001
From: Adam Ford <aford173@gmail.com>
Date: Sun, 28 Oct 2018 15:28:32 -0500
Subject: [PATCH 0823/1890] ARM: dts: LogicPD Torpedo: Fix mmc3_dat1 interrupt

When the Torpedo was first introduced back at Kernel 4.2,
the interrupt extended flag has been set incorrectly.

It was subsequently moved, so this patch corrects Kernel
4.18+

Fixes: a38867305203 ("ARM: dts: Move move WiFi bindings to
logicpd-torpedo-37xx-devkit") # v4.18+

Signed-off-by: Adam Ford <aford173@gmail.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts b/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts
index 9d5d53fbe9c0c..c39cf2ca54da8 100644
--- a/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts
+++ b/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts
@@ -35,7 +35,7 @@ wl12xx_vmmc: wl12xx_vmmc {
  * jumpering combinations for the long run.
  */
 &mmc3 {
-	interrupts-extended = <&intc 94 &omap3_pmx_core2 0x46>;
+	interrupts-extended = <&intc 94 &omap3_pmx_core 0x136>;
 	pinctrl-0 = <&mmc3_pins &mmc3_core2_pins>;
 	pinctrl-names = "default";
 	vmmc-supply = <&wl12xx_vmmc>;
-- 
GitLab


From 3d8b804bc528d3720ec0c39c212af92dafaf6e84 Mon Sep 17 00:00:00 2001
From: Adam Ford <aford173@gmail.com>
Date: Sun, 28 Oct 2018 15:29:27 -0500
Subject: [PATCH 0824/1890] ARM: dts: logicpd-somlv: Fix interrupt on mmc3_dat1

The interrupt on mmc3_dat1 is wrong which prevents this from
appearing in /proc/interrupts.

Fixes: ab8dd3aed011 ("ARM: DTS: Add minimal Support for Logic PD
DM3730 SOM-LV") #Kernel 4.9+

Signed-off-by: Adam Ford <aford173@gmail.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/boot/dts/logicpd-som-lv.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/logicpd-som-lv.dtsi b/arch/arm/boot/dts/logicpd-som-lv.dtsi
index ac343330d0c83..98b682a8080cc 100644
--- a/arch/arm/boot/dts/logicpd-som-lv.dtsi
+++ b/arch/arm/boot/dts/logicpd-som-lv.dtsi
@@ -129,7 +129,7 @@ touchscreen: tsc2004@48 {
 };
 
 &mmc3 {
-	interrupts-extended = <&intc 94 &omap3_pmx_core2 0x46>;
+	interrupts-extended = <&intc 94 &omap3_pmx_core 0x136>;
 	pinctrl-0 = <&mmc3_pins &wl127x_gpio>;
 	pinctrl-names = "default";
 	vmmc-supply = <&wl12xx_vmmc>;
-- 
GitLab


From 419b194cdedc76d0d3cd5b0900db0fa8177c4a52 Mon Sep 17 00:00:00 2001
From: Adam Ford <aford173@gmail.com>
Date: Sun, 28 Oct 2018 15:34:21 -0500
Subject: [PATCH 0825/1890] ARM: dts: am3517-som: Fix WL127x Wifi interrupt

At the same time the AM3517 EVM was gaining WiFi support,
separate patches were introduced to move the interrupt
from HIGH to RISING.  Because they overlapped, this was not
done to the AM3517-EVM.  This patch fixes Kernel 4.19+

Fixes: 6bf5e3410f19 ("ARM: dts: am3517-som: Add WL127x Wifi")

Signed-off-by: Adam Ford <aford173@gmail.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/boot/dts/am3517-som.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/am3517-som.dtsi b/arch/arm/boot/dts/am3517-som.dtsi
index dae6e458e59fe..b1c988eed87c6 100644
--- a/arch/arm/boot/dts/am3517-som.dtsi
+++ b/arch/arm/boot/dts/am3517-som.dtsi
@@ -163,7 +163,7 @@ wlcore: wlcore@2 {
 		compatible = "ti,wl1271";
 		reg = <2>;
 		interrupt-parent = <&gpio6>;
-		interrupts = <10 IRQ_TYPE_LEVEL_HIGH>; /* gpio_170 */
+		interrupts = <10 IRQ_TYPE_EDGE_RISING>; /* gpio_170 */
 		ref-clock-frequency = <26000000>;
 		tcxo-clock-frequency = <26000000>;
 	};
-- 
GitLab


From cec83ff1241ec98113a19385ea9e9cfa9aa4125b Mon Sep 17 00:00:00 2001
From: Janusz Krzysztofik <jmkrzyszt@gmail.com>
Date: Wed, 7 Nov 2018 22:30:31 +0100
Subject: [PATCH 0826/1890] ARM: OMAP1: ams-delta: Fix possible use of
 uninitialized field

While playing with initialization order of modem device, it has been
discovered that under some circumstances (early console init, I
believe) its .pm() callback may be called before the
uart_port->private_data pointer is initialized from
plat_serial8250_port->private_data, resulting in NULL pointer
dereference.  Fix it by checking for uninitialized pointer before using
it in modem_pm().

Fixes: aabf31737a6a ("ARM: OMAP1: ams-delta: update the modem to use regulator API")
Signed-off-by: Janusz Krzysztofik <jmkrzyszt@gmail.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap1/board-ams-delta.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c
index 3d191fd52910f..17886744dbe69 100644
--- a/arch/arm/mach-omap1/board-ams-delta.c
+++ b/arch/arm/mach-omap1/board-ams-delta.c
@@ -750,6 +750,9 @@ static void modem_pm(struct uart_port *port, unsigned int state, unsigned old)
 	struct modem_private_data *priv = port->private_data;
 	int ret;
 
+	if (!priv)
+		return;
+
 	if (IS_ERR(priv->regulator))
 		return;
 
-- 
GitLab


From 017ce359a7187192df5222a00fa3c9055eb3736d Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 2 Nov 2018 12:06:43 +0100
Subject: [PATCH 0827/1890] ACPI / PMIC: xpower: fix IOSF_MBI dependency

We still get a link failure with IOSF_MBI=m when the xpower driver
is built-in:

drivers/acpi/pmic/intel_pmic_xpower.o: In function `intel_xpower_pmic_update_power':
intel_pmic_xpower.c:(.text+0x4f2): undefined reference to `iosf_mbi_block_punit_i2c_access'
intel_pmic_xpower.c:(.text+0x5e2): undefined reference to `iosf_mbi_unblock_punit_i2c_access'

This makes the dependency stronger, so we can only build when IOSF_MBI
is built-in.

Fixes: 6a9b593d4b6f (ACPI / PMIC: xpower: Add depends on IOSF_MBI to Kconfig entry)
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 8f3a444c6ea92..7cea769c37df5 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -512,7 +512,7 @@ config CRC_PMIC_OPREGION
 
 config XPOWER_PMIC_OPREGION
 	bool "ACPI operation region support for XPower AXP288 PMIC"
-	depends on MFD_AXP20X_I2C && IOSF_MBI
+	depends on MFD_AXP20X_I2C && IOSF_MBI=y
 	help
 	  This config adds ACPI operation region support for XPower AXP288 PMIC.
 
-- 
GitLab


From aeaf6a4b2d9ed4373e39a64c1c10cb1d93dd6bd1 Mon Sep 17 00:00:00 2001
From: Sudeep Holla <sudeep.holla@arm.com>
Date: Wed, 7 Nov 2018 17:40:58 +0000
Subject: [PATCH 0828/1890] dt-bindings: cpufreq: remove stale
 arm_big_little_dt entry

Most of the ARM platforms used v2 OPP bindings to support big-little
configurations. This arm_big_little_dt binding is incomplete and was
never used.

Commit f174e49e4906 (cpufreq: remove unused arm_big_little_dt driver)
removed the driver supporting this binding, but the binding was left
unnoticed, so let's get rid of it now.

Fixes: f174e49e4906 (cpufreq: remove unused arm_big_little_dt driver)
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 .../bindings/cpufreq/arm_big_little_dt.txt    | 65 -------------------
 1 file changed, 65 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/cpufreq/arm_big_little_dt.txt

diff --git a/Documentation/devicetree/bindings/cpufreq/arm_big_little_dt.txt b/Documentation/devicetree/bindings/cpufreq/arm_big_little_dt.txt
deleted file mode 100644
index 2aa06ac0fac59..0000000000000
--- a/Documentation/devicetree/bindings/cpufreq/arm_big_little_dt.txt
+++ /dev/null
@@ -1,65 +0,0 @@
-Generic ARM big LITTLE cpufreq driver's DT glue
------------------------------------------------
-
-This is DT specific glue layer for generic cpufreq driver for big LITTLE
-systems.
-
-Both required and optional properties listed below must be defined
-under node /cpus/cpu@x. Where x is the first cpu inside a cluster.
-
-FIXME: Cpus should boot in the order specified in DT and all cpus for a cluster
-must be present contiguously. Generic DT driver will check only node 'x' for
-cpu:x.
-
-Required properties:
-- operating-points: Refer to Documentation/devicetree/bindings/opp/opp.txt
-  for details
-
-Optional properties:
-- clock-latency: Specify the possible maximum transition latency for clock,
-  in unit of nanoseconds.
-
-Examples:
-
-cpus {
-	#address-cells = <1>;
-	#size-cells = <0>;
-
-	cpu@0 {
-		compatible = "arm,cortex-a15";
-		reg = <0>;
-		next-level-cache = <&L2>;
-		operating-points = <
-			/* kHz    uV */
-			792000  1100000
-			396000  950000
-			198000  850000
-		>;
-		clock-latency = <61036>; /* two CLK32 periods */
-	};
-
-	cpu@1 {
-		compatible = "arm,cortex-a15";
-		reg = <1>;
-		next-level-cache = <&L2>;
-	};
-
-	cpu@100 {
-		compatible = "arm,cortex-a7";
-		reg = <100>;
-		next-level-cache = <&L2>;
-		operating-points = <
-			/* kHz    uV */
-			792000  950000
-			396000  750000
-			198000  450000
-		>;
-		clock-latency = <61036>; /* two CLK32 periods */
-	};
-
-	cpu@101 {
-		compatible = "arm,cortex-a7";
-		reg = <101>;
-		next-level-cache = <&L2>;
-	};
-};
-- 
GitLab


From 26a4676faa1ad5d99317e0cd701e5d6f3e716b77 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Wed, 7 Nov 2018 18:10:38 +0100
Subject: [PATCH 0829/1890] arm64: mm: define NET_IP_ALIGN to 0

On arm64, there is no need to add 2 bytes of padding to the start of
each network buffer just to make the IP header appear 32-bit aligned.

Since this might actually adversely affect DMA performance some
platforms, let's override NET_IP_ALIGN to 0 to get rid of this
padding.

Acked-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
Tested-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/processor.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 3e2091708b8e5..6b0d4dff50125 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -24,6 +24,14 @@
 #define KERNEL_DS	UL(-1)
 #define USER_DS		(TASK_SIZE_64 - 1)
 
+/*
+ * On arm64 systems, unaligned accesses by the CPU are cheap, and so there is
+ * no point in shifting all network buffers by 2 bytes just to make some IP
+ * header fields appear aligned in memory, potentially sacrificing some DMA
+ * performance on some platforms.
+ */
+#define NET_IP_ALIGN	0
+
 #ifndef __ASSEMBLY__
 #ifdef __KERNEL__
 
-- 
GitLab


From 763f191af51f127cf8e69cd361f50bf6180768a5 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Thu, 1 Nov 2018 13:22:38 +0100
Subject: [PATCH 0830/1890] ARM: cpuidle: Don't register the driver when
 back-end init returns -ENXIO

There's no point to register the cpuidle driver for the current CPU, when
the initialization of the arch specific back-end data fails by returning
-ENXIO.

Instead, let's re-order the sequence to its original flow, by first trying
to initialize the back-end part and then act accordingly on the returned
error code. Additionally, let's print the error message, no matter of what
error code that was returned.

Fixes: a0d46a3dfdc3 (ARM: cpuidle: Register per cpuidle device)
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: 4.19+ <stable@vger.kernel.org> # v4.19+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/cpuidle-arm.c | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/drivers/cpuidle/cpuidle-arm.c b/drivers/cpuidle/cpuidle-arm.c
index 073557f433eb1..df564d7832161 100644
--- a/drivers/cpuidle/cpuidle-arm.c
+++ b/drivers/cpuidle/cpuidle-arm.c
@@ -103,13 +103,6 @@ static int __init arm_idle_init_cpu(int cpu)
 		goto out_kfree_drv;
 	}
 
-	ret = cpuidle_register_driver(drv);
-	if (ret) {
-		if (ret != -EBUSY)
-			pr_err("Failed to register cpuidle driver\n");
-		goto out_kfree_drv;
-	}
-
 	/*
 	 * Call arch CPU operations in order to initialize
 	 * idle states suspend back-end specific data
@@ -117,15 +110,20 @@ static int __init arm_idle_init_cpu(int cpu)
 	ret = arm_cpuidle_init(cpu);
 
 	/*
-	 * Skip the cpuidle device initialization if the reported
+	 * Allow the initialization to continue for other CPUs, if the reported
 	 * failure is a HW misconfiguration/breakage (-ENXIO).
 	 */
-	if (ret == -ENXIO)
-		return 0;
-
 	if (ret) {
 		pr_err("CPU %d failed to init idle CPU ops\n", cpu);
-		goto out_unregister_drv;
+		ret = ret == -ENXIO ? 0 : ret;
+		goto out_kfree_drv;
+	}
+
+	ret = cpuidle_register_driver(drv);
+	if (ret) {
+		if (ret != -EBUSY)
+			pr_err("Failed to register cpuidle driver\n");
+		goto out_kfree_drv;
 	}
 
 	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
-- 
GitLab


From 3e452e636d006fa759a9914c044398869acba98f Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Thu, 1 Nov 2018 11:15:58 +0100
Subject: [PATCH 0831/1890] ARM: cpuidle: Convert to use
 cpuidle_register|unregister()

The only reason that remains, to why the ARM cpuidle driver calls
cpuidle_register_driver(), is to avoid printing an error message in case
another driver already have been registered for the CPU. This seems a bit
silly, but more importantly, if that is a common scenario, perhaps we
should change cpuidle_register() accordingly instead.

In either case, let's consolidate the code, by converting to use
cpuidle_register|unregister(), which also avoids the unnecessary allocation
of the struct cpuidle_device.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Reviewed-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/cpuidle-arm.c | 30 +++---------------------------
 1 file changed, 3 insertions(+), 27 deletions(-)

diff --git a/drivers/cpuidle/cpuidle-arm.c b/drivers/cpuidle/cpuidle-arm.c
index df564d7832161..3a407a3ef22b4 100644
--- a/drivers/cpuidle/cpuidle-arm.c
+++ b/drivers/cpuidle/cpuidle-arm.c
@@ -82,7 +82,6 @@ static int __init arm_idle_init_cpu(int cpu)
 {
 	int ret;
 	struct cpuidle_driver *drv;
-	struct cpuidle_device *dev;
 
 	drv = kmemdup(&arm_idle_driver, sizeof(*drv), GFP_KERNEL);
 	if (!drv)
@@ -119,33 +118,12 @@ static int __init arm_idle_init_cpu(int cpu)
 		goto out_kfree_drv;
 	}
 
-	ret = cpuidle_register_driver(drv);
-	if (ret) {
-		if (ret != -EBUSY)
-			pr_err("Failed to register cpuidle driver\n");
+	ret = cpuidle_register(drv, NULL);
+	if (ret)
 		goto out_kfree_drv;
-	}
-
-	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
-	if (!dev) {
-		ret = -ENOMEM;
-		goto out_unregister_drv;
-	}
-	dev->cpu = cpu;
-
-	ret = cpuidle_register_device(dev);
-	if (ret) {
-		pr_err("Failed to register cpuidle device for CPU %d\n",
-		       cpu);
-		goto out_kfree_dev;
-	}
 
 	return 0;
 
-out_kfree_dev:
-	kfree(dev);
-out_unregister_drv:
-	cpuidle_unregister_driver(drv);
 out_kfree_drv:
 	kfree(drv);
 	return ret;
@@ -176,9 +154,7 @@ static int __init arm_idle_init(void)
 	while (--cpu >= 0) {
 		dev = per_cpu(cpuidle_devices, cpu);
 		drv = cpuidle_get_cpu_driver(dev);
-		cpuidle_unregister_device(dev);
-		cpuidle_unregister_driver(drv);
-		kfree(dev);
+		cpuidle_unregister(drv);
 		kfree(drv);
 	}
 
-- 
GitLab


From 24cc61d8cb5a9232fadf21a830061853c1268fdd Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Wed, 7 Nov 2018 15:16:06 +0100
Subject: [PATCH 0832/1890] arm64: memblock: don't permit memblock resizing
 until linear mapping is up

Bhupesh reports that having numerous memblock reservations at early
boot may result in the following crash:

  Unable to handle kernel paging request at virtual address ffff80003ffe0000
  ...
  Call trace:
   __memcpy+0x110/0x180
   memblock_add_range+0x134/0x2e8
   memblock_reserve+0x70/0xb8
   memblock_alloc_base_nid+0x6c/0x88
   __memblock_alloc_base+0x3c/0x4c
   memblock_alloc_base+0x28/0x4c
   memblock_alloc+0x2c/0x38
   early_pgtable_alloc+0x20/0xb0
   paging_init+0x28/0x7f8

This is caused by the fact that we permit memblock resizing before the
linear mapping is up, and so the memblock_reserved() array is moved
into memory that is not mapped yet.

So let's ensure that this crash can no longer occur, by deferring to
call to memblock_allow_resize() to after the linear mapping has been
created.

Reported-by: Bhupesh Sharma <bhsharma@redhat.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Tested-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/mm/init.c | 2 --
 arch/arm64/mm/mmu.c  | 2 ++
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 9d9582cac6c40..9b432d9fcada8 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -483,8 +483,6 @@ void __init arm64_memblock_init(void)
 	high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
 
 	dma_contiguous_reserve(arm64_dma_phys_limit);
-
-	memblock_allow_resize();
 }
 
 void __init bootmem_init(void)
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 394b8d554def4..d1d6601b385d9 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -659,6 +659,8 @@ void __init paging_init(void)
 
 	memblock_free(__pa_symbol(init_pg_dir),
 		      __pa_symbol(init_pg_end) - __pa_symbol(init_pg_dir));
+
+	memblock_allow_resize();
 }
 
 /*
-- 
GitLab


From e2576c8bdfd462c34b8a46c0084e7c30b0851bf4 Mon Sep 17 00:00:00 2001
From: Christian Hewitt <christianshewitt@gmail.com>
Date: Tue, 6 Nov 2018 00:08:20 +0100
Subject: [PATCH 0833/1890] clk: meson-gxbb: set fclk_div3 as CLK_IS_CRITICAL

On the Khadas VIM2 (GXM) and LePotato (GXL) board there are problems
with reboot; e.g. a ~60 second delay between issuing reboot and the
board power cycling (and in some OS configurations reboot will fail
and require manual power cycling).

Similar to 'commit c987ac6f1f088663b6dad39281071aeb31d450a8 ("clk:
meson-gxbb: set fclk_div2 as CLK_IS_CRITICAL")' the SCPI Cortex-M4
Co-Processor seems to depend on FCLK_DIV3 being operational.

Until commit 05f814402d6174369b3b29832cbb5eb5ed287059 ("clk:
meson: add fdiv clock gates"), this clock was modeled and left on by
the bootloader.

We don't have precise documentation about the SCPI Co-Processor and
its clock requirement so we are learning things the hard way.

Marking this clock as critical solves the problem but it should not
be viewed as final solution. Ideally, the SCPI driver should claim
these clocks. We also depends on some clock hand-off mechanism
making its way to CCF, to make sure the clock stays on between its
registration and the SCPI driver probe.

Fixes: 05f814402d61 ("clk: meson: add fdiv clock gates")
Signed-off-by: Christian Hewitt <christianshewitt@gmail.com>
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 drivers/clk/meson/gxbb.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/clk/meson/gxbb.c b/drivers/clk/meson/gxbb.c
index 9309cfaaa464e..4ada9668fd49c 100644
--- a/drivers/clk/meson/gxbb.c
+++ b/drivers/clk/meson/gxbb.c
@@ -506,6 +506,18 @@ static struct clk_regmap gxbb_fclk_div3 = {
 		.ops = &clk_regmap_gate_ops,
 		.parent_names = (const char *[]){ "fclk_div3_div" },
 		.num_parents = 1,
+		/*
+		 * FIXME:
+		 * This clock, as fdiv2, is used by the SCPI FW and is required
+		 * by the platform to operate correctly.
+		 * Until the following condition are met, we need this clock to
+		 * be marked as critical:
+		 * a) The SCPI generic driver claims and enable all the clocks
+		 *    it needs
+		 * b) CCF has a clock hand-off mechanism to make the sure the
+		 *    clock stays on until the proper driver comes along
+		 */
+		.flags = CLK_IS_CRITICAL,
 	},
 };
 
-- 
GitLab


From d6ee1e7e9004d3d246cdfa14196989e0a9466c16 Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Thu, 8 Nov 2018 10:31:23 +0100
Subject: [PATCH 0834/1890] clk: meson: axg: mark fdiv2 and fdiv3 as critical

Similar to gxbb and gxl platforms, axg SCPI Cortex-M co-processor
uses the fdiv2 and fdiv3 to, among other things, provide the cpu
clock.

Until clock hand-off mechanism makes its way to CCF and the generic
SCPI claims platform specific clocks, these clocks must be marked as
critical to make sure they are never disabled when needed by the
co-processor.

Fixes: 05f814402d61 ("clk: meson: add fdiv clock gates")
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Acked-by: Neil Armstrong <narmstrong@baylibre.com>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 drivers/clk/meson/axg.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/clk/meson/axg.c b/drivers/clk/meson/axg.c
index c981159b02c0f..792735d7e46ea 100644
--- a/drivers/clk/meson/axg.c
+++ b/drivers/clk/meson/axg.c
@@ -325,6 +325,7 @@ static struct clk_regmap axg_fclk_div2 = {
 		.ops = &clk_regmap_gate_ops,
 		.parent_names = (const char *[]){ "fclk_div2_div" },
 		.num_parents = 1,
+		.flags = CLK_IS_CRITICAL,
 	},
 };
 
@@ -349,6 +350,18 @@ static struct clk_regmap axg_fclk_div3 = {
 		.ops = &clk_regmap_gate_ops,
 		.parent_names = (const char *[]){ "fclk_div3_div" },
 		.num_parents = 1,
+		/*
+		 * FIXME:
+		 * This clock, as fdiv2, is used by the SCPI FW and is required
+		 * by the platform to operate correctly.
+		 * Until the following condition are met, we need this clock to
+		 * be marked as critical:
+		 * a) The SCPI generic driver claims and enable all the clocks
+		 *    it needs
+		 * b) CCF has a clock hand-off mechanism to make the sure the
+		 *    clock stays on until the proper driver comes along
+		 */
+		.flags = CLK_IS_CRITICAL,
 	},
 };
 
-- 
GitLab


From 6778be4e520959659b27a441c06a84c9cb009085 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Wed, 7 Nov 2018 16:30:32 +0000
Subject: [PATCH 0835/1890] of/device: Really only set bus DMA mask when
 appropriate

of_dma_configure() was *supposed* to be following the same logic as
acpi_dma_configure() and only setting bus_dma_mask if some range was
specified by the firmware. However, it seems that subtlety got lost in
the process of fitting it into the differently-shaped control flow, and
as a result the force_dma==true case ends up always setting the bus mask
to the 32-bit default, which is not what anyone wants.

Make sure we only touch it if the DT actually said so.

Fixes: 6c2fb2ea7636 ("of/device: Set bus DMA mask as appropriate")
Reported-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Reported-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
Tested-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Tested-by: John Stultz <john.stultz@linaro.org>
Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>
Tested-by: Robert Richter <robert.richter@cavium.com>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 drivers/of/device.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/of/device.c b/drivers/of/device.c
index 84e810421418b..258742830e366 100644
--- a/drivers/of/device.c
+++ b/drivers/of/device.c
@@ -149,9 +149,11 @@ int of_dma_configure(struct device *dev, struct device_node *np, bool force_dma)
 	 * set by the driver.
 	 */
 	mask = DMA_BIT_MASK(ilog2(dma_addr + size - 1) + 1);
-	dev->bus_dma_mask = mask;
 	dev->coherent_dma_mask &= mask;
 	*dev->dma_mask &= mask;
+	/* ...but only set bus mask if we found valid dma-ranges earlier */
+	if (!ret)
+		dev->bus_dma_mask = mask;
 
 	coherent = of_dma_is_coherent(np);
 	dev_dbg(dev, "device is%sdma coherent\n",
-- 
GitLab


From 89c38422e072bb453e3045b8f1b962a344c3edea Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Thu, 8 Nov 2018 18:17:03 +0800
Subject: [PATCH 0836/1890] of, numa: Validate some distance map rules

Currently the NUMA distance map parsing does not validate the distance
table for the distance-matrix rules 1-2 in [1].

However the arch NUMA code may enforce some of these rules, but not all.
Such is the case for the arm64 port, which does not enforce the rule that
the distance between separates nodes cannot equal LOCAL_DISTANCE.

The patch adds the following rules validation:
- distance of node to self equals LOCAL_DISTANCE
- distance of separate nodes > LOCAL_DISTANCE

This change avoids a yet-unresolved crash reported in [2].

A note on dealing with symmetrical distances between nodes:

Validating symmetrical distances between nodes is difficult. If it were
mandated in the bindings that every distance must be recorded in the
table, then it would be easy. However, it isn't.

In addition to this, it is also possible to record [b, a] distance only
(and not [a, b]). So, when processing the table for [b, a], we cannot
assert that current distance of [a, b] != [b, a] as invalid, as [a, b]
distance may not be present in the table and current distance would be
default at REMOTE_DISTANCE.

As such, we maintain the policy that we overwrite distance [a, b] = [b, a]
for b > a. This policy is different to kernel ACPI SLIT validation, which
allows non-symmetrical distances (ACPI spec SLIT rules allow it). However,
the distance debug message is dropped as it may be misleading (for a distance
which is later overwritten).

Some final notes on semantics:

- It is implied that it is the responsibility of the arch NUMA code to
  reset the NUMA distance map for an error in distance map parsing.

- It is the responsibility of the FW NUMA topology parsing (whether OF or
  ACPI) to enforce NUMA distance rules, and not arch NUMA code.

[1] Documents/devicetree/bindings/numa.txt
[2] https://www.spinics.net/lists/arm-kernel/msg683304.html

Cc: stable@vger.kernel.org # 4.7
Signed-off-by: John Garry <john.garry@huawei.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 drivers/of/of_numa.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/of/of_numa.c b/drivers/of/of_numa.c
index 35c64a4295e07..fe6b13608e510 100644
--- a/drivers/of/of_numa.c
+++ b/drivers/of/of_numa.c
@@ -104,9 +104,14 @@ static int __init of_numa_parse_distance_map_v1(struct device_node *map)
 		distance = of_read_number(matrix, 1);
 		matrix++;
 
+		if ((nodea == nodeb && distance != LOCAL_DISTANCE) ||
+		    (nodea != nodeb && distance <= LOCAL_DISTANCE)) {
+			pr_err("Invalid distance[node%d -> node%d] = %d\n",
+			       nodea, nodeb, distance);
+			return -EINVAL;
+		}
+
 		numa_set_distance(nodea, nodeb, distance);
-		pr_debug("distance[node%d -> node%d] = %d\n",
-			 nodea, nodeb, distance);
 
 		/* Set default distance of node B->A same as A->B */
 		if (nodeb > nodea)
-- 
GitLab


From e12c225258f2584906765234ca6db4ad4c618192 Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Thu, 8 Nov 2018 10:13:19 +0800
Subject: [PATCH 0837/1890] net: hns3: bugfix for not checking return value

hns3_reset_notify_init_enet() only return error early if the return
value of hns3_restore_vlan() is not 0.

This patch adds checking for the return value of hns3_restore_vlan.

Fixes: 7fa6be4fd2f6 ("net: hns3: fix incorrect return value/type of some functions")
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 3f96aa30068ec..20fcf0d1c2ce5 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -3760,7 +3760,8 @@ static int hns3_reset_notify_init_enet(struct hnae3_handle *handle)
 	/* Hardware table is only clear when pf resets */
 	if (!(handle->flags & HNAE3_SUPPORT_VF)) {
 		ret = hns3_restore_vlan(netdev);
-		return ret;
+		if (ret)
+			return ret;
 	}
 
 	ret = hns3_restore_fd_rules(netdev);
-- 
GitLab


From f2ffd4e5bc7b629b312be48fbe3c3a2afbd67e4a Mon Sep 17 00:00:00 2001
From: Bill Kuzeja <William.Kuzeja@stratus.com>
Date: Mon, 5 Nov 2018 11:23:50 -0500
Subject: [PATCH 0838/1890] scsi: qla2xxx: Timeouts occur on surprise removal
 of QLogic adapter

When doing a surprise removal of an adapter, some in flight I/Os can get
stuck and take a while to complete (they actually time out and are
retried). We are not handling an early error exit from qla2xxx_eh_abort
properly.

Fixes: 45235022da99 ("scsi: qla2xxx: Fix driver unload by shutting down chip")
Signed-off-by: Bill Kuzeja <william.kuzeja@stratus.com>
Reviewed-by: Laurence Oberman <loberman@redhat.com>
Acked-by: Himanshu Madhani <himanshu.madhani@cavium.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qla2xxx/qla_os.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 20c85eed1a750..b658b9a5eb1e1 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -1749,7 +1749,7 @@ qla2x00_loop_reset(scsi_qla_host_t *vha)
 static void
 __qla2x00_abort_all_cmds(struct qla_qpair *qp, int res)
 {
-	int cnt;
+	int cnt, status;
 	unsigned long flags;
 	srb_t *sp;
 	scsi_qla_host_t *vha = qp->vha;
@@ -1799,10 +1799,16 @@ __qla2x00_abort_all_cmds(struct qla_qpair *qp, int res)
 					if (!sp_get(sp)) {
 						spin_unlock_irqrestore
 							(qp->qp_lock_ptr, flags);
-						qla2xxx_eh_abort(
+						status = qla2xxx_eh_abort(
 							GET_CMD_SP(sp));
 						spin_lock_irqsave
 							(qp->qp_lock_ptr, flags);
+						/*
+						 * Get rid of extra reference caused
+						 * by early exit from qla2xxx_eh_abort
+						 */
+						if (status == FAST_IO_FAIL)
+							atomic_dec(&sp->ref_count);
 					}
 				}
 				sp->done(sp, res);
-- 
GitLab


From 8e4829c6f7470722c1f5a1dc5769ebe09ef036d6 Mon Sep 17 00:00:00 2001
From: Wei Li <liwei213@huawei.com>
Date: Thu, 8 Nov 2018 09:08:29 -0800
Subject: [PATCH 0839/1890] scsi: ufs: Fix hynix ufs bug with quirk on hi36xx
 SoC

Hynix ufs has deviations on hi36xx platform which will result in ufs bursts
transfer failures.

To fix the problem, the Hynix device must set the register
VS_DebugSaveConfigTime to 0x10, which will set time reference for
SaveConfigTime is 250 ns. The time reference for SaveConfigTime is 40 ns by
default.

This patch is necessary to boot on HiKey960 boards that use Hynix UFS chips
(H28U62301AMR model: hB8aL1).

Cc: Vinayak Holikatti <vinholikatti@gmail.com>
Cc: "James E.J. Bottomley" <jejb@linux.vnet.ibm.com>
Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
Cc: linux-scsi@vger.kernel.org
Signed-off-by: Wei Li <liwei213@huawei.com>
Signed-off-by: Dmitry Shmidt <dimitrysh@google.com>
[jstultz: Forward ported from older code, slight tweak to commit message]
Signed-off-by: John Stultz <john.stultz@linaro.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/ufs/ufs-hisi.c   | 9 +++++++++
 drivers/scsi/ufs/ufs_quirks.h | 6 ++++++
 drivers/scsi/ufs/ufshcd.c     | 2 ++
 3 files changed, 17 insertions(+)

diff --git a/drivers/scsi/ufs/ufs-hisi.c b/drivers/scsi/ufs/ufs-hisi.c
index 46df707e6f2c0..452e19f8fb470 100644
--- a/drivers/scsi/ufs/ufs-hisi.c
+++ b/drivers/scsi/ufs/ufs-hisi.c
@@ -20,6 +20,7 @@
 #include "unipro.h"
 #include "ufs-hisi.h"
 #include "ufshci.h"
+#include "ufs_quirks.h"
 
 static int ufs_hisi_check_hibern8(struct ufs_hba *hba)
 {
@@ -390,6 +391,14 @@ static void ufs_hisi_set_dev_cap(struct ufs_hisi_dev_params *hisi_param)
 
 static void ufs_hisi_pwr_change_pre_change(struct ufs_hba *hba)
 {
+	if (hba->dev_quirks & UFS_DEVICE_QUIRK_HOST_VS_DEBUGSAVECONFIGTIME) {
+		pr_info("ufs flash device must set VS_DebugSaveConfigTime 0x10\n");
+		/* VS_DebugSaveConfigTime */
+		ufshcd_dme_set(hba, UIC_ARG_MIB(0xD0A0), 0x10);
+		/* sync length */
+		ufshcd_dme_set(hba, UIC_ARG_MIB(0x1556), 0x48);
+	}
+
 	/* update */
 	ufshcd_dme_set(hba, UIC_ARG_MIB(0x15A8), 0x1);
 	/* PA_TxSkip */
diff --git a/drivers/scsi/ufs/ufs_quirks.h b/drivers/scsi/ufs/ufs_quirks.h
index 71f73d1d1ad1f..5d2dfdb41a6ff 100644
--- a/drivers/scsi/ufs/ufs_quirks.h
+++ b/drivers/scsi/ufs/ufs_quirks.h
@@ -131,4 +131,10 @@ struct ufs_dev_fix {
  */
 #define UFS_DEVICE_QUIRK_HOST_PA_SAVECONFIGTIME	(1 << 8)
 
+/*
+ * Some UFS devices require VS_DebugSaveConfigTime is 0x10,
+ * enabling this quirk ensure this.
+ */
+#define UFS_DEVICE_QUIRK_HOST_VS_DEBUGSAVECONFIGTIME	(1 << 9)
+
 #endif /* UFS_QUIRKS_H_ */
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 27db55b0ca7f8..f1c57cd33b5ba 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -231,6 +231,8 @@ static struct ufs_dev_fix ufs_fixups[] = {
 	UFS_FIX(UFS_VENDOR_SKHYNIX, UFS_ANY_MODEL, UFS_DEVICE_NO_VCCQ),
 	UFS_FIX(UFS_VENDOR_SKHYNIX, UFS_ANY_MODEL,
 		UFS_DEVICE_QUIRK_HOST_PA_SAVECONFIGTIME),
+	UFS_FIX(UFS_VENDOR_SKHYNIX, "hB8aL1" /*H28U62301AMR*/,
+		UFS_DEVICE_QUIRK_HOST_VS_DEBUGSAVECONFIGTIME),
 
 	END_FIX
 };
-- 
GitLab


From 0d5b9311baf27bb545f187f12ecfd558220c607d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 8 Nov 2018 17:34:27 -0800
Subject: [PATCH 0840/1890] inet: frags: better deal with smp races
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Multiple cpus might attempt to insert a new fragment in rhashtable,
if for example RPS is buggy, as reported by ë°°ì„ì§„ in
https://patchwork.ozlabs.org/patch/994601/

We use rhashtable_lookup_get_insert_key() instead of
rhashtable_insert_fast() to let cpus losing the race
free their own inet_frag_queue and use the one that
was inserted by another cpu.

Fixes: 648700f76b03 ("inet: frags: use rhashtables for reassembly units")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: ë°°ì„ì§„ <soukjin.bae@samsung.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_fragment.c | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index bcb11f3a27c0c..760a9e52e02b9 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -178,21 +178,22 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
 }
 
 static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
-						void *arg)
+						void *arg,
+						struct inet_frag_queue **prev)
 {
 	struct inet_frags *f = nf->f;
 	struct inet_frag_queue *q;
-	int err;
 
 	q = inet_frag_alloc(nf, f, arg);
-	if (!q)
+	if (!q) {
+		*prev = ERR_PTR(-ENOMEM);
 		return NULL;
-
+	}
 	mod_timer(&q->timer, jiffies + nf->timeout);
 
-	err = rhashtable_insert_fast(&nf->rhashtable, &q->node,
-				     f->rhash_params);
-	if (err < 0) {
+	*prev = rhashtable_lookup_get_insert_key(&nf->rhashtable, &q->key,
+						 &q->node, f->rhash_params);
+	if (*prev) {
 		q->flags |= INET_FRAG_COMPLETE;
 		inet_frag_kill(q);
 		inet_frag_destroy(q);
@@ -204,22 +205,22 @@ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
 /* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */
 struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key)
 {
-	struct inet_frag_queue *fq;
+	struct inet_frag_queue *fq = NULL, *prev;
 
 	if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh)
 		return NULL;
 
 	rcu_read_lock();
 
-	fq = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params);
-	if (fq) {
+	prev = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params);
+	if (!prev)
+		fq = inet_frag_create(nf, key, &prev);
+	if (prev && !IS_ERR(prev)) {
+		fq = prev;
 		if (!refcount_inc_not_zero(&fq->refcnt))
 			fq = NULL;
-		rcu_read_unlock();
-		return fq;
 	}
 	rcu_read_unlock();
-
-	return inet_frag_create(nf, key);
+	return fq;
 }
 EXPORT_SYMBOL(inet_frag_find);
-- 
GitLab


From 39477551df940ddb1339203817de04f5caaacf7a Mon Sep 17 00:00:00 2001
From: Denis Bolotin <denis.bolotin@cavium.com>
Date: Thu, 8 Nov 2018 16:46:08 +0200
Subject: [PATCH 0841/1890] qed: Fix memory/entry leak in qed_init_sp_request()

Free the allocated SPQ entry or return the acquired SPQ entry to the free
list in error flows.

Signed-off-by: Denis Bolotin <denis.bolotin@cavium.com>
Signed-off-by: Michal Kalderon <michal.kalderon@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/qlogic/qed/qed_sp_commands.c    | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
index 77b6248ad3b97..e86a1ea236133 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
@@ -80,7 +80,7 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn,
 
 	case QED_SPQ_MODE_BLOCK:
 		if (!p_data->p_comp_data)
-			return -EINVAL;
+			goto err;
 
 		p_ent->comp_cb.cookie = p_data->p_comp_data->cookie;
 		break;
@@ -95,7 +95,7 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn,
 	default:
 		DP_NOTICE(p_hwfn, "Unknown SPQE completion mode %d\n",
 			  p_ent->comp_mode);
-		return -EINVAL;
+		goto err;
 	}
 
 	DP_VERBOSE(p_hwfn, QED_MSG_SPQ,
@@ -109,6 +109,18 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn,
 	memset(&p_ent->ramrod, 0, sizeof(p_ent->ramrod));
 
 	return 0;
+
+err:
+	/* qed_spq_get_entry() can either get an entry from the free_pool,
+	 * or, if no entries are left, allocate a new entry and add it to
+	 * the unlimited_pending list.
+	 */
+	if (p_ent->queue == &p_hwfn->p_spq->unlimited_pending)
+		kfree(p_ent);
+	else
+		qed_spq_return_entry(p_hwfn, p_ent);
+
+	return -EINVAL;
 }
 
 static enum tunnel_clss qed_tunn_clss_to_fw_clss(u8 type)
-- 
GitLab


From 2632f22ebd08da249c2017962a199a0cfb2324bf Mon Sep 17 00:00:00 2001
From: Denis Bolotin <denis.bolotin@cavium.com>
Date: Thu, 8 Nov 2018 16:46:09 +0200
Subject: [PATCH 0842/1890] qed: Fix blocking/unlimited SPQ entries leak

When there are no SPQ entries left in the free_pool, new entries are
allocated and are added to the unlimited list. When an entry in the pool
is available, the content is copied from the original entry, and the new
entry is sent to the device. qed_spq_post() is not aware of that, so the
additional entry is stored in the original entry as p_post_ent, which can
later be returned to the pool.

Signed-off-by: Denis Bolotin <denis.bolotin@cavium.com>
Signed-off-by: Michal Kalderon <michal.kalderon@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_sp.h  |  3 ++
 drivers/net/ethernet/qlogic/qed/qed_spq.c | 57 ++++++++++++-----------
 2 files changed, 33 insertions(+), 27 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h
index e95431f6acd46..04259df8a5c24 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h
@@ -167,6 +167,9 @@ struct qed_spq_entry {
 	enum spq_mode			comp_mode;
 	struct qed_spq_comp_cb		comp_cb;
 	struct qed_spq_comp_done	comp_done; /* SPQ_MODE_EBLOCK */
+
+	/* Posted entry for unlimited list entry in EBLOCK mode */
+	struct qed_spq_entry		*post_ent;
 };
 
 struct qed_eq {
diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c
index c4a6274dd625c..c1a81ec0524b8 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_spq.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c
@@ -685,6 +685,8 @@ static int qed_spq_add_entry(struct qed_hwfn *p_hwfn,
 			/* EBLOCK responsible to free the allocated p_ent */
 			if (p_ent->comp_mode != QED_SPQ_MODE_EBLOCK)
 				kfree(p_ent);
+			else
+				p_ent->post_ent = p_en2;
 
 			p_ent = p_en2;
 		}
@@ -767,6 +769,25 @@ static int qed_spq_pend_post(struct qed_hwfn *p_hwfn)
 				 SPQ_HIGH_PRI_RESERVE_DEFAULT);
 }
 
+/* Avoid overriding of SPQ entries when getting out-of-order completions, by
+ * marking the completions in a bitmap and increasing the chain consumer only
+ * for the first successive completed entries.
+ */
+static void qed_spq_comp_bmap_update(struct qed_hwfn *p_hwfn, __le16 echo)
+{
+	u16 pos = le16_to_cpu(echo) % SPQ_RING_SIZE;
+	struct qed_spq *p_spq = p_hwfn->p_spq;
+
+	__set_bit(pos, p_spq->p_comp_bitmap);
+	while (test_bit(p_spq->comp_bitmap_idx,
+			p_spq->p_comp_bitmap)) {
+		__clear_bit(p_spq->comp_bitmap_idx,
+			    p_spq->p_comp_bitmap);
+		p_spq->comp_bitmap_idx++;
+		qed_chain_return_produced(&p_spq->chain);
+	}
+}
+
 int qed_spq_post(struct qed_hwfn *p_hwfn,
 		 struct qed_spq_entry *p_ent, u8 *fw_return_code)
 {
@@ -824,11 +845,12 @@ int qed_spq_post(struct qed_hwfn *p_hwfn,
 				   p_ent->queue == &p_spq->unlimited_pending);
 
 		if (p_ent->queue == &p_spq->unlimited_pending) {
-			/* This is an allocated p_ent which does not need to
-			 * return to pool.
-			 */
+			struct qed_spq_entry *p_post_ent = p_ent->post_ent;
+
 			kfree(p_ent);
-			return rc;
+
+			/* Return the entry which was actually posted */
+			p_ent = p_post_ent;
 		}
 
 		if (rc)
@@ -842,7 +864,7 @@ int qed_spq_post(struct qed_hwfn *p_hwfn,
 spq_post_fail2:
 	spin_lock_bh(&p_spq->lock);
 	list_del(&p_ent->list);
-	qed_chain_return_produced(&p_spq->chain);
+	qed_spq_comp_bmap_update(p_hwfn, p_ent->elem.hdr.echo);
 
 spq_post_fail:
 	/* return to the free pool */
@@ -874,25 +896,8 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn,
 	spin_lock_bh(&p_spq->lock);
 	list_for_each_entry_safe(p_ent, tmp, &p_spq->completion_pending, list) {
 		if (p_ent->elem.hdr.echo == echo) {
-			u16 pos = le16_to_cpu(echo) % SPQ_RING_SIZE;
-
 			list_del(&p_ent->list);
-
-			/* Avoid overriding of SPQ entries when getting
-			 * out-of-order completions, by marking the completions
-			 * in a bitmap and increasing the chain consumer only
-			 * for the first successive completed entries.
-			 */
-			__set_bit(pos, p_spq->p_comp_bitmap);
-
-			while (test_bit(p_spq->comp_bitmap_idx,
-					p_spq->p_comp_bitmap)) {
-				__clear_bit(p_spq->comp_bitmap_idx,
-					    p_spq->p_comp_bitmap);
-				p_spq->comp_bitmap_idx++;
-				qed_chain_return_produced(&p_spq->chain);
-			}
-
+			qed_spq_comp_bmap_update(p_hwfn, echo);
 			p_spq->comp_count++;
 			found = p_ent;
 			break;
@@ -931,11 +936,9 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn,
 			   QED_MSG_SPQ,
 			   "Got a completion without a callback function\n");
 
-	if ((found->comp_mode != QED_SPQ_MODE_EBLOCK) ||
-	    (found->queue == &p_spq->unlimited_pending))
+	if (found->comp_mode != QED_SPQ_MODE_EBLOCK)
 		/* EBLOCK  is responsible for returning its own entry into the
-		 * free list, unless it originally added the entry into the
-		 * unlimited pending list.
+		 * free list.
 		 */
 		qed_spq_return_entry(p_hwfn, found);
 
-- 
GitLab


From fb5e7438e7a3c8966e04ccb0760170e9e06f3699 Mon Sep 17 00:00:00 2001
From: Denis Bolotin <denis.bolotin@cavium.com>
Date: Thu, 8 Nov 2018 16:46:10 +0200
Subject: [PATCH 0843/1890] qed: Fix SPQ entries not returned to pool in error
 flows

qed_sp_destroy_request() API was added for SPQ users that need to
free/return the entry they acquired in their error flows.

Signed-off-by: Denis Bolotin <denis.bolotin@cavium.com>
Signed-off-by: Michal Kalderon <michal.kalderon@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_fcoe.c    | 11 +++++++---
 drivers/net/ethernet/qlogic/qed/qed_iscsi.c   |  1 +
 drivers/net/ethernet/qlogic/qed/qed_l2.c      | 12 ++++++----
 drivers/net/ethernet/qlogic/qed/qed_rdma.c    |  1 +
 drivers/net/ethernet/qlogic/qed/qed_roce.c    |  1 +
 drivers/net/ethernet/qlogic/qed/qed_sp.h      | 11 ++++++++++
 .../net/ethernet/qlogic/qed/qed_sp_commands.c | 22 ++++++++++++-------
 drivers/net/ethernet/qlogic/qed/qed_sriov.c   |  1 +
 8 files changed, 45 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_fcoe.c b/drivers/net/ethernet/qlogic/qed/qed_fcoe.c
index cc1b373c0ace5..46dc93d3b9b53 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_fcoe.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_fcoe.c
@@ -147,7 +147,8 @@ qed_sp_fcoe_func_start(struct qed_hwfn *p_hwfn,
 		       "Cannot satisfy CQ amount. CQs requested %d, CQs available %d. Aborting function start\n",
 		       fcoe_pf_params->num_cqs,
 		       p_hwfn->hw_info.feat_num[QED_FCOE_CQ]);
-		return -EINVAL;
+		rc = -EINVAL;
+		goto err;
 	}
 
 	p_data->mtu = cpu_to_le16(fcoe_pf_params->mtu);
@@ -156,14 +157,14 @@ qed_sp_fcoe_func_start(struct qed_hwfn *p_hwfn,
 
 	rc = qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_FCOE, &dummy_cid);
 	if (rc)
-		return rc;
+		goto err;
 
 	cxt_info.iid = dummy_cid;
 	rc = qed_cxt_get_cid_info(p_hwfn, &cxt_info);
 	if (rc) {
 		DP_NOTICE(p_hwfn, "Cannot find context info for dummy cid=%d\n",
 			  dummy_cid);
-		return rc;
+		goto err;
 	}
 	p_cxt = cxt_info.p_cxt;
 	SET_FIELD(p_cxt->tstorm_ag_context.flags3,
@@ -240,6 +241,10 @@ qed_sp_fcoe_func_start(struct qed_hwfn *p_hwfn,
 	rc = qed_spq_post(p_hwfn, p_ent, NULL);
 
 	return rc;
+
+err:
+	qed_sp_destroy_request(p_hwfn, p_ent);
+	return rc;
 }
 
 static int
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
index 1135387bd99d7..4f8a685d1a55f 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
@@ -200,6 +200,7 @@ qed_sp_iscsi_func_start(struct qed_hwfn *p_hwfn,
 		       "Cannot satisfy CQ amount. Queues requested %d, CQs available %d. Aborting function start\n",
 		       p_params->num_queues,
 		       p_hwfn->hw_info.feat_num[QED_ISCSI_CQ]);
+		qed_sp_destroy_request(p_hwfn, p_ent);
 		return -EINVAL;
 	}
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index 82a1bd1f8a8ce..67c02ea939062 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -740,8 +740,7 @@ int qed_sp_vport_update(struct qed_hwfn *p_hwfn,
 
 	rc = qed_sp_vport_update_rss(p_hwfn, p_ramrod, p_rss_params);
 	if (rc) {
-		/* Return spq entry which is taken in qed_sp_init_request()*/
-		qed_spq_return_entry(p_hwfn, p_ent);
+		qed_sp_destroy_request(p_hwfn, p_ent);
 		return rc;
 	}
 
@@ -1355,6 +1354,7 @@ qed_filter_ucast_common(struct qed_hwfn *p_hwfn,
 			DP_NOTICE(p_hwfn,
 				  "%d is not supported yet\n",
 				  p_filter_cmd->opcode);
+			qed_sp_destroy_request(p_hwfn, *pp_ent);
 			return -EINVAL;
 		}
 
@@ -2056,13 +2056,13 @@ qed_configure_rfs_ntuple_filter(struct qed_hwfn *p_hwfn,
 	} else {
 		rc = qed_fw_vport(p_hwfn, p_params->vport_id, &abs_vport_id);
 		if (rc)
-			return rc;
+			goto err;
 
 		if (p_params->qid != QED_RFS_NTUPLE_QID_RSS) {
 			rc = qed_fw_l2_queue(p_hwfn, p_params->qid,
 					     &abs_rx_q_id);
 			if (rc)
-				return rc;
+				goto err;
 
 			p_ramrod->rx_qid_valid = 1;
 			p_ramrod->rx_qid = cpu_to_le16(abs_rx_q_id);
@@ -2083,6 +2083,10 @@ qed_configure_rfs_ntuple_filter(struct qed_hwfn *p_hwfn,
 		   (u64)p_params->addr, p_params->length);
 
 	return qed_spq_post(p_hwfn, p_ent, NULL);
+
+err:
+	qed_sp_destroy_request(p_hwfn, p_ent);
+	return rc;
 }
 
 int qed_get_rxq_coalesce(struct qed_hwfn *p_hwfn,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
index c71391b9c757a..62113438c8809 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
@@ -1514,6 +1514,7 @@ qed_rdma_register_tid(void *rdma_cxt,
 	default:
 		rc = -EINVAL;
 		DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "rc = %d\n", rc);
+		qed_sp_destroy_request(p_hwfn, p_ent);
 		return rc;
 	}
 	SET_FIELD(p_ramrod->flags1,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c
index f9167d1354bbe..e49fada854108 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_roce.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c
@@ -745,6 +745,7 @@ static int qed_roce_sp_destroy_qp_responder(struct qed_hwfn *p_hwfn,
 		DP_NOTICE(p_hwfn,
 			  "qed destroy responder failed: cannot allocate memory (ramrod). rc = %d\n",
 			  rc);
+		qed_sp_destroy_request(p_hwfn, p_ent);
 		return rc;
 	}
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h
index 04259df8a5c24..3157c0d994417 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h
@@ -399,6 +399,17 @@ struct qed_sp_init_data {
 	struct qed_spq_comp_cb *p_comp_data;
 };
 
+/**
+ * @brief Returns a SPQ entry to the pool / frees the entry if allocated.
+ *        Should be called on in error flows after initializing the SPQ entry
+ *        and before posting it.
+ *
+ * @param p_hwfn
+ * @param p_ent
+ */
+void qed_sp_destroy_request(struct qed_hwfn *p_hwfn,
+			    struct qed_spq_entry *p_ent);
+
 int qed_sp_init_request(struct qed_hwfn *p_hwfn,
 			struct qed_spq_entry **pp_ent,
 			u8 cmd,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
index e86a1ea236133..888274fa208bc 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
@@ -47,6 +47,19 @@
 #include "qed_sp.h"
 #include "qed_sriov.h"
 
+void qed_sp_destroy_request(struct qed_hwfn *p_hwfn,
+			    struct qed_spq_entry *p_ent)
+{
+	/* qed_spq_get_entry() can either get an entry from the free_pool,
+	 * or, if no entries are left, allocate a new entry and add it to
+	 * the unlimited_pending list.
+	 */
+	if (p_ent->queue == &p_hwfn->p_spq->unlimited_pending)
+		kfree(p_ent);
+	else
+		qed_spq_return_entry(p_hwfn, p_ent);
+}
+
 int qed_sp_init_request(struct qed_hwfn *p_hwfn,
 			struct qed_spq_entry **pp_ent,
 			u8 cmd, u8 protocol, struct qed_sp_init_data *p_data)
@@ -111,14 +124,7 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn,
 	return 0;
 
 err:
-	/* qed_spq_get_entry() can either get an entry from the free_pool,
-	 * or, if no entries are left, allocate a new entry and add it to
-	 * the unlimited_pending list.
-	 */
-	if (p_ent->queue == &p_hwfn->p_spq->unlimited_pending)
-		kfree(p_ent);
-	else
-		qed_spq_return_entry(p_hwfn, p_ent);
+	qed_sp_destroy_request(p_hwfn, p_ent);
 
 	return -EINVAL;
 }
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
index 9b08a9d9e1513..ca6290fa0f309 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
@@ -101,6 +101,7 @@ static int qed_sp_vf_start(struct qed_hwfn *p_hwfn, struct qed_vf_info *p_vf)
 	default:
 		DP_NOTICE(p_hwfn, "Unknown VF personality %d\n",
 			  p_hwfn->hw_info.personality);
+		qed_sp_destroy_request(p_hwfn, p_ent);
 		return -EINVAL;
 	}
 
-- 
GitLab


From fa5c448d98f0df660bfcad3dd5facc027ef84cd3 Mon Sep 17 00:00:00 2001
From: Sagiv Ozeri <sagiv.ozeri@cavium.com>
Date: Thu, 8 Nov 2018 16:46:11 +0200
Subject: [PATCH 0844/1890] qed: Fix potential memory corruption

A stuck ramrod should be deleted from the completion_pending list,
otherwise it will be added again in the future and corrupt the list.

Return error value to inform that ramrod is stuck and should be deleted.

Signed-off-by: Sagiv Ozeri <sagiv.ozeri@cavium.com>
Signed-off-by: Denis Bolotin <denis.bolotin@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_spq.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c
index c1a81ec0524b8..0a9c5bb0fa486 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_spq.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c
@@ -142,6 +142,7 @@ static int qed_spq_block(struct qed_hwfn *p_hwfn,
 
 	DP_INFO(p_hwfn, "Ramrod is stuck, requesting MCP drain\n");
 	rc = qed_mcp_drain(p_hwfn, p_ptt);
+	qed_ptt_release(p_hwfn, p_ptt);
 	if (rc) {
 		DP_NOTICE(p_hwfn, "MCP drain failed\n");
 		goto err;
@@ -150,18 +151,15 @@ static int qed_spq_block(struct qed_hwfn *p_hwfn,
 	/* Retry after drain */
 	rc = __qed_spq_block(p_hwfn, p_ent, p_fw_ret, true);
 	if (!rc)
-		goto out;
+		return 0;
 
 	comp_done = (struct qed_spq_comp_done *)p_ent->comp_cb.cookie;
-	if (comp_done->done == 1)
+	if (comp_done->done == 1) {
 		if (p_fw_ret)
 			*p_fw_ret = comp_done->fw_return_code;
-out:
-	qed_ptt_release(p_hwfn, p_ptt);
-	return 0;
-
+		return 0;
+	}
 err:
-	qed_ptt_release(p_hwfn, p_ptt);
 	DP_NOTICE(p_hwfn,
 		  "Ramrod is stuck [CID %08x cmd %02x protocol %02x echo %04x]\n",
 		  le32_to_cpu(p_ent->elem.hdr.cid),
-- 
GitLab


From 8137b6ef0ce469154e5cf19f8e7fe04d9a72ac5e Mon Sep 17 00:00:00 2001
From: Thor Thayer <thor.thayer@linux.intel.com>
Date: Thu, 8 Nov 2018 11:42:14 -0600
Subject: [PATCH 0845/1890] net: stmmac: Fix RX packet size > 8191

Ping problems with packets > 8191 as shown:

PING 192.168.1.99 (192.168.1.99) 8150(8178) bytes of data.
8158 bytes from 192.168.1.99: icmp_seq=1 ttl=64 time=0.669 ms
wrong data byte 8144 should be 0xd0 but was 0x0
16    10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f
      20 21 22 23 24 25 26 27 28 29 2a 2b 2c 2d 2e 2f
%< ---------------snip--------------------------------------
8112  b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 ba bb bc bd be bf
      c0 c1 c2 c3 c4 c5 c6 c7 c8 c9 ca cb cc cd ce cf
8144  0 0 0 0 d0 d1
      ^^^^^^^
Notice the 4 bytes of 0 before the expected byte of d0.

Databook notes that the RX buffer must be a multiple of 4/8/16
bytes [1].

Update the DMA Buffer size define to 8188 instead of 8192. Remove
the -1 from the RX buffer size allocations and use the new
DMA Buffer size directly.

[1] Synopsys DesignWare Cores Ethernet MAC Universal v3.70a
    [section 8.4.2 - Table 8-24]

Tested on SoCFPGA Stratix10 with ping sweep from 100 to 8300 byte packets.

Fixes: 286a83721720 ("stmmac: add CHAINED descriptor mode support (V4)")
Suggested-by: Jose Abreu <jose.abreu@synopsys.com>
Signed-off-by: Thor Thayer <thor.thayer@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/common.h    | 3 ++-
 drivers/net/ethernet/stmicro/stmmac/descs_com.h | 2 +-
 drivers/net/ethernet/stmicro/stmmac/enh_desc.c  | 2 +-
 drivers/net/ethernet/stmicro/stmmac/ring_mode.c | 2 +-
 4 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index b1b305f8f4143..272b9ca663148 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -365,7 +365,8 @@ struct dma_features {
 
 /* GMAC TX FIFO is 8K, Rx FIFO is 16K */
 #define BUF_SIZE_16KiB 16384
-#define BUF_SIZE_8KiB 8192
+/* RX Buffer size must be < 8191 and multiple of 4/8/16 bytes */
+#define BUF_SIZE_8KiB 8188
 #define BUF_SIZE_4KiB 4096
 #define BUF_SIZE_2KiB 2048
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/descs_com.h b/drivers/net/ethernet/stmicro/stmmac/descs_com.h
index ca9d7e48034ce..40d6356a7e73c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/descs_com.h
+++ b/drivers/net/ethernet/stmicro/stmmac/descs_com.h
@@ -31,7 +31,7 @@
 /* Enhanced descriptors */
 static inline void ehn_desc_rx_set_on_ring(struct dma_desc *p, int end)
 {
-	p->des1 |= cpu_to_le32(((BUF_SIZE_8KiB - 1)
+	p->des1 |= cpu_to_le32((BUF_SIZE_8KiB
 			<< ERDES1_BUFFER2_SIZE_SHIFT)
 		   & ERDES1_BUFFER2_SIZE_MASK);
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
index 77914c89d7497..5ef91a790f9d1 100644
--- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
@@ -262,7 +262,7 @@ static void enh_desc_init_rx_desc(struct dma_desc *p, int disable_rx_ic,
 				  int mode, int end)
 {
 	p->des0 |= cpu_to_le32(RDES0_OWN);
-	p->des1 |= cpu_to_le32((BUF_SIZE_8KiB - 1) & ERDES1_BUFFER1_SIZE_MASK);
+	p->des1 |= cpu_to_le32(BUF_SIZE_8KiB & ERDES1_BUFFER1_SIZE_MASK);
 
 	if (mode == STMMAC_CHAIN_MODE)
 		ehn_desc_rx_set_on_chain(p);
diff --git a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c
index abc3f85270cd0..d8c5bc4122195 100644
--- a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c
+++ b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c
@@ -140,7 +140,7 @@ static void clean_desc3(void *priv_ptr, struct dma_desc *p)
 static int set_16kib_bfsize(int mtu)
 {
 	int ret = 0;
-	if (unlikely(mtu >= BUF_SIZE_8KiB))
+	if (unlikely(mtu > BUF_SIZE_8KiB))
 		ret = BUF_SIZE_16KiB;
 	return ret;
 }
-- 
GitLab


From 85b18b0237ce9986a81a1b9534b5e2ee116f5504 Mon Sep 17 00:00:00 2001
From: Stefan Wahren <stefan.wahren@i2se.com>
Date: Thu, 8 Nov 2018 20:38:26 +0100
Subject: [PATCH 0846/1890] net: smsc95xx: Fix MTU range

The commit f77f0aee4da4 ("net: use core MTU range checking in USB NIC
drivers") introduce a common MTU handling for usbnet. But it's missing
the necessary changes for smsc95xx. So set the MTU range accordingly.

This patch has been tested on a Raspberry Pi 3.

Fixes: f77f0aee4da4 ("net: use core MTU range checking in USB NIC drivers")
Signed-off-by: Stefan Wahren <stefan.wahren@i2se.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/smsc95xx.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
index 2d17f3b9bb165..f2d01cb6f958c 100644
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c
@@ -1321,6 +1321,8 @@ static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf)
 	dev->net->ethtool_ops = &smsc95xx_ethtool_ops;
 	dev->net->flags |= IFF_MULTICAST;
 	dev->net->hard_header_len += SMSC95XX_TX_OVERHEAD_CSUM;
+	dev->net->min_mtu = ETH_MIN_MTU;
+	dev->net->max_mtu = ETH_DATA_LEN;
 	dev->hard_mtu = dev->net->mtu + dev->net->hard_header_len;
 
 	pdata->dev = dev;
-- 
GitLab


From 1457d8cf7664f34c4ba534c1073821a559a2f6f9 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Wed, 7 Nov 2018 18:01:00 +0100
Subject: [PATCH 0847/1890] x86/xen: fix pv boot

Commit 9da3f2b7405440 ("x86/fault: BUG() when uaccess helpers fault on
kernel addresses") introduced a regression for booting Xen PV guests.

Xen PV guests are using __put_user() and __get_user() for accessing the
p2m map (physical to machine frame number map) as accesses might fail
in case of not populated areas of the map.

With above commit using __put_user() and __get_user() for accessing
kernel pages is no longer valid. So replace the Xen hack by adding
appropriate p2m access functions using the default fixup handler.

Fixes: 9da3f2b7405440 ("x86/fault: BUG() when uaccess helpers fault on kernel addresses")
Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 arch/x86/include/asm/xen/page.h | 35 +++++++++++++++++++++++++++++----
 arch/x86/xen/p2m.c              |  3 +--
 2 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 123e669bf363d..790ce08e41f20 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -9,7 +9,7 @@
 #include <linux/mm.h>
 #include <linux/device.h>
 
-#include <linux/uaccess.h>
+#include <asm/extable.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
 
@@ -93,12 +93,39 @@ clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
  */
 static inline int xen_safe_write_ulong(unsigned long *addr, unsigned long val)
 {
-	return __put_user(val, (unsigned long __user *)addr);
+	int ret = 0;
+
+	asm volatile("1: mov %[val], %[ptr]\n"
+		     "2:\n"
+		     ".section .fixup, \"ax\"\n"
+		     "3: sub $1, %[ret]\n"
+		     "   jmp 2b\n"
+		     ".previous\n"
+		     _ASM_EXTABLE(1b, 3b)
+		     : [ret] "+r" (ret), [ptr] "=m" (*addr)
+		     : [val] "r" (val));
+
+	return ret;
 }
 
-static inline int xen_safe_read_ulong(unsigned long *addr, unsigned long *val)
+static inline int xen_safe_read_ulong(const unsigned long *addr,
+				      unsigned long *val)
 {
-	return __get_user(*val, (unsigned long __user *)addr);
+	int ret = 0;
+	unsigned long rval = ~0ul;
+
+	asm volatile("1: mov %[ptr], %[rval]\n"
+		     "2:\n"
+		     ".section .fixup, \"ax\"\n"
+		     "3: sub $1, %[ret]\n"
+		     "   jmp 2b\n"
+		     ".previous\n"
+		     _ASM_EXTABLE(1b, 3b)
+		     : [ret] "+r" (ret), [rval] "+r" (rval)
+		     : [ptr] "m" (*addr));
+	*val = rval;
+
+	return ret;
 }
 
 #ifdef CONFIG_XEN_PV
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index d6d74efd8912a..4fe84436d5a7e 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -656,8 +656,7 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
 
 	/*
 	 * The interface requires atomic updates on p2m elements.
-	 * xen_safe_write_ulong() is using __put_user which does an atomic
-	 * store via asm().
+	 * xen_safe_write_ulong() is using an atomic store via asm().
 	 */
 	if (likely(!xen_safe_write_ulong(xen_p2m_addr + pfn, mfn)))
 		return true;
-- 
GitLab


From b13b8787c95c156b093305e9f006a6fb7d9119b1 Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Wed, 7 Nov 2018 13:36:07 -0800
Subject: [PATCH 0848/1890] bpf: Fix IPv6 dport byte order in bpf_sk_lookup_udp

Lookup functions in sk_lookup have different expectations about byte
order of provided arguments.

Specifically __inet_lookup, __udp4_lib_lookup and __udp6_lib_lookup
expect dport to be in network byte order and do ntohs(dport) internally.

At the same time __inet6_lookup expects dport to be in host byte order
and correspondingly name the argument hnum.

sk_lookup works correctly with __inet_lookup, __udp4_lib_lookup and
__inet6_lookup with regard to dport. But in __udp6_lib_lookup case it
uses host instead of expected network byte order. It makes result
returned by bpf_sk_lookup_udp for IPv6 incorrect.

The patch fixes byte order of dport passed to __udp6_lib_lookup.

Originally sk_lookup properly handled UDPv6, but not TCPv6. 5ef0ae84f02a
fixes TCPv6 but breaks UDPv6.

Fixes: 5ef0ae84f02a ("bpf: Fix IPv6 dport byte-order in bpf_sk_lookup")
Signed-off-by: Andrey Ignatov <rdna@fb.com>
Acked-by: Joe Stringer <joe@wand.net.nz>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 net/core/filter.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/net/core/filter.c b/net/core/filter.c
index e521c5ebc7d11..9a1327eb25faf 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4852,18 +4852,17 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
 	} else {
 		struct in6_addr *src6 = (struct in6_addr *)&tuple->ipv6.saddr;
 		struct in6_addr *dst6 = (struct in6_addr *)&tuple->ipv6.daddr;
-		u16 hnum = ntohs(tuple->ipv6.dport);
 		int sdif = inet6_sdif(skb);
 
 		if (proto == IPPROTO_TCP)
 			sk = __inet6_lookup(net, &tcp_hashinfo, skb, 0,
 					    src6, tuple->ipv6.sport,
-					    dst6, hnum,
+					    dst6, ntohs(tuple->ipv6.dport),
 					    dif, sdif, &refcounted);
 		else if (likely(ipv6_bpf_stub))
 			sk = ipv6_bpf_stub->udp6_lib_lookup(net,
 							    src6, tuple->ipv6.sport,
-							    dst6, hnum,
+							    dst6, tuple->ipv6.dport,
 							    dif, sdif,
 							    &udp_table, skb);
 #endif
-- 
GitLab


From 49a249c387268882e8393dd1fafc51e3b21cb7a2 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Wed, 7 Nov 2018 19:55:36 -0800
Subject: [PATCH 0849/1890] tools/bpftool: copy a few net uapi headers to tools
 directory

Commit f6f3bac08ff9 ("tools/bpf: bpftool: add net support")
added certain networking support to bpftool.
The implementation relies on a relatively recent uapi header file
linux/tc_act/tc_bpf.h on the host which contains the marco
definition of TCA_ACT_BPF_ID.

Unfortunately, this is not the case for all distributions.
See the email message below where rhel-7.2 does not have
an up-to-date linux/tc_act/tc_bpf.h.
  https://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1799211.html
Further investigation found that linux/pkt_cls.h is also needed for macro
TCA_BPF_TAG.

This patch fixed the issue by copying linux/tc_act/tc_bpf.h
and linux/pkt_cls.h from kernel include/uapi directory to
tools/include/uapi directory so building the bpftool does not depend
on host system for these files.

Fixes: f6f3bac08ff9 ("tools/bpf: bpftool: add net support")
Reported-by: kernel test robot <rong.a.chen@intel.com>
Cc: Li Zhijian <zhijianx.li@intel.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/include/uapi/linux/pkt_cls.h       | 612 +++++++++++++++++++++++
 tools/include/uapi/linux/tc_act/tc_bpf.h |  37 ++
 2 files changed, 649 insertions(+)
 create mode 100644 tools/include/uapi/linux/pkt_cls.h
 create mode 100644 tools/include/uapi/linux/tc_act/tc_bpf.h

diff --git a/tools/include/uapi/linux/pkt_cls.h b/tools/include/uapi/linux/pkt_cls.h
new file mode 100644
index 0000000000000..401d0c1e612d3
--- /dev/null
+++ b/tools/include/uapi/linux/pkt_cls.h
@@ -0,0 +1,612 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __LINUX_PKT_CLS_H
+#define __LINUX_PKT_CLS_H
+
+#include <linux/types.h>
+#include <linux/pkt_sched.h>
+
+#define TC_COOKIE_MAX_SIZE 16
+
+/* Action attributes */
+enum {
+	TCA_ACT_UNSPEC,
+	TCA_ACT_KIND,
+	TCA_ACT_OPTIONS,
+	TCA_ACT_INDEX,
+	TCA_ACT_STATS,
+	TCA_ACT_PAD,
+	TCA_ACT_COOKIE,
+	__TCA_ACT_MAX
+};
+
+#define TCA_ACT_MAX __TCA_ACT_MAX
+#define TCA_OLD_COMPAT (TCA_ACT_MAX+1)
+#define TCA_ACT_MAX_PRIO 32
+#define TCA_ACT_BIND	1
+#define TCA_ACT_NOBIND	0
+#define TCA_ACT_UNBIND	1
+#define TCA_ACT_NOUNBIND	0
+#define TCA_ACT_REPLACE		1
+#define TCA_ACT_NOREPLACE	0
+
+#define TC_ACT_UNSPEC	(-1)
+#define TC_ACT_OK		0
+#define TC_ACT_RECLASSIFY	1
+#define TC_ACT_SHOT		2
+#define TC_ACT_PIPE		3
+#define TC_ACT_STOLEN		4
+#define TC_ACT_QUEUED		5
+#define TC_ACT_REPEAT		6
+#define TC_ACT_REDIRECT		7
+#define TC_ACT_TRAP		8 /* For hw path, this means "trap to cpu"
+				   * and don't further process the frame
+				   * in hardware. For sw path, this is
+				   * equivalent of TC_ACT_STOLEN - drop
+				   * the skb and act like everything
+				   * is alright.
+				   */
+#define TC_ACT_VALUE_MAX	TC_ACT_TRAP
+
+/* There is a special kind of actions called "extended actions",
+ * which need a value parameter. These have a local opcode located in
+ * the highest nibble, starting from 1. The rest of the bits
+ * are used to carry the value. These two parts together make
+ * a combined opcode.
+ */
+#define __TC_ACT_EXT_SHIFT 28
+#define __TC_ACT_EXT(local) ((local) << __TC_ACT_EXT_SHIFT)
+#define TC_ACT_EXT_VAL_MASK ((1 << __TC_ACT_EXT_SHIFT) - 1)
+#define TC_ACT_EXT_OPCODE(combined) ((combined) & (~TC_ACT_EXT_VAL_MASK))
+#define TC_ACT_EXT_CMP(combined, opcode) (TC_ACT_EXT_OPCODE(combined) == opcode)
+
+#define TC_ACT_JUMP __TC_ACT_EXT(1)
+#define TC_ACT_GOTO_CHAIN __TC_ACT_EXT(2)
+#define TC_ACT_EXT_OPCODE_MAX	TC_ACT_GOTO_CHAIN
+
+/* Action type identifiers*/
+enum {
+	TCA_ID_UNSPEC=0,
+	TCA_ID_POLICE=1,
+	/* other actions go here */
+	__TCA_ID_MAX=255
+};
+
+#define TCA_ID_MAX __TCA_ID_MAX
+
+struct tc_police {
+	__u32			index;
+	int			action;
+#define TC_POLICE_UNSPEC	TC_ACT_UNSPEC
+#define TC_POLICE_OK		TC_ACT_OK
+#define TC_POLICE_RECLASSIFY	TC_ACT_RECLASSIFY
+#define TC_POLICE_SHOT		TC_ACT_SHOT
+#define TC_POLICE_PIPE		TC_ACT_PIPE
+
+	__u32			limit;
+	__u32			burst;
+	__u32			mtu;
+	struct tc_ratespec	rate;
+	struct tc_ratespec	peakrate;
+	int			refcnt;
+	int			bindcnt;
+	__u32			capab;
+};
+
+struct tcf_t {
+	__u64   install;
+	__u64   lastuse;
+	__u64   expires;
+	__u64   firstuse;
+};
+
+struct tc_cnt {
+	int                   refcnt;
+	int                   bindcnt;
+};
+
+#define tc_gen \
+	__u32                 index; \
+	__u32                 capab; \
+	int                   action; \
+	int                   refcnt; \
+	int                   bindcnt
+
+enum {
+	TCA_POLICE_UNSPEC,
+	TCA_POLICE_TBF,
+	TCA_POLICE_RATE,
+	TCA_POLICE_PEAKRATE,
+	TCA_POLICE_AVRATE,
+	TCA_POLICE_RESULT,
+	TCA_POLICE_TM,
+	TCA_POLICE_PAD,
+	__TCA_POLICE_MAX
+#define TCA_POLICE_RESULT TCA_POLICE_RESULT
+};
+
+#define TCA_POLICE_MAX (__TCA_POLICE_MAX - 1)
+
+/* tca flags definitions */
+#define TCA_CLS_FLAGS_SKIP_HW	(1 << 0) /* don't offload filter to HW */
+#define TCA_CLS_FLAGS_SKIP_SW	(1 << 1) /* don't use filter in SW */
+#define TCA_CLS_FLAGS_IN_HW	(1 << 2) /* filter is offloaded to HW */
+#define TCA_CLS_FLAGS_NOT_IN_HW (1 << 3) /* filter isn't offloaded to HW */
+#define TCA_CLS_FLAGS_VERBOSE	(1 << 4) /* verbose logging */
+
+/* U32 filters */
+
+#define TC_U32_HTID(h) ((h)&0xFFF00000)
+#define TC_U32_USERHTID(h) (TC_U32_HTID(h)>>20)
+#define TC_U32_HASH(h) (((h)>>12)&0xFF)
+#define TC_U32_NODE(h) ((h)&0xFFF)
+#define TC_U32_KEY(h) ((h)&0xFFFFF)
+#define TC_U32_UNSPEC	0
+#define TC_U32_ROOT	(0xFFF00000)
+
+enum {
+	TCA_U32_UNSPEC,
+	TCA_U32_CLASSID,
+	TCA_U32_HASH,
+	TCA_U32_LINK,
+	TCA_U32_DIVISOR,
+	TCA_U32_SEL,
+	TCA_U32_POLICE,
+	TCA_U32_ACT,
+	TCA_U32_INDEV,
+	TCA_U32_PCNT,
+	TCA_U32_MARK,
+	TCA_U32_FLAGS,
+	TCA_U32_PAD,
+	__TCA_U32_MAX
+};
+
+#define TCA_U32_MAX (__TCA_U32_MAX - 1)
+
+struct tc_u32_key {
+	__be32		mask;
+	__be32		val;
+	int		off;
+	int		offmask;
+};
+
+struct tc_u32_sel {
+	unsigned char		flags;
+	unsigned char		offshift;
+	unsigned char		nkeys;
+
+	__be16			offmask;
+	__u16			off;
+	short			offoff;
+
+	short			hoff;
+	__be32			hmask;
+	struct tc_u32_key	keys[0];
+};
+
+struct tc_u32_mark {
+	__u32		val;
+	__u32		mask;
+	__u32		success;
+};
+
+struct tc_u32_pcnt {
+	__u64 rcnt;
+	__u64 rhit;
+	__u64 kcnts[0];
+};
+
+/* Flags */
+
+#define TC_U32_TERMINAL		1
+#define TC_U32_OFFSET		2
+#define TC_U32_VAROFFSET	4
+#define TC_U32_EAT		8
+
+#define TC_U32_MAXDEPTH 8
+
+
+/* RSVP filter */
+
+enum {
+	TCA_RSVP_UNSPEC,
+	TCA_RSVP_CLASSID,
+	TCA_RSVP_DST,
+	TCA_RSVP_SRC,
+	TCA_RSVP_PINFO,
+	TCA_RSVP_POLICE,
+	TCA_RSVP_ACT,
+	__TCA_RSVP_MAX
+};
+
+#define TCA_RSVP_MAX (__TCA_RSVP_MAX - 1 )
+
+struct tc_rsvp_gpi {
+	__u32	key;
+	__u32	mask;
+	int	offset;
+};
+
+struct tc_rsvp_pinfo {
+	struct tc_rsvp_gpi dpi;
+	struct tc_rsvp_gpi spi;
+	__u8	protocol;
+	__u8	tunnelid;
+	__u8	tunnelhdr;
+	__u8	pad;
+};
+
+/* ROUTE filter */
+
+enum {
+	TCA_ROUTE4_UNSPEC,
+	TCA_ROUTE4_CLASSID,
+	TCA_ROUTE4_TO,
+	TCA_ROUTE4_FROM,
+	TCA_ROUTE4_IIF,
+	TCA_ROUTE4_POLICE,
+	TCA_ROUTE4_ACT,
+	__TCA_ROUTE4_MAX
+};
+
+#define TCA_ROUTE4_MAX (__TCA_ROUTE4_MAX - 1)
+
+
+/* FW filter */
+
+enum {
+	TCA_FW_UNSPEC,
+	TCA_FW_CLASSID,
+	TCA_FW_POLICE,
+	TCA_FW_INDEV, /*  used by CONFIG_NET_CLS_IND */
+	TCA_FW_ACT, /* used by CONFIG_NET_CLS_ACT */
+	TCA_FW_MASK,
+	__TCA_FW_MAX
+};
+
+#define TCA_FW_MAX (__TCA_FW_MAX - 1)
+
+/* TC index filter */
+
+enum {
+	TCA_TCINDEX_UNSPEC,
+	TCA_TCINDEX_HASH,
+	TCA_TCINDEX_MASK,
+	TCA_TCINDEX_SHIFT,
+	TCA_TCINDEX_FALL_THROUGH,
+	TCA_TCINDEX_CLASSID,
+	TCA_TCINDEX_POLICE,
+	TCA_TCINDEX_ACT,
+	__TCA_TCINDEX_MAX
+};
+
+#define TCA_TCINDEX_MAX     (__TCA_TCINDEX_MAX - 1)
+
+/* Flow filter */
+
+enum {
+	FLOW_KEY_SRC,
+	FLOW_KEY_DST,
+	FLOW_KEY_PROTO,
+	FLOW_KEY_PROTO_SRC,
+	FLOW_KEY_PROTO_DST,
+	FLOW_KEY_IIF,
+	FLOW_KEY_PRIORITY,
+	FLOW_KEY_MARK,
+	FLOW_KEY_NFCT,
+	FLOW_KEY_NFCT_SRC,
+	FLOW_KEY_NFCT_DST,
+	FLOW_KEY_NFCT_PROTO_SRC,
+	FLOW_KEY_NFCT_PROTO_DST,
+	FLOW_KEY_RTCLASSID,
+	FLOW_KEY_SKUID,
+	FLOW_KEY_SKGID,
+	FLOW_KEY_VLAN_TAG,
+	FLOW_KEY_RXHASH,
+	__FLOW_KEY_MAX,
+};
+
+#define FLOW_KEY_MAX	(__FLOW_KEY_MAX - 1)
+
+enum {
+	FLOW_MODE_MAP,
+	FLOW_MODE_HASH,
+};
+
+enum {
+	TCA_FLOW_UNSPEC,
+	TCA_FLOW_KEYS,
+	TCA_FLOW_MODE,
+	TCA_FLOW_BASECLASS,
+	TCA_FLOW_RSHIFT,
+	TCA_FLOW_ADDEND,
+	TCA_FLOW_MASK,
+	TCA_FLOW_XOR,
+	TCA_FLOW_DIVISOR,
+	TCA_FLOW_ACT,
+	TCA_FLOW_POLICE,
+	TCA_FLOW_EMATCHES,
+	TCA_FLOW_PERTURB,
+	__TCA_FLOW_MAX
+};
+
+#define TCA_FLOW_MAX	(__TCA_FLOW_MAX - 1)
+
+/* Basic filter */
+
+enum {
+	TCA_BASIC_UNSPEC,
+	TCA_BASIC_CLASSID,
+	TCA_BASIC_EMATCHES,
+	TCA_BASIC_ACT,
+	TCA_BASIC_POLICE,
+	__TCA_BASIC_MAX
+};
+
+#define TCA_BASIC_MAX (__TCA_BASIC_MAX - 1)
+
+
+/* Cgroup classifier */
+
+enum {
+	TCA_CGROUP_UNSPEC,
+	TCA_CGROUP_ACT,
+	TCA_CGROUP_POLICE,
+	TCA_CGROUP_EMATCHES,
+	__TCA_CGROUP_MAX,
+};
+
+#define TCA_CGROUP_MAX (__TCA_CGROUP_MAX - 1)
+
+/* BPF classifier */
+
+#define TCA_BPF_FLAG_ACT_DIRECT		(1 << 0)
+
+enum {
+	TCA_BPF_UNSPEC,
+	TCA_BPF_ACT,
+	TCA_BPF_POLICE,
+	TCA_BPF_CLASSID,
+	TCA_BPF_OPS_LEN,
+	TCA_BPF_OPS,
+	TCA_BPF_FD,
+	TCA_BPF_NAME,
+	TCA_BPF_FLAGS,
+	TCA_BPF_FLAGS_GEN,
+	TCA_BPF_TAG,
+	TCA_BPF_ID,
+	__TCA_BPF_MAX,
+};
+
+#define TCA_BPF_MAX (__TCA_BPF_MAX - 1)
+
+/* Flower classifier */
+
+enum {
+	TCA_FLOWER_UNSPEC,
+	TCA_FLOWER_CLASSID,
+	TCA_FLOWER_INDEV,
+	TCA_FLOWER_ACT,
+	TCA_FLOWER_KEY_ETH_DST,		/* ETH_ALEN */
+	TCA_FLOWER_KEY_ETH_DST_MASK,	/* ETH_ALEN */
+	TCA_FLOWER_KEY_ETH_SRC,		/* ETH_ALEN */
+	TCA_FLOWER_KEY_ETH_SRC_MASK,	/* ETH_ALEN */
+	TCA_FLOWER_KEY_ETH_TYPE,	/* be16 */
+	TCA_FLOWER_KEY_IP_PROTO,	/* u8 */
+	TCA_FLOWER_KEY_IPV4_SRC,	/* be32 */
+	TCA_FLOWER_KEY_IPV4_SRC_MASK,	/* be32 */
+	TCA_FLOWER_KEY_IPV4_DST,	/* be32 */
+	TCA_FLOWER_KEY_IPV4_DST_MASK,	/* be32 */
+	TCA_FLOWER_KEY_IPV6_SRC,	/* struct in6_addr */
+	TCA_FLOWER_KEY_IPV6_SRC_MASK,	/* struct in6_addr */
+	TCA_FLOWER_KEY_IPV6_DST,	/* struct in6_addr */
+	TCA_FLOWER_KEY_IPV6_DST_MASK,	/* struct in6_addr */
+	TCA_FLOWER_KEY_TCP_SRC,		/* be16 */
+	TCA_FLOWER_KEY_TCP_DST,		/* be16 */
+	TCA_FLOWER_KEY_UDP_SRC,		/* be16 */
+	TCA_FLOWER_KEY_UDP_DST,		/* be16 */
+
+	TCA_FLOWER_FLAGS,
+	TCA_FLOWER_KEY_VLAN_ID,		/* be16 */
+	TCA_FLOWER_KEY_VLAN_PRIO,	/* u8   */
+	TCA_FLOWER_KEY_VLAN_ETH_TYPE,	/* be16 */
+
+	TCA_FLOWER_KEY_ENC_KEY_ID,	/* be32 */
+	TCA_FLOWER_KEY_ENC_IPV4_SRC,	/* be32 */
+	TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK,/* be32 */
+	TCA_FLOWER_KEY_ENC_IPV4_DST,	/* be32 */
+	TCA_FLOWER_KEY_ENC_IPV4_DST_MASK,/* be32 */
+	TCA_FLOWER_KEY_ENC_IPV6_SRC,	/* struct in6_addr */
+	TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK,/* struct in6_addr */
+	TCA_FLOWER_KEY_ENC_IPV6_DST,	/* struct in6_addr */
+	TCA_FLOWER_KEY_ENC_IPV6_DST_MASK,/* struct in6_addr */
+
+	TCA_FLOWER_KEY_TCP_SRC_MASK,	/* be16 */
+	TCA_FLOWER_KEY_TCP_DST_MASK,	/* be16 */
+	TCA_FLOWER_KEY_UDP_SRC_MASK,	/* be16 */
+	TCA_FLOWER_KEY_UDP_DST_MASK,	/* be16 */
+	TCA_FLOWER_KEY_SCTP_SRC_MASK,	/* be16 */
+	TCA_FLOWER_KEY_SCTP_DST_MASK,	/* be16 */
+
+	TCA_FLOWER_KEY_SCTP_SRC,	/* be16 */
+	TCA_FLOWER_KEY_SCTP_DST,	/* be16 */
+
+	TCA_FLOWER_KEY_ENC_UDP_SRC_PORT,	/* be16 */
+	TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK,	/* be16 */
+	TCA_FLOWER_KEY_ENC_UDP_DST_PORT,	/* be16 */
+	TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK,	/* be16 */
+
+	TCA_FLOWER_KEY_FLAGS,		/* be32 */
+	TCA_FLOWER_KEY_FLAGS_MASK,	/* be32 */
+
+	TCA_FLOWER_KEY_ICMPV4_CODE,	/* u8 */
+	TCA_FLOWER_KEY_ICMPV4_CODE_MASK,/* u8 */
+	TCA_FLOWER_KEY_ICMPV4_TYPE,	/* u8 */
+	TCA_FLOWER_KEY_ICMPV4_TYPE_MASK,/* u8 */
+	TCA_FLOWER_KEY_ICMPV6_CODE,	/* u8 */
+	TCA_FLOWER_KEY_ICMPV6_CODE_MASK,/* u8 */
+	TCA_FLOWER_KEY_ICMPV6_TYPE,	/* u8 */
+	TCA_FLOWER_KEY_ICMPV6_TYPE_MASK,/* u8 */
+
+	TCA_FLOWER_KEY_ARP_SIP,		/* be32 */
+	TCA_FLOWER_KEY_ARP_SIP_MASK,	/* be32 */
+	TCA_FLOWER_KEY_ARP_TIP,		/* be32 */
+	TCA_FLOWER_KEY_ARP_TIP_MASK,	/* be32 */
+	TCA_FLOWER_KEY_ARP_OP,		/* u8 */
+	TCA_FLOWER_KEY_ARP_OP_MASK,	/* u8 */
+	TCA_FLOWER_KEY_ARP_SHA,		/* ETH_ALEN */
+	TCA_FLOWER_KEY_ARP_SHA_MASK,	/* ETH_ALEN */
+	TCA_FLOWER_KEY_ARP_THA,		/* ETH_ALEN */
+	TCA_FLOWER_KEY_ARP_THA_MASK,	/* ETH_ALEN */
+
+	TCA_FLOWER_KEY_MPLS_TTL,	/* u8 - 8 bits */
+	TCA_FLOWER_KEY_MPLS_BOS,	/* u8 - 1 bit */
+	TCA_FLOWER_KEY_MPLS_TC,		/* u8 - 3 bits */
+	TCA_FLOWER_KEY_MPLS_LABEL,	/* be32 - 20 bits */
+
+	TCA_FLOWER_KEY_TCP_FLAGS,	/* be16 */
+	TCA_FLOWER_KEY_TCP_FLAGS_MASK,	/* be16 */
+
+	TCA_FLOWER_KEY_IP_TOS,		/* u8 */
+	TCA_FLOWER_KEY_IP_TOS_MASK,	/* u8 */
+	TCA_FLOWER_KEY_IP_TTL,		/* u8 */
+	TCA_FLOWER_KEY_IP_TTL_MASK,	/* u8 */
+
+	TCA_FLOWER_KEY_CVLAN_ID,	/* be16 */
+	TCA_FLOWER_KEY_CVLAN_PRIO,	/* u8   */
+	TCA_FLOWER_KEY_CVLAN_ETH_TYPE,	/* be16 */
+
+	TCA_FLOWER_KEY_ENC_IP_TOS,	/* u8 */
+	TCA_FLOWER_KEY_ENC_IP_TOS_MASK,	/* u8 */
+	TCA_FLOWER_KEY_ENC_IP_TTL,	/* u8 */
+	TCA_FLOWER_KEY_ENC_IP_TTL_MASK,	/* u8 */
+
+	TCA_FLOWER_KEY_ENC_OPTS,
+	TCA_FLOWER_KEY_ENC_OPTS_MASK,
+
+	TCA_FLOWER_IN_HW_COUNT,
+
+	__TCA_FLOWER_MAX,
+};
+
+#define TCA_FLOWER_MAX (__TCA_FLOWER_MAX - 1)
+
+enum {
+	TCA_FLOWER_KEY_ENC_OPTS_UNSPEC,
+	TCA_FLOWER_KEY_ENC_OPTS_GENEVE, /* Nested
+					 * TCA_FLOWER_KEY_ENC_OPT_GENEVE_
+					 * attributes
+					 */
+	__TCA_FLOWER_KEY_ENC_OPTS_MAX,
+};
+
+#define TCA_FLOWER_KEY_ENC_OPTS_MAX (__TCA_FLOWER_KEY_ENC_OPTS_MAX - 1)
+
+enum {
+	TCA_FLOWER_KEY_ENC_OPT_GENEVE_UNSPEC,
+	TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS,            /* u16 */
+	TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE,             /* u8 */
+	TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA,             /* 4 to 128 bytes */
+
+	__TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX,
+};
+
+#define TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX \
+		(__TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX - 1)
+
+enum {
+	TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0),
+	TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1),
+};
+
+/* Match-all classifier */
+
+enum {
+	TCA_MATCHALL_UNSPEC,
+	TCA_MATCHALL_CLASSID,
+	TCA_MATCHALL_ACT,
+	TCA_MATCHALL_FLAGS,
+	__TCA_MATCHALL_MAX,
+};
+
+#define TCA_MATCHALL_MAX (__TCA_MATCHALL_MAX - 1)
+
+/* Extended Matches */
+
+struct tcf_ematch_tree_hdr {
+	__u16		nmatches;
+	__u16		progid;
+};
+
+enum {
+	TCA_EMATCH_TREE_UNSPEC,
+	TCA_EMATCH_TREE_HDR,
+	TCA_EMATCH_TREE_LIST,
+	__TCA_EMATCH_TREE_MAX
+};
+#define TCA_EMATCH_TREE_MAX (__TCA_EMATCH_TREE_MAX - 1)
+
+struct tcf_ematch_hdr {
+	__u16		matchid;
+	__u16		kind;
+	__u16		flags;
+	__u16		pad; /* currently unused */
+};
+
+/*  0                   1
+ *  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 
+ * +-----------------------+-+-+---+
+ * |         Unused        |S|I| R |
+ * +-----------------------+-+-+---+
+ *
+ * R(2) ::= relation to next ematch
+ *          where: 0 0 END (last ematch)
+ *                 0 1 AND
+ *                 1 0 OR
+ *                 1 1 Unused (invalid)
+ * I(1) ::= invert result
+ * S(1) ::= simple payload
+ */
+#define TCF_EM_REL_END	0
+#define TCF_EM_REL_AND	(1<<0)
+#define TCF_EM_REL_OR	(1<<1)
+#define TCF_EM_INVERT	(1<<2)
+#define TCF_EM_SIMPLE	(1<<3)
+
+#define TCF_EM_REL_MASK	3
+#define TCF_EM_REL_VALID(v) (((v) & TCF_EM_REL_MASK) != TCF_EM_REL_MASK)
+
+enum {
+	TCF_LAYER_LINK,
+	TCF_LAYER_NETWORK,
+	TCF_LAYER_TRANSPORT,
+	__TCF_LAYER_MAX
+};
+#define TCF_LAYER_MAX (__TCF_LAYER_MAX - 1)
+
+/* Ematch type assignments
+ *   1..32767		Reserved for ematches inside kernel tree
+ *   32768..65535	Free to use, not reliable
+ */
+#define	TCF_EM_CONTAINER	0
+#define	TCF_EM_CMP		1
+#define	TCF_EM_NBYTE		2
+#define	TCF_EM_U32		3
+#define	TCF_EM_META		4
+#define	TCF_EM_TEXT		5
+#define	TCF_EM_VLAN		6
+#define	TCF_EM_CANID		7
+#define	TCF_EM_IPSET		8
+#define	TCF_EM_IPT		9
+#define	TCF_EM_MAX		9
+
+enum {
+	TCF_EM_PROG_TC
+};
+
+enum {
+	TCF_EM_OPND_EQ,
+	TCF_EM_OPND_GT,
+	TCF_EM_OPND_LT
+};
+
+#endif
diff --git a/tools/include/uapi/linux/tc_act/tc_bpf.h b/tools/include/uapi/linux/tc_act/tc_bpf.h
new file mode 100644
index 0000000000000..6e89a5df49a46
--- /dev/null
+++ b/tools/include/uapi/linux/tc_act/tc_bpf.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __LINUX_TC_BPF_H
+#define __LINUX_TC_BPF_H
+
+#include <linux/pkt_cls.h>
+
+#define TCA_ACT_BPF 13
+
+struct tc_act_bpf {
+	tc_gen;
+};
+
+enum {
+	TCA_ACT_BPF_UNSPEC,
+	TCA_ACT_BPF_TM,
+	TCA_ACT_BPF_PARMS,
+	TCA_ACT_BPF_OPS_LEN,
+	TCA_ACT_BPF_OPS,
+	TCA_ACT_BPF_FD,
+	TCA_ACT_BPF_NAME,
+	TCA_ACT_BPF_PAD,
+	TCA_ACT_BPF_TAG,
+	TCA_ACT_BPF_ID,
+	__TCA_ACT_BPF_MAX,
+};
+#define TCA_ACT_BPF_MAX (__TCA_ACT_BPF_MAX - 1)
+
+#endif
-- 
GitLab


From 53909030aa29bffe1f8490df62176c2375135652 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Thu, 8 Nov 2018 11:52:25 +0000
Subject: [PATCH 0850/1890] tools: bpftool: prevent infinite loop in
 get_fdinfo()

Function getline() returns -1 on failure to read a line, thus creating
an infinite loop in get_fdinfo() if the key is not found. Fix it by
calling the function only as long as we get a strictly positive return
value.

Found by copying the code for a key which is not always present...

Fixes: 71bb428fe2c1 ("tools: bpf: add bpftool")
Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index 25af85304ebee..e070b00ac3094 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -304,7 +304,7 @@ char *get_fdinfo(int fd, const char *key)
 		return NULL;
 	}
 
-	while ((n = getline(&line, &line_n, fdi))) {
+	while ((n = getline(&line, &line_n, fdi)) > 0) {
 		char *value;
 		int len;
 
-- 
GitLab


From a8bfd2bc29f3bf35c7b588b102662375b0184475 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Thu, 8 Nov 2018 11:52:26 +0000
Subject: [PATCH 0851/1890] tools: bpftool: fix plain output and doc for
 --bpffs option

Edit the documentation of the -f|--bpffs option to make it explicit that
it dumps paths of pinned programs when bpftool is used to list the
programs only, so that users do not believe they will see the name of
the newly pinned program with "bpftool prog pin" or "bpftool prog load".

Also fix the plain output: do not add a blank line after each program
block, in order to remain consistent with what bpftool does when the
option is not passed.

Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/Documentation/bpftool-prog.rst | 3 ++-
 tools/bpf/bpftool/prog.c                         | 3 +--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index ac4e904b10fbd..81fb97acfaeb4 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -124,7 +124,8 @@ OPTIONS
 		  Generate human-readable JSON output. Implies **-j**.
 
 	-f, --bpffs
-		  Show file names of pinned programs.
+		  When showing BPF programs, show file names of pinned
+		  programs.
 
 EXAMPLES
 ========
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 5302ee282409e..9785244acc7be 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -357,10 +357,9 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd)
 	if (!hash_empty(prog_table.table)) {
 		struct pinned_obj *obj;
 
-		printf("\n");
 		hash_for_each_possible(prog_table.table, obj, hash, info->id) {
 			if (obj->id == info->id)
-				printf("\tpinned %s\n", obj->path);
+				printf("\n\tpinned %s", obj->path);
 		}
 	}
 
-- 
GitLab


From f120919f9905a2cad9dea792a28a11fb623f72c1 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Thu, 8 Nov 2018 11:52:27 +0000
Subject: [PATCH 0852/1890] tools: bpftool: pass an argument to silence
 open_obj_pinned()

Function open_obj_pinned() prints error messages when it fails to open a
link in the BPF virtual file system. However, in some occasions it is
not desirable to print an error, for example when we parse all links
under the bpffs root, and the error is due to some paths actually being
symbolic links.

Example output:

    # ls -l /sys/fs/bpf/
    lrwxrwxrwx 1 root root 0 Oct 18 19:00 ip -> /sys/fs/bpf/tc/
    drwx------ 3 root root 0 Oct 18 19:00 tc
    lrwxrwxrwx 1 root root 0 Oct 18 19:00 xdp -> /sys/fs/bpf/tc/

    # bpftool --bpffs prog show
    Error: bpf obj get (/sys/fs/bpf): Permission denied
    Error: bpf obj get (/sys/fs/bpf): Permission denied

    # strace -e bpf bpftool --bpffs prog show
    bpf(BPF_OBJ_GET, {pathname="/sys/fs/bpf/ip", bpf_fd=0}, 72) = -1 EACCES (Permission denied)
    Error: bpf obj get (/sys/fs/bpf): Permission denied
    bpf(BPF_OBJ_GET, {pathname="/sys/fs/bpf/xdp", bpf_fd=0}, 72) = -1 EACCES (Permission denied)
    Error: bpf obj get (/sys/fs/bpf): Permission denied
    ...

To fix it, pass a bool as a second argument to the function, and prevent
it from printing an error when the argument is set to true.

Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/common.c | 15 ++++++++-------
 tools/bpf/bpftool/main.h   |  2 +-
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index e070b00ac3094..70fd48d79f611 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -130,16 +130,17 @@ static int mnt_bpffs(const char *target, char *buff, size_t bufflen)
 	return 0;
 }
 
-int open_obj_pinned(char *path)
+int open_obj_pinned(char *path, bool quiet)
 {
 	int fd;
 
 	fd = bpf_obj_get(path);
 	if (fd < 0) {
-		p_err("bpf obj get (%s): %s", path,
-		      errno == EACCES && !is_bpffs(dirname(path)) ?
-		    "directory not in bpf file system (bpffs)" :
-		    strerror(errno));
+		if (!quiet)
+			p_err("bpf obj get (%s): %s", path,
+			      errno == EACCES && !is_bpffs(dirname(path)) ?
+			    "directory not in bpf file system (bpffs)" :
+			    strerror(errno));
 		return -1;
 	}
 
@@ -151,7 +152,7 @@ int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type)
 	enum bpf_obj_type type;
 	int fd;
 
-	fd = open_obj_pinned(path);
+	fd = open_obj_pinned(path, false);
 	if (fd < 0)
 		return -1;
 
@@ -384,7 +385,7 @@ int build_pinned_obj_table(struct pinned_obj_table *tab,
 		while ((ftse = fts_read(fts))) {
 			if (!(ftse->fts_info & FTS_F))
 				continue;
-			fd = open_obj_pinned(ftse->fts_path);
+			fd = open_obj_pinned(ftse->fts_path, true);
 			if (fd < 0)
 				continue;
 
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 28322ace28565..a8bf1e2d9818d 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -127,7 +127,7 @@ int cmd_select(const struct cmd *cmds, int argc, char **argv,
 int get_fd_type(int fd);
 const char *get_fd_type_name(enum bpf_obj_type type);
 char *get_fdinfo(int fd, const char *key);
-int open_obj_pinned(char *path);
+int open_obj_pinned(char *path, bool quiet);
 int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type);
 int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32));
 int do_pin_fd(int fd, const char *name);
-- 
GitLab


From f98e46a251d04e1f9a7a14a7f2d166b34da525e5 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Thu, 8 Nov 2018 11:52:28 +0000
Subject: [PATCH 0853/1890] tools: bpftool: update references to other man
 pages in documentation

Update references to other bpftool man pages at the bottom of each
manual page. Also reference the "bpf(2)" and "bpf-helpers(7)" man pages.

References are sorted by number of man section, then by
"prog-and-map-go-first", the other pages in alphabetical order.

Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/Documentation/bpftool-cgroup.rst | 8 +++++++-
 tools/bpf/bpftool/Documentation/bpftool-map.rst    | 8 +++++++-
 tools/bpf/bpftool/Documentation/bpftool-net.rst    | 8 +++++++-
 tools/bpf/bpftool/Documentation/bpftool-perf.rst   | 8 +++++++-
 tools/bpf/bpftool/Documentation/bpftool-prog.rst   | 8 +++++++-
 tools/bpf/bpftool/Documentation/bpftool.rst        | 9 +++++++--
 6 files changed, 42 insertions(+), 7 deletions(-)

diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
index edbe81534c6d2..d07ccf8a23f71 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
@@ -137,4 +137,10 @@ EXAMPLES
 
 SEE ALSO
 ========
-	**bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-map**\ (8)
+	**bpf**\ (2),
+	**bpf-helpers**\ (7),
+	**bpftool**\ (8),
+	**bpftool-prog**\ (8),
+	**bpftool-map**\ (8),
+	**bpftool-net**\ (8),
+	**bpftool-perf**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index f55a2daed59b7..7bb787cfa9714 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -171,4 +171,10 @@ The following three commands are equivalent:
 
 SEE ALSO
 ========
-	**bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8)
+	**bpf**\ (2),
+	**bpf-helpers**\ (7),
+	**bpftool**\ (8),
+	**bpftool-prog**\ (8),
+	**bpftool-cgroup**\ (8),
+	**bpftool-net**\ (8),
+	**bpftool-perf**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-net.rst b/tools/bpf/bpftool/Documentation/bpftool-net.rst
index 408ec30d88726..ed87c9b619adc 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-net.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-net.rst
@@ -136,4 +136,10 @@ EXAMPLES
 
 SEE ALSO
 ========
-	**bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-map**\ (8)
+	**bpf**\ (2),
+	**bpf-helpers**\ (7),
+	**bpftool**\ (8),
+	**bpftool-prog**\ (8),
+	**bpftool-map**\ (8),
+	**bpftool-cgroup**\ (8),
+	**bpftool-perf**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-perf.rst b/tools/bpf/bpftool/Documentation/bpftool-perf.rst
index e3eb0eab76419..f4c5e5538bb8e 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-perf.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-perf.rst
@@ -78,4 +78,10 @@ EXAMPLES
 
 SEE ALSO
 ========
-	**bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-map**\ (8)
+	**bpf**\ (2),
+	**bpf-helpers**\ (7),
+	**bpftool**\ (8),
+	**bpftool-prog**\ (8),
+	**bpftool-map**\ (8),
+	**bpftool-cgroup**\ (8),
+	**bpftool-net**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 81fb97acfaeb4..ecf618807125d 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -207,4 +207,10 @@ EXAMPLES
 
 SEE ALSO
 ========
-	**bpftool**\ (8), **bpftool-map**\ (8), **bpftool-cgroup**\ (8)
+	**bpf**\ (2),
+	**bpf-helpers**\ (7),
+	**bpftool**\ (8),
+	**bpftool-map**\ (8),
+	**bpftool-cgroup**\ (8),
+	**bpftool-net**\ (8),
+	**bpftool-perf**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst
index 04cd4f92ab89c..129b7a9c0f9bc 100644
--- a/tools/bpf/bpftool/Documentation/bpftool.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool.rst
@@ -63,5 +63,10 @@ OPTIONS
 
 SEE ALSO
 ========
-	**bpftool-map**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8)
-        **bpftool-perf**\ (8), **bpftool-net**\ (8)
+	**bpf**\ (2),
+	**bpf-helpers**\ (7),
+	**bpftool-prog**\ (8),
+	**bpftool-map**\ (8),
+	**bpftool-cgroup**\ (8),
+	**bpftool-net**\ (8),
+	**bpftool-perf**\ (8)
-- 
GitLab


From 05cc09de4c017663a217630682041066f2f9a5cd Mon Sep 17 00:00:00 2001
From: Alexey Khoroshilov <khoroshilov@ispras.ru>
Date: Fri, 5 Oct 2018 23:22:06 +0300
Subject: [PATCH 0854/1890] mac80211_hwsim: fix module init error paths for
 netlink

There is no unregister netlink notifier and family on error paths
in init_mac80211_hwsim(). Also there is an error path where
hwsim_class is not destroyed.

Found by Linux Driver Verification project (linuxtesting.org).

Signed-off-by: Alexey Khoroshilov <khoroshilov@ispras.ru>
Fixes: 62759361eb49 ("mac80211-hwsim: Provide multicast event for HWSIM_CMD_NEW_RADIO")
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/mac80211_hwsim.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index aa8058264d5b5..07f958c633345 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -3703,16 +3703,16 @@ static int __init init_mac80211_hwsim(void)
 	if (err)
 		goto out_unregister_pernet;
 
+	err = hwsim_init_netlink();
+	if (err)
+		goto out_unregister_driver;
+
 	hwsim_class = class_create(THIS_MODULE, "mac80211_hwsim");
 	if (IS_ERR(hwsim_class)) {
 		err = PTR_ERR(hwsim_class);
-		goto out_unregister_driver;
+		goto out_exit_netlink;
 	}
 
-	err = hwsim_init_netlink();
-	if (err < 0)
-		goto out_unregister_driver;
-
 	for (i = 0; i < radios; i++) {
 		struct hwsim_new_radio_params param = { 0 };
 
@@ -3818,6 +3818,8 @@ static int __init init_mac80211_hwsim(void)
 	free_netdev(hwsim_mon);
 out_free_radios:
 	mac80211_hwsim_free();
+out_exit_netlink:
+	hwsim_exit_netlink();
 out_unregister_driver:
 	platform_driver_unregister(&mac80211_hwsim_driver);
 out_unregister_pernet:
-- 
GitLab


From 33483a6b88e4c4c3fc50178b185da52c55288b95 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Tue, 16 Oct 2018 02:35:30 +0000
Subject: [PATCH 0855/1890] mac80211: fix missing unlock on error in
 ieee80211_mark_sta_auth()

Add the missing unlock before return from function
ieee80211_mark_sta_auth() in the error handling case.

Cc: stable@vger.kernel.org
Fixes: fc107a933071 ("mac80211: Helper function for marking STA authenticated")
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
[use result variable/label instead of duplicating]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/mlme.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index d2bc8d57c87eb..bcf5ffc1567a4 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2766,6 +2766,7 @@ static bool ieee80211_mark_sta_auth(struct ieee80211_sub_if_data *sdata,
 {
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 	struct sta_info *sta;
+	bool result = true;
 
 	sdata_info(sdata, "authenticated\n");
 	ifmgd->auth_data->done = true;
@@ -2778,15 +2779,18 @@ static bool ieee80211_mark_sta_auth(struct ieee80211_sub_if_data *sdata,
 	sta = sta_info_get(sdata, bssid);
 	if (!sta) {
 		WARN_ONCE(1, "%s: STA %pM not found", sdata->name, bssid);
-		return false;
+		result = false;
+		goto out;
 	}
 	if (sta_info_move_state(sta, IEEE80211_STA_AUTH)) {
 		sdata_info(sdata, "failed moving %pM to auth\n", bssid);
-		return false;
+		result = false;
+		goto out;
 	}
-	mutex_unlock(&sdata->local->sta_mtx);
 
-	return true;
+out:
+	mutex_unlock(&sdata->local->sta_mtx);
+	return result;
 }
 
 static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
-- 
GitLab


From a1881c9b8a1edef0a5ae1d5c1b61406fe3402114 Mon Sep 17 00:00:00 2001
From: Vasyl Vavrychuk <vasyl.vavrychuk@globallogic.com>
Date: Thu, 18 Oct 2018 01:02:12 +0300
Subject: [PATCH 0856/1890] mac80211_hwsim: Timer should be initialized before
 device registered

Otherwise if network manager starts configuring Wi-Fi interface
immidiatelly after getting notification of its creation, we will get
NULL pointer dereference:

  BUG: unable to handle kernel NULL pointer dereference at           (null)
  IP: [<ffffffff95ae94c8>] hrtimer_active+0x28/0x50
  ...
  Call Trace:
   [<ffffffff95ae9997>] ? hrtimer_try_to_cancel+0x27/0x110
   [<ffffffff95ae9a95>] ? hrtimer_cancel+0x15/0x20
   [<ffffffffc0803bf0>] ? mac80211_hwsim_config+0x140/0x1c0 [mac80211_hwsim]

Cc: stable@vger.kernel.org
Signed-off-by: Vasyl Vavrychuk <vasyl.vavrychuk@globallogic.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/mac80211_hwsim.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 07f958c633345..d1464e3e1be21 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -2884,6 +2884,10 @@ static int mac80211_hwsim_new_radio(struct genl_info *info,
 
 	wiphy_ext_feature_set(hw->wiphy, NL80211_EXT_FEATURE_CQM_RSSI_LIST);
 
+	tasklet_hrtimer_init(&data->beacon_timer,
+			     mac80211_hwsim_beacon,
+			     CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+
 	err = ieee80211_register_hw(hw);
 	if (err < 0) {
 		pr_debug("mac80211_hwsim: ieee80211_register_hw failed (%d)\n",
@@ -2908,10 +2912,6 @@ static int mac80211_hwsim_new_radio(struct genl_info *info,
 				    data->debugfs,
 				    data, &hwsim_simulate_radar);
 
-	tasklet_hrtimer_init(&data->beacon_timer,
-			     mac80211_hwsim_beacon,
-			     CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
-
 	spin_lock_bh(&hwsim_radio_lock);
 	err = rhashtable_insert_fast(&hwsim_radios_rht, &data->rht,
 				     hwsim_rht_params);
-- 
GitLab


From 81c5dce2cd0bb0ecb61b6212410da5eb78cd8f79 Mon Sep 17 00:00:00 2001
From: Sergey Matyukevich <sergey.matyukevich.os@quantenna.com>
Date: Fri, 19 Oct 2018 15:40:13 +0000
Subject: [PATCH 0857/1890] cfg80211: add missing constraint for user-supplied
 VHT mask

Do a logical vht_capa &= vht_capa_mask of user-supplied VHT mask with
the driver-supplied mask of modifiable VHT capabilities.
Fix whitespaces and comment typos.

Signed-off-by: Sergey Matyukevich <sergey.matyukevich.os@quantenna.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/mlme.c | 4 ++--
 net/wireless/sme.c  | 2 ++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 12b3edf70a7b9..1615e503f8e39 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -272,11 +272,11 @@ void cfg80211_oper_and_ht_capa(struct ieee80211_ht_cap *ht_capa,
 
 	p1 = (u8*)(ht_capa);
 	p2 = (u8*)(ht_capa_mask);
-	for (i = 0; i<sizeof(*ht_capa); i++)
+	for (i = 0; i < sizeof(*ht_capa); i++)
 		p1[i] &= p2[i];
 }
 
-/*  Do a logical ht_capa &= ht_capa_mask.  */
+/*  Do a logical vht_capa &= vht_capa_mask.  */
 void cfg80211_oper_and_vht_capa(struct ieee80211_vht_cap *vht_capa,
 				const struct ieee80211_vht_cap *vht_capa_mask)
 {
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index d536b07582f8c..1c93412038dc7 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -1171,6 +1171,8 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev,
 
 	cfg80211_oper_and_ht_capa(&connect->ht_capa_mask,
 				  rdev->wiphy.ht_capa_mod_mask);
+	cfg80211_oper_and_vht_capa(&connect->vht_capa_mask,
+				   rdev->wiphy.vht_capa_mod_mask);
 
 	if (connkeys && connkeys->def >= 0) {
 		int idx;
-- 
GitLab


From c752cac9db1b0c469db7ba9d17af4ba708984db5 Mon Sep 17 00:00:00 2001
From: Yan-Hsuan Chuang <yhchuang@realtek.com>
Date: Tue, 23 Oct 2018 11:24:44 +0800
Subject: [PATCH 0858/1890] mac80211: fix GFP_KERNEL under tasklet context

cfg80211_sta_opmode_change_notify needs a gfp_t flag to hint the nl80211
stack when allocating new skb, but it is called under tasklet context
here with GFP_KERNEL and kernel will yield a warning about it.

Cc: stable@vger.kernel.org
Fixes: ff84e7bfe176 ("mac80211: Add support to notify ht/vht opmode modification.")
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
ACKed-by: Larry Finger <Larry.Finger@lwfinger.net>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/rx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 3bd3b57697970..a69ecfb212ed7 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -3063,7 +3063,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 			cfg80211_sta_opmode_change_notify(sdata->dev,
 							  rx->sta->addr,
 							  &sta_opmode,
-							  GFP_KERNEL);
+							  GFP_ATOMIC);
 			goto handled;
 		}
 		case WLAN_HT_ACTION_NOTIFY_CHANWIDTH: {
@@ -3100,7 +3100,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 			cfg80211_sta_opmode_change_notify(sdata->dev,
 							  rx->sta->addr,
 							  &sta_opmode,
-							  GFP_KERNEL);
+							  GFP_ATOMIC);
 			goto handled;
 		}
 		default:
-- 
GitLab


From 5c21e8100dfd57c806e833ae905e26efbb87840f Mon Sep 17 00:00:00 2001
From: Ben Greear <greearb@candelatech.com>
Date: Tue, 23 Oct 2018 13:36:52 -0700
Subject: [PATCH 0859/1890] mac80211: Clear beacon_int in ieee80211_do_stop

This fixes stale beacon-int values that would keep a netdev
from going up.

To reproduce:

Create two VAP on one radio.
vap1 has beacon-int 100, start it.
vap2 has beacon-int 240, start it (and it will fail
  because beacon-int mismatch).
reconfigure vap2 to have beacon-int 100 and start it.
  It will fail because the stale beacon-int 240 will be used
  in the ifup path and hostapd never gets a chance to set the
  new beacon interval.

Cc: stable@vger.kernel.org
Signed-off-by: Ben Greear <greearb@candelatech.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/iface.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 5836ddeac9e34..5f3c81e705c7d 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1015,6 +1015,8 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
 	if (local->open_count == 0)
 		ieee80211_clear_tx_pending(local);
 
+	sdata->vif.bss_conf.beacon_int = 0;
+
 	/*
 	 * If the interface goes down while suspended, presumably because
 	 * the device was unplugged and that happens before our resume,
-- 
GitLab


From c177db2d0d5e751d52d3827b8cfdb6ef92a95a2d Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 30 Oct 2018 09:17:44 +0100
Subject: [PATCH 0860/1890] cfg80211/mac80211: fix FTM settings across CSA

When FTM is enabled, doing a CSA will unexpectedly lose it since
the value of ftm_responder may be initialized to 0 instead of -1,
so fix that.

Fixes: 81e54d08d9d8 ("cfg80211: support FTM responder configuration/statistics")
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/nl80211.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 744b5851bbf90..8d763725498c1 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -7870,6 +7870,7 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	memset(&params, 0, sizeof(params));
+	params.beacon_csa.ftm_responder = -1;
 
 	if (!info->attrs[NL80211_ATTR_WIPHY_FREQ] ||
 	    !info->attrs[NL80211_ATTR_CH_SWITCH_COUNT])
-- 
GitLab


From 03b738625b1e58f4ae2bddf04706ab85c677af2d Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 30 Oct 2018 09:17:45 +0100
Subject: [PATCH 0861/1890] mac80211: fix CSA beacon allocation size

If the FTM responder settings are changed simultaneously with
the CSA beacon, the buffer size allocated isn't sufficient and
we'll have a heap overrun. Fix this.

While at it, also clean up the ftm_responder assignment, doing
it only if ftm_responder is non-zero is valid as it's 0 to start
with, but not really useful to understand the code.

Fixes: bc847970f432 ("mac80211: support FTM responder configuration/statistics")
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/cfg.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 51622333d4602..818aa00603495 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -2891,7 +2891,7 @@ cfg80211_beacon_dup(struct cfg80211_beacon_data *beacon)
 
 	len = beacon->head_len + beacon->tail_len + beacon->beacon_ies_len +
 	      beacon->proberesp_ies_len + beacon->assocresp_ies_len +
-	      beacon->probe_resp_len;
+	      beacon->probe_resp_len + beacon->lci_len + beacon->civicloc_len;
 
 	new_beacon = kzalloc(sizeof(*new_beacon) + len, GFP_KERNEL);
 	if (!new_beacon)
@@ -2934,8 +2934,9 @@ cfg80211_beacon_dup(struct cfg80211_beacon_data *beacon)
 		memcpy(pos, beacon->probe_resp, beacon->probe_resp_len);
 		pos += beacon->probe_resp_len;
 	}
-	if (beacon->ftm_responder)
-		new_beacon->ftm_responder = beacon->ftm_responder;
+
+	/* might copy -1, meaning no changes requested */
+	new_beacon->ftm_responder = beacon->ftm_responder;
 	if (beacon->lci) {
 		new_beacon->lci_len = beacon->lci_len;
 		new_beacon->lci = pos;
-- 
GitLab


From 113f3aaa81bd56aba02659786ed65cbd9cb9a6fc Mon Sep 17 00:00:00 2001
From: Sriram R <srirrama@codeaurora.org>
Date: Fri, 19 Oct 2018 14:42:59 +0530
Subject: [PATCH 0862/1890] cfg80211: Prevent regulatory restore during STA
 disconnect in concurrent interfaces

Currently when an AP and STA interfaces are active in the same or different
radios, regulatory settings are restored whenever the STA disconnects. This
restores all channel information including dfs states in all radios.
For example, if an AP interface is active in one radio and STA in another,
when radar is detected on the AP interface, the dfs state of the channel
will be changed to UNAVAILABLE. But when the STA interface disconnects,
this issues a regulatory disconnect hint which restores all regulatory
settings in all the radios attached and thereby losing the stored dfs
state on the other radio where the channel was marked as unavailable
earlier. Hence prevent such regulatory restore whenever another active
beaconing interface is present in the same or other radios.

Signed-off-by: Sriram R <srirrama@codeaurora.org>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/sme.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 1c93412038dc7..f741d8376a463 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -642,11 +642,15 @@ static bool cfg80211_is_all_idle(void)
 	 * All devices must be idle as otherwise if you are actively
 	 * scanning some new beacon hints could be learned and would
 	 * count as new regulatory hints.
+	 * Also if there is any other active beaconing interface we
+	 * need not issue a disconnect hint and reset any info such
+	 * as chan dfs state, etc.
 	 */
 	list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
 		list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
 			wdev_lock(wdev);
-			if (wdev->conn || wdev->current_bss)
+			if (wdev->conn || wdev->current_bss ||
+			    cfg80211_beaconing_iface_active(wdev))
 				is_all_idle = false;
 			wdev_unlock(wdev);
 		}
-- 
GitLab


From 68a031d22c57b94870ba13513c9d93b8a8119ab2 Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Mon, 5 Nov 2018 20:35:14 +0800
Subject: [PATCH 0863/1890] crypto: hisilicon - Fix NULL dereference for same
 dst and src

When the source and destination addresses for the cipher are the same, we
will get a NULL dereference from accessing the split destination
scatterlist memories, as shown:

[   56.565719] tcrypt:
[   56.565719] testing speed of async ecb(aes) (hisi_sec_aes_ecb) encryption
[   56.574683] tcrypt: test 0 (128 bit key, 16 byte blocks):
[   56.587585] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000
[   56.596361] Mem abort info:
[   56.599151]   ESR = 0x96000006
[   56.602196]   Exception class = DABT (current EL), IL = 32 bits
[   56.608105]   SET = 0, FnV = 0
[   56.611149]   EA = 0, S1PTW = 0
[   56.614280] Data abort info:
[   56.617151]   ISV = 0, ISS = 0x00000006
[   56.620976]   CM = 0, WnR = 0
[   56.623930] user pgtable: 4k pages, 48-bit VAs, pgdp = (____ptrval____)
[   56.630533] [0000000000000000] pgd=0000041fc7e4d003, pud=0000041fcd9bf003, pmd=0000000000000000
[   56.639224] Internal error: Oops: 96000006 [#1] PREEMPT SMP
[   56.644782] Modules linked in: tcrypt(+)
[   56.648695] CPU: 21 PID: 2326 Comm: insmod Tainted: G        W         4.19.0-rc6-00001-g3fabfb8-dirty #716
[   56.658420] Hardware name: Huawei Taishan 2280 /D05, BIOS Hisilicon D05 IT17 Nemo 2.0 RC0 10/05/2018
[   56.667537] pstate: 20000005 (nzCv daif -PAN -UAO)
[   56.672322] pc : sec_alg_skcipher_crypto+0x318/0x748
[   56.677274] lr : sec_alg_skcipher_crypto+0x178/0x748
[   56.682224] sp : ffff0000118e3840
[   56.685525] x29: ffff0000118e3840 x28: ffff841fbb3f8118
[   56.690825] x27: 0000000000000000 x26: 0000000000000000
[   56.696125] x25: ffff841fbb3f8080 x24: ffff841fbadc0018
[   56.701425] x23: ffff000009119000 x22: ffff841fbb24e280
[   56.706724] x21: ffff841ff212e780 x20: ffff841ff212e700
[   56.712023] x19: 0000000000000001 x18: ffffffffffffffff
[   56.717322] x17: 0000000000000000 x16: 0000000000000000
[   56.722621] x15: ffff0000091196c8 x14: 72635f7265687069
[   56.727920] x13: 636b735f676c615f x12: ffff000009119940
[   56.733219] x11: 0000000000000000 x10: 00000000006080c0
[   56.738519] x9 : 0000000000000000 x8 : ffff841fbb24e480
[   56.743818] x7 : ffff841fbb24e500 x6 : ffff841ff00cdcc0
[   56.749117] x5 : 0000000000000010 x4 : 0000000000000000
[   56.754416] x3 : ffff841fbb24e380 x2 : ffff841fbb24e480
[   56.759715] x1 : 0000000000000000 x0 : ffff000008f682c8
[   56.765016] Process insmod (pid: 2326, stack limit = 0x(____ptrval____))
[   56.771702] Call trace:
[   56.774136]  sec_alg_skcipher_crypto+0x318/0x748
[   56.778740]  sec_alg_skcipher_encrypt+0x10/0x18
[   56.783259]  test_skcipher_speed+0x2a0/0x700 [tcrypt]
[   56.788298]  do_test+0x18f8/0x48c8 [tcrypt]
[   56.792469]  tcrypt_mod_init+0x60/0x1000 [tcrypt]
[   56.797161]  do_one_initcall+0x5c/0x178
[   56.800985]  do_init_module+0x58/0x1b4
[   56.804721]  load_module+0x1da4/0x2150
[   56.808456]  __se_sys_init_module+0x14c/0x1e8
[   56.812799]  __arm64_sys_init_module+0x18/0x20
[   56.817231]  el0_svc_common+0x60/0xe8
[   56.820880]  el0_svc_handler+0x2c/0x80
[   56.824615]  el0_svc+0x8/0xc
[   56.827483] Code: a94c87a3 910b2000 f87b7842 f9004ba2 (b87b7821)
[   56.833564] ---[ end trace 0f63290590e93d94 ]---
Segmentation fault

Fix this by only accessing these memories when we have different src and
dst.

Fixes: 915e4e8413da ("crypto: hisilicon - SEC security accelerator driver")
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: John Garry <john.garry@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/sec/sec_algs.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/crypto/hisilicon/sec/sec_algs.c b/drivers/crypto/hisilicon/sec/sec_algs.c
index f7d6d690116ee..32c6c02cb9ae0 100644
--- a/drivers/crypto/hisilicon/sec/sec_algs.c
+++ b/drivers/crypto/hisilicon/sec/sec_algs.c
@@ -732,6 +732,7 @@ static int sec_alg_skcipher_crypto(struct skcipher_request *skreq,
 	int *splits_in_nents;
 	int *splits_out_nents = NULL;
 	struct sec_request_el *el, *temp;
+	bool split = skreq->src != skreq->dst;
 
 	mutex_init(&sec_req->lock);
 	sec_req->req_base = &skreq->base;
@@ -750,7 +751,7 @@ static int sec_alg_skcipher_crypto(struct skcipher_request *skreq,
 	if (ret)
 		goto err_free_split_sizes;
 
-	if (skreq->src != skreq->dst) {
+	if (split) {
 		sec_req->len_out = sg_nents(skreq->dst);
 		ret = sec_map_and_split_sg(skreq->dst, split_sizes, steps,
 					   &splits_out, &splits_out_nents,
@@ -785,8 +786,9 @@ static int sec_alg_skcipher_crypto(struct skcipher_request *skreq,
 					       split_sizes[i],
 					       skreq->src != skreq->dst,
 					       splits_in[i], splits_in_nents[i],
-					       splits_out[i],
-					       splits_out_nents[i], info);
+					       split ? splits_out[i] : NULL,
+					       split ? splits_out_nents[i] : 0,
+					       info);
 		if (IS_ERR(el)) {
 			ret = PTR_ERR(el);
 			goto err_free_elements;
@@ -854,7 +856,7 @@ static int sec_alg_skcipher_crypto(struct skcipher_request *skreq,
 				 crypto_skcipher_ivsize(atfm),
 				 DMA_BIDIRECTIONAL);
 err_unmap_out_sg:
-	if (skreq->src != skreq->dst)
+	if (split)
 		sec_unmap_sg_on_err(skreq->dst, steps, splits_out,
 				    splits_out_nents, sec_req->len_out,
 				    info->dev);
-- 
GitLab


From 0b0cf6af3f3151c26c27e8e51def5527091c3e69 Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Mon, 5 Nov 2018 20:35:15 +0800
Subject: [PATCH 0864/1890] crypto: hisilicon - Fix reference after free of
 memories on error path

coccicheck currently warns of the following issues in the driver:
drivers/crypto/hisilicon/sec/sec_algs.c:864:51-66: ERROR: reference preceded by free on line 812
drivers/crypto/hisilicon/sec/sec_algs.c:864:40-49: ERROR: reference preceded by free on line 813
drivers/crypto/hisilicon/sec/sec_algs.c:861:8-24: ERROR: reference preceded by free on line 814
drivers/crypto/hisilicon/sec/sec_algs.c:860:41-51: ERROR: reference preceded by free on line 815
drivers/crypto/hisilicon/sec/sec_algs.c:867:7-18: ERROR: reference preceded by free on line 816

It would appear than on certain error paths that we may attempt reference-
after-free some memories.

This patch fixes those issues. The solution doesn't look perfect, but
having same memories free'd possibly from separate functions makes it
tricky.

Fixes: 915e4e8413da ("crypto: hisilicon - SEC security accelerator driver")
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: John Garry <john.garry@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/sec/sec_algs.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/drivers/crypto/hisilicon/sec/sec_algs.c b/drivers/crypto/hisilicon/sec/sec_algs.c
index 32c6c02cb9ae0..cdc4f9a171d98 100644
--- a/drivers/crypto/hisilicon/sec/sec_algs.c
+++ b/drivers/crypto/hisilicon/sec/sec_algs.c
@@ -808,13 +808,6 @@ static int sec_alg_skcipher_crypto(struct skcipher_request *skreq,
 	 * more refined but this is unlikely to happen so no need.
 	 */
 
-	/* Cleanup - all elements in pointer arrays have been coppied */
-	kfree(splits_in_nents);
-	kfree(splits_in);
-	kfree(splits_out_nents);
-	kfree(splits_out);
-	kfree(split_sizes);
-
 	/* Grab a big lock for a long time to avoid concurrency issues */
 	mutex_lock(&queue->queuelock);
 
@@ -829,13 +822,13 @@ static int sec_alg_skcipher_crypto(struct skcipher_request *skreq,
 	     (!queue->havesoftqueue ||
 	      kfifo_avail(&queue->softqueue) > steps)) ||
 	    !list_empty(&ctx->backlog)) {
+		ret = -EBUSY;
 		if ((skreq->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) {
 			list_add_tail(&sec_req->backlog_head, &ctx->backlog);
 			mutex_unlock(&queue->queuelock);
-			return -EBUSY;
+			goto out;
 		}
 
-		ret = -EBUSY;
 		mutex_unlock(&queue->queuelock);
 		goto err_free_elements;
 	}
@@ -844,7 +837,15 @@ static int sec_alg_skcipher_crypto(struct skcipher_request *skreq,
 	if (ret)
 		goto err_free_elements;
 
-	return -EINPROGRESS;
+	ret = -EINPROGRESS;
+out:
+	/* Cleanup - all elements in pointer arrays have been copied */
+	kfree(splits_in_nents);
+	kfree(splits_in);
+	kfree(splits_out_nents);
+	kfree(splits_out);
+	kfree(split_sizes);
+	return ret;
 
 err_free_elements:
 	list_for_each_entry_safe(el, temp, &sec_req->elements, head) {
-- 
GitLab


From 508a1c4df085a547187eed346f1bfe5e381797f1 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Thu, 8 Nov 2018 23:55:16 +0100
Subject: [PATCH 0865/1890] crypto: simd - correctly take reqsize of wrapped
 skcipher into account

The simd wrapper's skcipher request context structure consists
of a single subrequest whose size is taken from the subordinate
skcipher. However, in simd_skcipher_init(), the reqsize that is
retrieved is not from the subordinate skcipher but from the
cryptd request structure, whose size is completely unrelated to
the actual wrapped skcipher.

Reported-by: Qian Cai <cai@gmx.us>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Qian Cai <cai@gmx.us>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/simd.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/crypto/simd.c b/crypto/simd.c
index ea7240be3001b..78e8d037ae2b3 100644
--- a/crypto/simd.c
+++ b/crypto/simd.c
@@ -124,8 +124,9 @@ static int simd_skcipher_init(struct crypto_skcipher *tfm)
 
 	ctx->cryptd_tfm = cryptd_tfm;
 
-	reqsize = sizeof(struct skcipher_request);
-	reqsize += crypto_skcipher_reqsize(&cryptd_tfm->base);
+	reqsize = crypto_skcipher_reqsize(cryptd_skcipher_child(cryptd_tfm));
+	reqsize = max(reqsize, crypto_skcipher_reqsize(&cryptd_tfm->base));
+	reqsize += sizeof(struct skcipher_request);
 
 	crypto_skcipher_set_reqsize(tfm, reqsize);
 
-- 
GitLab


From f43f39958beb206b53292801e216d9b8a660f087 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sat, 3 Nov 2018 14:56:00 -0700
Subject: [PATCH 0866/1890] crypto: user - fix leaking uninitialized memory to
 userspace

All bytes of the NETLINK_CRYPTO report structures must be initialized,
since they are copied to userspace.  The change from strncpy() to
strlcpy() broke this.  As a minimal fix, change it back.

Fixes: 4473710df1f8 ("crypto: user - Prepare for CRYPTO_MAX_ALG_NAME expansion")
Cc: <stable@vger.kernel.org> # v4.12+
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/crypto_user_base.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/crypto/crypto_user_base.c b/crypto/crypto_user_base.c
index e41f6cc33fff4..784748dbb19f0 100644
--- a/crypto/crypto_user_base.c
+++ b/crypto/crypto_user_base.c
@@ -84,7 +84,7 @@ static int crypto_report_cipher(struct sk_buff *skb, struct crypto_alg *alg)
 {
 	struct crypto_report_cipher rcipher;
 
-	strlcpy(rcipher.type, "cipher", sizeof(rcipher.type));
+	strncpy(rcipher.type, "cipher", sizeof(rcipher.type));
 
 	rcipher.blocksize = alg->cra_blocksize;
 	rcipher.min_keysize = alg->cra_cipher.cia_min_keysize;
@@ -103,7 +103,7 @@ static int crypto_report_comp(struct sk_buff *skb, struct crypto_alg *alg)
 {
 	struct crypto_report_comp rcomp;
 
-	strlcpy(rcomp.type, "compression", sizeof(rcomp.type));
+	strncpy(rcomp.type, "compression", sizeof(rcomp.type));
 	if (nla_put(skb, CRYPTOCFGA_REPORT_COMPRESS,
 		    sizeof(struct crypto_report_comp), &rcomp))
 		goto nla_put_failure;
@@ -117,7 +117,7 @@ static int crypto_report_acomp(struct sk_buff *skb, struct crypto_alg *alg)
 {
 	struct crypto_report_acomp racomp;
 
-	strlcpy(racomp.type, "acomp", sizeof(racomp.type));
+	strncpy(racomp.type, "acomp", sizeof(racomp.type));
 
 	if (nla_put(skb, CRYPTOCFGA_REPORT_ACOMP,
 		    sizeof(struct crypto_report_acomp), &racomp))
@@ -132,7 +132,7 @@ static int crypto_report_akcipher(struct sk_buff *skb, struct crypto_alg *alg)
 {
 	struct crypto_report_akcipher rakcipher;
 
-	strlcpy(rakcipher.type, "akcipher", sizeof(rakcipher.type));
+	strncpy(rakcipher.type, "akcipher", sizeof(rakcipher.type));
 
 	if (nla_put(skb, CRYPTOCFGA_REPORT_AKCIPHER,
 		    sizeof(struct crypto_report_akcipher), &rakcipher))
@@ -147,7 +147,7 @@ static int crypto_report_kpp(struct sk_buff *skb, struct crypto_alg *alg)
 {
 	struct crypto_report_kpp rkpp;
 
-	strlcpy(rkpp.type, "kpp", sizeof(rkpp.type));
+	strncpy(rkpp.type, "kpp", sizeof(rkpp.type));
 
 	if (nla_put(skb, CRYPTOCFGA_REPORT_KPP,
 		    sizeof(struct crypto_report_kpp), &rkpp))
@@ -161,10 +161,10 @@ static int crypto_report_kpp(struct sk_buff *skb, struct crypto_alg *alg)
 static int crypto_report_one(struct crypto_alg *alg,
 			     struct crypto_user_alg *ualg, struct sk_buff *skb)
 {
-	strlcpy(ualg->cru_name, alg->cra_name, sizeof(ualg->cru_name));
-	strlcpy(ualg->cru_driver_name, alg->cra_driver_name,
+	strncpy(ualg->cru_name, alg->cra_name, sizeof(ualg->cru_name));
+	strncpy(ualg->cru_driver_name, alg->cra_driver_name,
 		sizeof(ualg->cru_driver_name));
-	strlcpy(ualg->cru_module_name, module_name(alg->cra_module),
+	strncpy(ualg->cru_module_name, module_name(alg->cra_module),
 		sizeof(ualg->cru_module_name));
 
 	ualg->cru_type = 0;
@@ -177,7 +177,7 @@ static int crypto_report_one(struct crypto_alg *alg,
 	if (alg->cra_flags & CRYPTO_ALG_LARVAL) {
 		struct crypto_report_larval rl;
 
-		strlcpy(rl.type, "larval", sizeof(rl.type));
+		strncpy(rl.type, "larval", sizeof(rl.type));
 		if (nla_put(skb, CRYPTOCFGA_REPORT_LARVAL,
 			    sizeof(struct crypto_report_larval), &rl))
 			goto nla_put_failure;
-- 
GitLab


From 9f4debe38415583086ce814798eeb864aeb39551 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Sat, 3 Nov 2018 14:56:01 -0700
Subject: [PATCH 0867/1890] crypto: user - Zeroize whole structure given to
 user space

For preventing uninitialized data to be given to user-space (and so leak
potential useful data), the crypto_stat structure must be correctly
initialized.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Fixes: cac5818c25d0 ("crypto: user - Implement a generic crypto statistics")
Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
[EB: also fix it in crypto_reportstat_one()]
[EB: use sizeof(var) rather than sizeof(type)]
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/crypto_user_stat.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/crypto/crypto_user_stat.c b/crypto/crypto_user_stat.c
index 021ad06bbb628..1dfaa0ccd555b 100644
--- a/crypto/crypto_user_stat.c
+++ b/crypto/crypto_user_stat.c
@@ -37,6 +37,8 @@ static int crypto_report_aead(struct sk_buff *skb, struct crypto_alg *alg)
 	u64 v64;
 	u32 v32;
 
+	memset(&raead, 0, sizeof(raead));
+
 	strncpy(raead.type, "aead", sizeof(raead.type));
 
 	v32 = atomic_read(&alg->encrypt_cnt);
@@ -65,6 +67,8 @@ static int crypto_report_cipher(struct sk_buff *skb, struct crypto_alg *alg)
 	u64 v64;
 	u32 v32;
 
+	memset(&rcipher, 0, sizeof(rcipher));
+
 	strlcpy(rcipher.type, "cipher", sizeof(rcipher.type));
 
 	v32 = atomic_read(&alg->encrypt_cnt);
@@ -93,6 +97,8 @@ static int crypto_report_comp(struct sk_buff *skb, struct crypto_alg *alg)
 	u64 v64;
 	u32 v32;
 
+	memset(&rcomp, 0, sizeof(rcomp));
+
 	strlcpy(rcomp.type, "compression", sizeof(rcomp.type));
 	v32 = atomic_read(&alg->compress_cnt);
 	rcomp.stat_compress_cnt = v32;
@@ -120,6 +126,8 @@ static int crypto_report_acomp(struct sk_buff *skb, struct crypto_alg *alg)
 	u64 v64;
 	u32 v32;
 
+	memset(&racomp, 0, sizeof(racomp));
+
 	strlcpy(racomp.type, "acomp", sizeof(racomp.type));
 	v32 = atomic_read(&alg->compress_cnt);
 	racomp.stat_compress_cnt = v32;
@@ -147,6 +155,8 @@ static int crypto_report_akcipher(struct sk_buff *skb, struct crypto_alg *alg)
 	u64 v64;
 	u32 v32;
 
+	memset(&rakcipher, 0, sizeof(rakcipher));
+
 	strncpy(rakcipher.type, "akcipher", sizeof(rakcipher.type));
 	v32 = atomic_read(&alg->encrypt_cnt);
 	rakcipher.stat_encrypt_cnt = v32;
@@ -177,6 +187,8 @@ static int crypto_report_kpp(struct sk_buff *skb, struct crypto_alg *alg)
 	struct crypto_stat rkpp;
 	u32 v;
 
+	memset(&rkpp, 0, sizeof(rkpp));
+
 	strlcpy(rkpp.type, "kpp", sizeof(rkpp.type));
 
 	v = atomic_read(&alg->setsecret_cnt);
@@ -203,6 +215,8 @@ static int crypto_report_ahash(struct sk_buff *skb, struct crypto_alg *alg)
 	u64 v64;
 	u32 v32;
 
+	memset(&rhash, 0, sizeof(rhash));
+
 	strncpy(rhash.type, "ahash", sizeof(rhash.type));
 
 	v32 = atomic_read(&alg->hash_cnt);
@@ -227,6 +241,8 @@ static int crypto_report_shash(struct sk_buff *skb, struct crypto_alg *alg)
 	u64 v64;
 	u32 v32;
 
+	memset(&rhash, 0, sizeof(rhash));
+
 	strncpy(rhash.type, "shash", sizeof(rhash.type));
 
 	v32 = atomic_read(&alg->hash_cnt);
@@ -251,6 +267,8 @@ static int crypto_report_rng(struct sk_buff *skb, struct crypto_alg *alg)
 	u64 v64;
 	u32 v32;
 
+	memset(&rrng, 0, sizeof(rrng));
+
 	strncpy(rrng.type, "rng", sizeof(rrng.type));
 
 	v32 = atomic_read(&alg->generate_cnt);
@@ -275,6 +293,8 @@ static int crypto_reportstat_one(struct crypto_alg *alg,
 				 struct crypto_user_alg *ualg,
 				 struct sk_buff *skb)
 {
+	memset(ualg, 0, sizeof(*ualg));
+
 	strlcpy(ualg->cru_name, alg->cra_name, sizeof(ualg->cru_name));
 	strlcpy(ualg->cru_driver_name, alg->cra_driver_name,
 		sizeof(ualg->cru_driver_name));
@@ -291,6 +311,7 @@ static int crypto_reportstat_one(struct crypto_alg *alg,
 	if (alg->cra_flags & CRYPTO_ALG_LARVAL) {
 		struct crypto_stat rl;
 
+		memset(&rl, 0, sizeof(rl));
 		strlcpy(rl.type, "larval", sizeof(rl.type));
 		if (nla_put(skb, CRYPTOCFGA_STAT_LARVAL,
 			    sizeof(struct crypto_stat), &rl))
-- 
GitLab


From da85d8bfd1512461bcfb64929e8426cf340b3707 Mon Sep 17 00:00:00 2001
From: Li Zhijian <lizhijian@cn.fujitsu.com>
Date: Mon, 5 Nov 2018 16:57:48 +0800
Subject: [PATCH 0868/1890] kselftests/bpf: use ping6 as the default ipv6 ping
 binary when it exists

At commit deee2cae27d1 ("kselftests/bpf: use ping6 as the default ipv6 ping
binary if it exists"), it fixed similar issues for shell script, but it
missed a same issue in the C code.

Fixes: 371e4fcc9d96 ("selftests/bpf: cgroup local storage-based network counters")
Reported-by: kernel test robot <rong.a.chen@intel.com>
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
CC: Philip Li <philip.li@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_netcnt.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/test_netcnt.c b/tools/testing/selftests/bpf/test_netcnt.c
index 7887df6933998..44ed7f29f8ab6 100644
--- a/tools/testing/selftests/bpf/test_netcnt.c
+++ b/tools/testing/selftests/bpf/test_netcnt.c
@@ -81,7 +81,10 @@ int main(int argc, char **argv)
 		goto err;
 	}
 
-	assert(system("ping localhost -6 -c 10000 -f -q > /dev/null") == 0);
+	if (system("which ping6 &>/dev/null") == 0)
+		assert(!system("ping6 localhost -c 10000 -f -q > /dev/null"));
+	else
+		assert(!system("ping -6 localhost -c 10000 -f -q > /dev/null"));
 
 	if (bpf_prog_query(cgroup_fd, BPF_CGROUP_INET_EGRESS, 0, NULL, NULL,
 			   &prog_cnt)) {
-- 
GitLab


From a05a14049999598a3bb6fab12db6b768a0215522 Mon Sep 17 00:00:00 2001
From: Vladimir Zapolskiy <vz@mleia.com>
Date: Fri, 2 Nov 2018 15:39:43 +0200
Subject: [PATCH 0869/1890] gpio: don't free unallocated ida on
 gpiochip_add_data_with_key() error path

The change corrects the error path in gpiochip_add_data_with_key()
by avoiding to call ida_simple_remove(), if ida_simple_get() returns
an error.

Note that ida_simple_remove()/ida_free() throws a BUG(), if id argument
is negative, it allows to easily check the correctness of the fix by
fuzzing the return value from ida_simple_get().

Fixes: ff2b13592299 ("gpio: make the gpiochip a real device")
Cc: stable@vger.kernel.org # v4.6+
Signed-off-by: Vladimir Zapolskiy <vz@mleia.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpiolib.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 230e41562462b..a2cbb474901c2 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -1295,7 +1295,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data,
 	gdev->descs = kcalloc(chip->ngpio, sizeof(gdev->descs[0]), GFP_KERNEL);
 	if (!gdev->descs) {
 		status = -ENOMEM;
-		goto err_free_gdev;
+		goto err_free_ida;
 	}
 
 	if (chip->ngpio == 0) {
@@ -1427,8 +1427,9 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data,
 	kfree_const(gdev->label);
 err_free_descs:
 	kfree(gdev->descs);
-err_free_gdev:
+err_free_ida:
 	ida_simple_remove(&gpio_ida, gdev->id);
+err_free_gdev:
 	/* failures here can mean systems won't boot... */
 	pr_err("%s: GPIOs %d..%d (%s) failed to register, %d\n", __func__,
 	       gdev->base, gdev->base + gdev->ngpio - 1,
-- 
GitLab


From 595b0674ce781e38522097b18718ce3c3bffc1a1 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 5 Nov 2018 18:33:34 +0200
Subject: [PATCH 0870/1890] MAINTAINERS: Add tree link for Intel pin control
 driver

Intel pin control driver gets its own tree. Update MAINTAINERS accordingly.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index f4855974f3250..570a8c593b52f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11730,6 +11730,7 @@ F:	Documentation/devicetree/bindings/pinctrl/fsl,*
 PIN CONTROLLER - INTEL
 M:	Mika Westerberg <mika.westerberg@linux.intel.com>
 M:	Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/pinctrl/intel.git
 S:	Maintained
 F:	drivers/pinctrl/intel/
 
-- 
GitLab


From 10283ea525d30f2e99828978fd04d8427876a7ad Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Mon, 5 Nov 2018 22:57:24 +0000
Subject: [PATCH 0871/1890] gfs2: Put bitmap buffers in put_super

gfs2_put_super calls gfs2_clear_rgrpd to destroy the gfs2_rgrpd objects
attached to the resource group glocks.  That function should release the
buffers attached to the gfs2_bitmap objects (bi_bh), but the call to
gfs2_rgrp_brelse for doing that is missing.

When gfs2_releasepage later runs across these buffers which are still
referenced, it refuses to free them.  This causes the pages the buffers
are attached to to remain referenced as well.  With enough mount/unmount
cycles, the system will eventually run out of memory.

Fix this by adding the missing call to gfs2_rgrp_brelse in
gfs2_clear_rgrpd.

(Also fix a gfs2_rgrp_relse -> gfs2_rgrp_brelse typo in a comment.)

Fixes: 39b0f1e92908 ("GFS2: Don't brelse rgrp buffer_heads every allocation")
Cc: stable@vger.kernel.org # v4.2+
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/rgrp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index ffe3032b1043d..b08a530433adf 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -733,6 +733,7 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
 
 		if (gl) {
 			glock_clear_object(gl, rgd);
+			gfs2_rgrp_brelse(rgd);
 			gfs2_glock_put(gl);
 		}
 
@@ -1174,7 +1175,7 @@ static u32 count_unlinked(struct gfs2_rgrpd *rgd)
  * @rgd: the struct gfs2_rgrpd describing the RG to read in
  *
  * Read in all of a Resource Group's header and bitmap blocks.
- * Caller must eventually call gfs2_rgrp_relse() to free the bitmaps.
+ * Caller must eventually call gfs2_rgrp_brelse() to free the bitmaps.
  *
  * Returns: errno
  */
-- 
GitLab


From e7445ceddfc220c1aede6d42758a5acb8844e9c3 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Thu, 8 Nov 2018 20:14:29 +0000
Subject: [PATCH 0872/1890] gfs2: Fix metadata read-ahead during truncate (2)

The previous attempt to fix for metadata read-ahead during truncate was
incorrect: for files with a height > 2 (1006989312 bytes with a block
size of 4096 bytes), read-ahead requests were not being issued for some
of the indirect blocks discovered while walking the metadata tree,
leading to significant slow-downs when deleting large files.  Fix that.

In addition, only issue read-ahead requests in the first pass through
the meta-data tree, while deallocating data blocks.

Fixes: c3ce5aa9b0 ("gfs2: Fix metadata read-ahead during truncate")
Cc: stable@vger.kernel.org # v4.16+
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/bmap.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 5f3ea07ef5e23..38d88fcb69883 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1908,10 +1908,16 @@ static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length)
 			if (ret < 0)
 				goto out;
 
-			/* issue read-ahead on metadata */
-			if (mp.mp_aheight > 1) {
-				for (; ret > 1; ret--) {
-					metapointer_range(&mp, mp.mp_aheight - ret,
+			/* On the first pass, issue read-ahead on metadata. */
+			if (mp.mp_aheight > 1 && strip_h == ip->i_height - 1) {
+				unsigned int height = mp.mp_aheight - 1;
+
+				/* No read-ahead for data blocks. */
+				if (mp.mp_aheight - 1 == strip_h)
+					height--;
+
+				for (; height >= mp.mp_aheight - ret; height--) {
+					metapointer_range(&mp, height,
 							  start_list, start_aligned,
 							  end_list, end_aligned,
 							  &start, &end);
-- 
GitLab


From 13c45007e0a87e912da21223599583fdea677914 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 9 Nov 2018 11:56:45 +0000
Subject: [PATCH 0873/1890] staging: most: use format specifier "%s" in
 snprintf

Passing string ch_data_type[i].name as the format specifier is
potentially hazardous because it could (although very unlikely to)
have a format specifier embedded in it causing issues when parsing
the non-existent arguments to these.  Follow best practice by using
the "%s" format string for the string.

Cleans up clang warning:
format string is not a string literal (potentially insecure) [-Wformat-security]

Fixes: e7f2b70fd3a9 ("staging: most: replace multiple if..else with table lookup")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/most/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/most/core.c b/drivers/staging/most/core.c
index 6a18cf73c85ea..18936cdb10830 100644
--- a/drivers/staging/most/core.c
+++ b/drivers/staging/most/core.c
@@ -351,7 +351,7 @@ static ssize_t set_datatype_show(struct device *dev,
 
 	for (i = 0; i < ARRAY_SIZE(ch_data_type); i++) {
 		if (c->cfg.data_type & ch_data_type[i].most_ch_data_type)
-			return snprintf(buf, PAGE_SIZE, ch_data_type[i].name);
+			return snprintf(buf, PAGE_SIZE, "%s", ch_data_type[i].name);
 	}
 	return snprintf(buf, PAGE_SIZE, "unconfigured\n");
 }
-- 
GitLab


From 8561fb31a1f9594e2807681f5c0721894e367f19 Mon Sep 17 00:00:00 2001
From: Larry Finger <Larry.Finger@lwfinger.net>
Date: Thu, 8 Nov 2018 23:30:09 -0600
Subject: [PATCH 0874/1890] staging: rtl8723bs: Add missing return for
 cfg80211_rtw_get_station

With Androidx86 8.1, wificond returns "failed to get
nl80211_sta_info_tx_failed" and wificondControl returns "Invalid signal
poll result from wificond". The fix is to OR sinfo->filled with
BIT_ULL(NL80211_STA_INFO_TX_FAILED).

This missing bit is apparently not needed with NetworkManager, but it
does no harm in that case.

Reported-and-Tested-by: youling257 <youling257@gmail.com>
Cc: linux-wireless@vger.kernel.org
Cc: youling257 <youling257@gmail.com>
Signed-off-by: Larry Finger <Larry.Finger@lwfinger.net>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
index af2234798fa8e..db553f2e4c0b8 100644
--- a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
+++ b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
@@ -1277,7 +1277,7 @@ static int cfg80211_rtw_get_station(struct wiphy *wiphy,
 
 		sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_PACKETS);
 		sinfo->tx_packets = psta->sta_stats.tx_pkts;
-
+		sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_FAILED);
 	}
 
 	/* for Ad-Hoc/AP mode */
-- 
GitLab


From 65766ee0bf7fe8b3be80e2e1c3ef54ad59b29476 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 9 Nov 2018 11:59:45 +0100
Subject: [PATCH 0875/1890] ALSA: oss: Use kvzalloc() for local buffer
 allocations

PCM OSS layer may allocate a few temporary buffers, one for the core
read/write and another for the conversions via plugins.  Currently
both are allocated via vmalloc().  But as the allocation size is
equivalent with the PCM period size, the required size might be quite
small, depending on the application.

This patch replaces these vmalloc() calls with kvzalloc() for covering
small period sizes better.  Also, we use "z"-alloc variant here for
addressing the possible uninitialized access reported by syzkaller.

Reported-by: syzbot+1cb36954e127c98dd037@syzkaller.appspotmail.com
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/core/oss/pcm_oss.c    | 6 +++---
 sound/core/oss/pcm_plugin.c | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c
index f8d4a419f3af9..467039b342b51 100644
--- a/sound/core/oss/pcm_oss.c
+++ b/sound/core/oss/pcm_oss.c
@@ -1062,8 +1062,8 @@ static int snd_pcm_oss_change_params_locked(struct snd_pcm_substream *substream)
 	runtime->oss.channels = params_channels(params);
 	runtime->oss.rate = params_rate(params);
 
-	vfree(runtime->oss.buffer);
-	runtime->oss.buffer = vmalloc(runtime->oss.period_bytes);
+	kvfree(runtime->oss.buffer);
+	runtime->oss.buffer = kvzalloc(runtime->oss.period_bytes, GFP_KERNEL);
 	if (!runtime->oss.buffer) {
 		err = -ENOMEM;
 		goto failure;
@@ -2328,7 +2328,7 @@ static void snd_pcm_oss_release_substream(struct snd_pcm_substream *substream)
 {
 	struct snd_pcm_runtime *runtime;
 	runtime = substream->runtime;
-	vfree(runtime->oss.buffer);
+	kvfree(runtime->oss.buffer);
 	runtime->oss.buffer = NULL;
 #ifdef CONFIG_SND_PCM_OSS_PLUGINS
 	snd_pcm_oss_plugin_clear(substream);
diff --git a/sound/core/oss/pcm_plugin.c b/sound/core/oss/pcm_plugin.c
index 141c5f3a95750..31cb2acf8afcc 100644
--- a/sound/core/oss/pcm_plugin.c
+++ b/sound/core/oss/pcm_plugin.c
@@ -66,8 +66,8 @@ static int snd_pcm_plugin_alloc(struct snd_pcm_plugin *plugin, snd_pcm_uframes_t
 		return -ENXIO;
 	size /= 8;
 	if (plugin->buf_frames < frames) {
-		vfree(plugin->buf);
-		plugin->buf = vmalloc(size);
+		kvfree(plugin->buf);
+		plugin->buf = kvzalloc(size, GFP_KERNEL);
 		plugin->buf_frames = frames;
 	}
 	if (!plugin->buf) {
@@ -191,7 +191,7 @@ int snd_pcm_plugin_free(struct snd_pcm_plugin *plugin)
 	if (plugin->private_free)
 		plugin->private_free(plugin);
 	kfree(plugin->buf_channels);
-	vfree(plugin->buf);
+	kvfree(plugin->buf);
 	kfree(plugin);
 	return 0;
 }
-- 
GitLab


From 21d3bbdd4c342f16eac8d70893e45cdfa3381a1e Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Fri, 2 Nov 2018 16:12:21 -0700
Subject: [PATCH 0876/1890] nvmet: don't try to add ns to p2p map unless it
 actually uses it

Even without CONFIG_P2PDMA this results in a error print:
nvmet: no peer-to-peer memory is available that's supported by rxe0 and /dev/nullb0

Fixes: c6925093d0b2 ("nvmet: Optionally use PCI P2P memory")
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/target/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index f4efe289dc7bc..a5f9bbce863f4 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -420,7 +420,7 @@ static void nvmet_p2pmem_ns_add_p2p(struct nvmet_ctrl *ctrl,
 	struct pci_dev *p2p_dev;
 	int ret;
 
-	if (!ctrl->p2p_client)
+	if (!ctrl->p2p_client || !ns->use_p2pmem)
 		return;
 
 	if (ns->p2p_dev) {
-- 
GitLab


From 8f676b8508c250bbe255096522fdefb73f1ea0b9 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Fri, 2 Nov 2018 11:22:13 -0700
Subject: [PATCH 0877/1890] nvme: make sure ns head inherits underlying device
 limits

Whenever we update ns_head info, we need to make sure it is still
compatible with all underlying backing devices because although nvme
multipath doesn't have any explicit use of these limits, other devices
can still be stacked on top of it which may rely on the underlying limits.
Start with unlimited stacking limits, and every info update iterate over
siblings and adjust queue limits.

Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/host/core.c      | 4 +++-
 drivers/nvme/host/multipath.c | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 2e65be8b1387a..559d567693b8d 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1519,8 +1519,10 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
 	if (ns->ndev)
 		nvme_nvm_update_nvm_info(ns);
 #ifdef CONFIG_NVME_MULTIPATH
-	if (ns->head->disk)
+	if (ns->head->disk) {
 		nvme_update_disk_info(ns->head->disk, ns, id);
+		blk_queue_stack_limits(ns->head->disk->queue, ns->queue);
+	}
 #endif
 }
 
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 5e3cc8c59a394..9901afd804ce3 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -285,6 +285,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
 	blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
 	/* set to a default value for 512 until disk is validated */
 	blk_queue_logical_block_size(q, 512);
+	blk_set_stacking_limits(&q->limits);
 
 	/* we need to propagate up the VMC settings */
 	if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
-- 
GitLab


From d39aa4979219ca3d61c492f7460f1032b97b9ef2 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 7 Nov 2018 09:20:25 +0100
Subject: [PATCH 0878/1890] Revert "nvmet-rdma: use a private workqueue for
 delete"

This reverts commit 2acf70ade79d26b97611a8df52eb22aa33814cd4.

The commit never really fixed the intended issue and caused all
kinds of other issues, including a use before initialization.

Suggested-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/target/rdma.c | 19 ++++---------------
 1 file changed, 4 insertions(+), 15 deletions(-)

diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index ddce100be57a4..3f7971d3706d9 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -122,7 +122,6 @@ struct nvmet_rdma_device {
 	int			inline_page_count;
 };
 
-static struct workqueue_struct *nvmet_rdma_delete_wq;
 static bool nvmet_rdma_use_srq;
 module_param_named(use_srq, nvmet_rdma_use_srq, bool, 0444);
 MODULE_PARM_DESC(use_srq, "Use shared receive queue.");
@@ -1274,12 +1273,12 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
 
 	if (queue->host_qid == 0) {
 		/* Let inflight controller teardown complete */
-		flush_workqueue(nvmet_rdma_delete_wq);
+		flush_scheduled_work();
 	}
 
 	ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn);
 	if (ret) {
-		queue_work(nvmet_rdma_delete_wq, &queue->release_work);
+		schedule_work(&queue->release_work);
 		/* Destroying rdma_cm id is not needed here */
 		return 0;
 	}
@@ -1344,7 +1343,7 @@ static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue)
 
 	if (disconnect) {
 		rdma_disconnect(queue->cm_id);
-		queue_work(nvmet_rdma_delete_wq, &queue->release_work);
+		schedule_work(&queue->release_work);
 	}
 }
 
@@ -1374,7 +1373,7 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id,
 	mutex_unlock(&nvmet_rdma_queue_mutex);
 
 	pr_err("failed to connect queue %d\n", queue->idx);
-	queue_work(nvmet_rdma_delete_wq, &queue->release_work);
+	schedule_work(&queue->release_work);
 }
 
 /**
@@ -1656,17 +1655,8 @@ static int __init nvmet_rdma_init(void)
 	if (ret)
 		goto err_ib_client;
 
-	nvmet_rdma_delete_wq = alloc_workqueue("nvmet-rdma-delete-wq",
-			WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
-	if (!nvmet_rdma_delete_wq) {
-		ret = -ENOMEM;
-		goto err_unreg_transport;
-	}
-
 	return 0;
 
-err_unreg_transport:
-	nvmet_unregister_transport(&nvmet_rdma_ops);
 err_ib_client:
 	ib_unregister_client(&nvmet_rdma_ib_client);
 	return ret;
@@ -1674,7 +1664,6 @@ static int __init nvmet_rdma_init(void)
 
 static void __exit nvmet_rdma_exit(void)
 {
-	destroy_workqueue(nvmet_rdma_delete_wq);
 	nvmet_unregister_transport(&nvmet_rdma_ops);
 	ib_unregister_client(&nvmet_rdma_ib_client);
 	WARN_ON_ONCE(!list_empty(&nvmet_rdma_queue_list));
-- 
GitLab


From 1adfc5e4136f5967d591c399aff95b3b035f16b7 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Mon, 29 Oct 2018 20:57:17 +0800
Subject: [PATCH 0879/1890] block: make sure discard bio is aligned with
 logical block size

Obviously the created discard bio has to be aligned with logical block size.

This patch introduces the helper of bio_allowed_max_sectors() for
this purpose.

Cc: stable@vger.kernel.org
Cc: Mike Snitzer <snitzer@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Xiao Ni <xni@redhat.com>
Cc: Mariusz Dabrowski <mariusz.dabrowski@intel.com>
Fixes: 744889b7cbb56a6 ("block: don't deal with discard limit in blkdev_issue_discard()")
Fixes: a22c4d7e34402cc ("block: re-add discard_granularity and alignment checks")
Reported-by: Rui Salvaterra <rsalvaterra@gmail.com>
Tested-by: Rui Salvaterra <rsalvaterra@gmail.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-lib.c   |  3 +--
 block/blk-merge.c |  3 ++-
 block/blk.h       | 10 ++++++++++
 3 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/block/blk-lib.c b/block/blk-lib.c
index 76f867ea9a9b9..d56fd159d2e8f 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -57,8 +57,7 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 
 		if (!req_sects)
 			goto fail;
-		if (req_sects > UINT_MAX >> 9)
-			req_sects = UINT_MAX >> 9;
+		req_sects = min(req_sects, bio_allowed_max_sectors(q));
 
 		end_sect = sector + req_sects;
 
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 208658a901c65..e7696c47489ad 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -90,7 +90,8 @@ static struct bio *blk_bio_discard_split(struct request_queue *q,
 	/* Zero-sector (unknown) and one-sector granularities are the same.  */
 	granularity = max(q->limits.discard_granularity >> 9, 1U);
 
-	max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9);
+	max_discard_sectors = min(q->limits.max_discard_sectors,
+			bio_allowed_max_sectors(q));
 	max_discard_sectors -= max_discard_sectors % granularity;
 
 	if (unlikely(!max_discard_sectors)) {
diff --git a/block/blk.h b/block/blk.h
index c85e53f21cdd8..0089fefdf771d 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -395,6 +395,16 @@ static inline unsigned long blk_rq_deadline(struct request *rq)
 	return rq->__deadline & ~0x1UL;
 }
 
+/*
+ * The max size one bio can handle is UINT_MAX becasue bvec_iter.bi_size
+ * is defined as 'unsigned int', meantime it has to aligned to with logical
+ * block size which is the minimum accepted unit by hardware.
+ */
+static inline unsigned int bio_allowed_max_sectors(struct request_queue *q)
+{
+	return round_down(UINT_MAX, queue_logical_block_size(q)) >> 9;
+}
+
 /*
  * Internal io_context interface
  */
-- 
GitLab


From ba5d73851e71847ba7f7f4c27a1a6e1f5ab91c79 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Mon, 29 Oct 2018 20:57:18 +0800
Subject: [PATCH 0880/1890] block: cleanup __blkdev_issue_discard()

Cleanup __blkdev_issue_discard() a bit:

- remove local variable of 'end_sect'
- remove code block of 'fail'

Cc: Mike Snitzer <snitzer@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Xiao Ni <xni@redhat.com>
Cc: Mariusz Dabrowski <mariusz.dabrowski@intel.com>
Tested-by: Rui Salvaterra <rsalvaterra@gmail.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-lib.c | 23 ++++++-----------------
 1 file changed, 6 insertions(+), 17 deletions(-)

diff --git a/block/blk-lib.c b/block/blk-lib.c
index d56fd159d2e8f..d58d5d87dd887 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -51,15 +51,12 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 	if ((sector | nr_sects) & bs_mask)
 		return -EINVAL;
 
-	while (nr_sects) {
-		unsigned int req_sects = nr_sects;
-		sector_t end_sect;
-
-		if (!req_sects)
-			goto fail;
-		req_sects = min(req_sects, bio_allowed_max_sectors(q));
+	if (!nr_sects)
+		return -EINVAL;
 
-		end_sect = sector + req_sects;
+	while (nr_sects) {
+		unsigned int req_sects = min_t(unsigned int, nr_sects,
+				bio_allowed_max_sectors(q));
 
 		bio = blk_next_bio(bio, 0, gfp_mask);
 		bio->bi_iter.bi_sector = sector;
@@ -67,8 +64,8 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 		bio_set_op_attrs(bio, op, 0);
 
 		bio->bi_iter.bi_size = req_sects << 9;
+		sector += req_sects;
 		nr_sects -= req_sects;
-		sector = end_sect;
 
 		/*
 		 * We can loop for a long time in here, if someone does
@@ -81,14 +78,6 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 
 	*biop = bio;
 	return 0;
-
-fail:
-	if (bio) {
-		submit_bio_wait(bio);
-		bio_put(bio);
-	}
-	*biop = NULL;
-	return -EOPNOTSUPP;
 }
 EXPORT_SYMBOL(__blkdev_issue_discard);
 
-- 
GitLab


From 34ffec60b27aa81d04e274e71e4c6ef740f75fc7 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Mon, 29 Oct 2018 20:57:19 +0800
Subject: [PATCH 0881/1890] block: make sure writesame bio is aligned with
 logical block size

Obviously the created writesame bio has to be aligned with logical block
size, and use bio_allowed_max_sectors() to retrieve this number.

Cc: stable@vger.kernel.org
Cc: Mike Snitzer <snitzer@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Xiao Ni <xni@redhat.com>
Cc: Mariusz Dabrowski <mariusz.dabrowski@intel.com>
Fixes: b49a0871be31a745b2ef ("block: remove split code in blkdev_issue_{discard,write_same}")
Tested-by: Rui Salvaterra <rsalvaterra@gmail.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-lib.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/blk-lib.c b/block/blk-lib.c
index d58d5d87dd887..e8b3bb9bf3759 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -149,7 +149,7 @@ static int __blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
 		return -EOPNOTSUPP;
 
 	/* Ensure that max_write_same_sectors doesn't overflow bi_size */
-	max_write_same_sectors = UINT_MAX >> 9;
+	max_write_same_sectors = bio_allowed_max_sectors(q);
 
 	while (nr_sects) {
 		bio = blk_next_bio(bio, 1, gfp_mask);
-- 
GitLab


From 7fabaf303458fcabb694999d6fa772cc13d4e217 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Fri, 9 Nov 2018 15:52:16 +0100
Subject: [PATCH 0882/1890] fuse: fix leaked notify reply

fuse_request_send_notify_reply() may fail if the connection was reset for
some reason (e.g. fs was unmounted).  Don't leak request reference in this
case.  Besides leaking memory, this resulted in fc->num_waiting not being
decremented and hence fuse_wait_aborted() left in a hanging and unkillable
state.

Fixes: 2d45ba381a74 ("fuse: add retrieve request")
Fixes: b8f95e5d13f5 ("fuse: umount should wait for all requests")
Reported-and-tested-by: syzbot+6339eda9cb4ebbc4c37b@syzkaller.appspotmail.com
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Cc: <stable@vger.kernel.org> #v2.6.36
---
 fs/fuse/dev.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index ae813e6099321..6fe330cc97091 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1768,8 +1768,10 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
 	req->in.args[1].size = total_len;
 
 	err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
-	if (err)
+	if (err) {
 		fuse_retrieve_end(fc, req);
+		fuse_put_request(fc, req);
+	}
 
 	return err;
 }
-- 
GitLab


From 2d84a2d19b6150c6dbac1e6ebad9c82e4c123772 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Fri, 9 Nov 2018 15:52:16 +0100
Subject: [PATCH 0883/1890] fuse: fix possibly missed wake-up after abort

In current fuse_drop_waiting() implementation it's possible that
fuse_wait_aborted() will not be woken up in the unlikely case that
fuse_abort_conn() + fuse_wait_aborted() runs in between checking
fc->connected and calling atomic_dec(&fc->num_waiting).

Do the atomic_dec_and_test() unconditionally, which also provides the
necessary barrier against reordering with the fc->connected check.

The explicit smp_mb() in fuse_wait_aborted() is not actually needed, since
the spin_unlock() in fuse_abort_conn() provides the necessary RELEASE
barrier after resetting fc->connected.  However, this is not a performance
sensitive path, and adding the explicit barrier makes it easier to
document.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Fixes: b8f95e5d13f5 ("fuse: umount should wait for all requests")
Cc: <stable@vger.kernel.org> #v4.19
---
 fs/fuse/dev.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 6fe330cc97091..a5e516a40e7a3 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -165,9 +165,13 @@ static bool fuse_block_alloc(struct fuse_conn *fc, bool for_background)
 
 static void fuse_drop_waiting(struct fuse_conn *fc)
 {
-	if (fc->connected) {
-		atomic_dec(&fc->num_waiting);
-	} else if (atomic_dec_and_test(&fc->num_waiting)) {
+	/*
+	 * lockess check of fc->connected is okay, because atomic_dec_and_test()
+	 * provides a memory barrier mached with the one in fuse_wait_aborted()
+	 * to ensure no wake-up is missed.
+	 */
+	if (atomic_dec_and_test(&fc->num_waiting) &&
+	    !READ_ONCE(fc->connected)) {
 		/* wake up aborters */
 		wake_up_all(&fc->blocked_waitq);
 	}
@@ -2221,6 +2225,8 @@ EXPORT_SYMBOL_GPL(fuse_abort_conn);
 
 void fuse_wait_aborted(struct fuse_conn *fc)
 {
+	/* matches implicit memory barrier in fuse_drop_waiting() */
+	smp_mb();
 	wait_event(fc->blocked_waitq, atomic_read(&fc->num_waiting) == 0);
 }
 
-- 
GitLab


From ebacb81273599555a7a19f7754a1451206a5fc4f Mon Sep 17 00:00:00 2001
From: Lukas Czerner <lczerner@redhat.com>
Date: Fri, 9 Nov 2018 14:51:46 +0100
Subject: [PATCH 0884/1890] fuse: fix use-after-free in fuse_direct_IO()

In async IO blocking case the additional reference to the io is taken for
it to survive fuse_aio_complete(). In non blocking case this additional
reference is not needed, however we still reference io to figure out
whether to wait for completion or not. This is wrong and will lead to
use-after-free. Fix it by storing blocking information in separate
variable.

This was spotted by KASAN when running generic/208 fstest.

Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Reported-by: Zorro Lang <zlang@redhat.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Fixes: 744742d692e3 ("fuse: Add reference counting for fuse_io_priv")
Cc: <stable@vger.kernel.org> # v4.6
---
 fs/fuse/file.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index cc2121b37bf5f..b52f9baaa3e7b 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -2924,10 +2924,12 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 	}
 
 	if (io->async) {
+		bool blocking = io->blocking;
+
 		fuse_aio_complete(io, ret < 0 ? ret : 0, -1);
 
 		/* we have a non-extending, async request, so return */
-		if (!io->blocking)
+		if (!blocking)
 			return -EIOCBQUEUED;
 
 		wait_for_completion(&wait);
-- 
GitLab


From 278df5e5527b633f4882f1680ad58b62a7c07bfe Mon Sep 17 00:00:00 2001
From: Vasily Khoruzhick <anarsoul@gmail.com>
Date: Thu, 8 Nov 2018 20:31:35 -0800
Subject: [PATCH 0885/1890] ASoC: sun8i-codec: fix crash on module removal

drvdata is actually sun8i_codec, not snd_soc_card, so it crashes
when calling snd_soc_card_get_drvdata().

Drop card and scodec vars anyway since we don't need to
disable/unprepare clocks - it's already done by calling
runtime_suspend()

Drop clk_disable_unprepare() calls for the same reason.

Fixes: 36c684936fae7 ("ASoC: Add sun8i digital audio codec")
Signed-off-by: Vasily Khoruzhick <anarsoul@gmail.com>
Acked-by: Maxime Ripard <maxime.ripard@bootlin.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/sunxi/sun8i-codec.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/sound/soc/sunxi/sun8i-codec.c b/sound/soc/sunxi/sun8i-codec.c
index 522a72fde78da..c4196d4e59159 100644
--- a/sound/soc/sunxi/sun8i-codec.c
+++ b/sound/soc/sunxi/sun8i-codec.c
@@ -605,16 +605,10 @@ static int sun8i_codec_probe(struct platform_device *pdev)
 
 static int sun8i_codec_remove(struct platform_device *pdev)
 {
-	struct snd_soc_card *card = platform_get_drvdata(pdev);
-	struct sun8i_codec *scodec = snd_soc_card_get_drvdata(card);
-
 	pm_runtime_disable(&pdev->dev);
 	if (!pm_runtime_status_suspended(&pdev->dev))
 		sun8i_codec_runtime_suspend(&pdev->dev);
 
-	clk_disable_unprepare(scodec->clk_module);
-	clk_disable_unprepare(scodec->clk_bus);
-
 	return 0;
 }
 
-- 
GitLab


From d3132b3860f6cf35ff7609a76bbcdbb814bd027c Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Thu, 8 Nov 2018 08:35:06 +0100
Subject: [PATCH 0886/1890] xen: fix xen_qlock_wait()

Commit a856531951dc80 ("xen: make xen_qlock_wait() nestable")
introduced a regression for Xen guests running fully virtualized
(HVM or PVH mode). The Xen hypervisor wouldn't return from the poll
hypercall with interrupts disabled in case of an interrupt (for PV
guests it does).

So instead of disabling interrupts in xen_qlock_wait() use a nesting
counter to avoid calling xen_clear_irq_pending() in case
xen_qlock_wait() is nested.

Fixes: a856531951dc80 ("xen: make xen_qlock_wait() nestable")
Cc: stable@vger.kernel.org
Reported-by: Sander Eikelenboom <linux@eikelenboom.it>
Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Tested-by: Sander Eikelenboom <linux@eikelenboom.it>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 arch/x86/xen/spinlock.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 441c882621698..1c8a8816a402a 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -9,6 +9,7 @@
 #include <linux/log2.h>
 #include <linux/gfp.h>
 #include <linux/slab.h>
+#include <linux/atomic.h>
 
 #include <asm/paravirt.h>
 #include <asm/qspinlock.h>
@@ -21,6 +22,7 @@
 
 static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
 static DEFINE_PER_CPU(char *, irq_name);
+static DEFINE_PER_CPU(atomic_t, xen_qlock_wait_nest);
 static bool xen_pvspin = true;
 
 static void xen_qlock_kick(int cpu)
@@ -39,25 +41,25 @@ static void xen_qlock_kick(int cpu)
  */
 static void xen_qlock_wait(u8 *byte, u8 val)
 {
-	unsigned long flags;
 	int irq = __this_cpu_read(lock_kicker_irq);
+	atomic_t *nest_cnt = this_cpu_ptr(&xen_qlock_wait_nest);
 
 	/* If kicker interrupts not initialized yet, just spin */
 	if (irq == -1 || in_nmi())
 		return;
 
-	/* Guard against reentry. */
-	local_irq_save(flags);
+	/* Detect reentry. */
+	atomic_inc(nest_cnt);
 
-	/* If irq pending already clear it. */
-	if (xen_test_irq_pending(irq)) {
+	/* If irq pending already and no nested call clear it. */
+	if (atomic_read(nest_cnt) == 1 && xen_test_irq_pending(irq)) {
 		xen_clear_irq_pending(irq);
 	} else if (READ_ONCE(*byte) == val) {
 		/* Block until irq becomes pending (or a spurious wakeup) */
 		xen_poll_irq(irq);
 	}
 
-	local_irq_restore(flags);
+	atomic_dec(nest_cnt);
 }
 
 static irqreturn_t dummy_handler(int irq, void *dev_id)
-- 
GitLab


From 3941552aec1e04d63999988a057ae09a1c56ebeb Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Thu, 1 Nov 2018 13:33:07 +0100
Subject: [PATCH 0887/1890] xen: remove size limit of privcmd-buf mapping
 interface

Currently the size of hypercall buffers allocated via
/dev/xen/hypercall is limited to a default of 64 memory pages. For live
migration of guests this might be too small as the page dirty bitmask
needs to be sized according to the size of the guest. This means
migrating a 8GB sized guest is already exhausting the default buffer
size for the dirty bitmap.

There is no sensible way to set a sane limit, so just remove it
completely. The device node's usage is limited to root anyway, so there
is no additional DOS scenario added by allowing unlimited buffers.

While at it make the error path for the -ENOMEM case a little bit
cleaner by setting n_pages to the number of successfully allocated
pages instead of the target size.

Fixes: c51b3c639e01f2 ("xen: add new hypercall buffer mapping device")
Cc: <stable@vger.kernel.org> #4.18
Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 drivers/xen/privcmd-buf.c | 22 ++++------------------
 1 file changed, 4 insertions(+), 18 deletions(-)

diff --git a/drivers/xen/privcmd-buf.c b/drivers/xen/privcmd-buf.c
index df1ed37c3269e..de01a6d0059dc 100644
--- a/drivers/xen/privcmd-buf.c
+++ b/drivers/xen/privcmd-buf.c
@@ -21,15 +21,9 @@
 
 MODULE_LICENSE("GPL");
 
-static unsigned int limit = 64;
-module_param(limit, uint, 0644);
-MODULE_PARM_DESC(limit, "Maximum number of pages that may be allocated by "
-			"the privcmd-buf device per open file");
-
 struct privcmd_buf_private {
 	struct mutex lock;
 	struct list_head list;
-	unsigned int allocated;
 };
 
 struct privcmd_buf_vma_private {
@@ -60,13 +54,10 @@ static void privcmd_buf_vmapriv_free(struct privcmd_buf_vma_private *vma_priv)
 {
 	unsigned int i;
 
-	vma_priv->file_priv->allocated -= vma_priv->n_pages;
-
 	list_del(&vma_priv->list);
 
 	for (i = 0; i < vma_priv->n_pages; i++)
-		if (vma_priv->pages[i])
-			__free_page(vma_priv->pages[i]);
+		__free_page(vma_priv->pages[i]);
 
 	kfree(vma_priv);
 }
@@ -146,8 +137,7 @@ static int privcmd_buf_mmap(struct file *file, struct vm_area_struct *vma)
 	unsigned int i;
 	int ret = 0;
 
-	if (!(vma->vm_flags & VM_SHARED) || count > limit ||
-	    file_priv->allocated + count > limit)
+	if (!(vma->vm_flags & VM_SHARED))
 		return -EINVAL;
 
 	vma_priv = kzalloc(sizeof(*vma_priv) + count * sizeof(void *),
@@ -155,19 +145,15 @@ static int privcmd_buf_mmap(struct file *file, struct vm_area_struct *vma)
 	if (!vma_priv)
 		return -ENOMEM;
 
-	vma_priv->n_pages = count;
-	count = 0;
-	for (i = 0; i < vma_priv->n_pages; i++) {
+	for (i = 0; i < count; i++) {
 		vma_priv->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
 		if (!vma_priv->pages[i])
 			break;
-		count++;
+		vma_priv->n_pages++;
 	}
 
 	mutex_lock(&file_priv->lock);
 
-	file_priv->allocated += count;
-
 	vma_priv->file_priv = file_priv;
 	vma_priv->users = 1;
 
-- 
GitLab


From a43608fa77213ad5ac5f75994254b9f65d57cfa0 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Wed, 24 Oct 2018 10:27:12 +0200
Subject: [PATCH 0888/1890] can: raw: check for CAN FD capable netdev in
 raw_sendmsg()

When the socket is CAN FD enabled it can handle CAN FD frame
transmissions.  Add an additional check in raw_sendmsg() as a CAN2.0 CAN
driver (non CAN FD) should never see a CAN FD frame. Due to the commonly
used can_dropped_invalid_skb() function the CAN 2.0 driver would drop
that CAN FD frame anyway - but with this patch the user gets a proper
-EINVAL return code.

Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 net/can/raw.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/net/can/raw.c b/net/can/raw.c
index 1051eee825818..3aab7664933fd 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -745,18 +745,19 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 	} else
 		ifindex = ro->ifindex;
 
-	if (ro->fd_frames) {
+	dev = dev_get_by_index(sock_net(sk), ifindex);
+	if (!dev)
+		return -ENXIO;
+
+	err = -EINVAL;
+	if (ro->fd_frames && dev->mtu == CANFD_MTU) {
 		if (unlikely(size != CANFD_MTU && size != CAN_MTU))
-			return -EINVAL;
+			goto put_dev;
 	} else {
 		if (unlikely(size != CAN_MTU))
-			return -EINVAL;
+			goto put_dev;
 	}
 
-	dev = dev_get_by_index(sock_net(sk), ifindex);
-	if (!dev)
-		return -ENXIO;
-
 	skb = sock_alloc_send_skb(sk, size + sizeof(struct can_skb_priv),
 				  msg->msg_flags & MSG_DONTWAIT, &err);
 	if (!skb)
-- 
GitLab


From 95217260649aa504eb5d4a0d50959ca4e67c8f96 Mon Sep 17 00:00:00 2001
From: Jimmy Assarsson <jimmyassarsson@gmail.com>
Date: Mon, 6 Aug 2018 15:14:50 +0200
Subject: [PATCH 0889/1890] can: kvaser_usb: Fix potential uninitialized
 variable use

If alloc_can_err_skb() fails, cf is never initialized.
Move assignment of cf inside check.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Jimmy Assarsson <jimmyassarsson@gmail.com>
Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c
index c084bae5ec0a4..5fc0be5642743 100644
--- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c
+++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c
@@ -1019,6 +1019,11 @@ kvaser_usb_hydra_error_frame(struct kvaser_usb_net_priv *priv,
 					new_state : CAN_STATE_ERROR_ACTIVE;
 
 			can_change_state(netdev, cf, tx_state, rx_state);
+
+			if (priv->can.restart_ms &&
+			    old_state >= CAN_STATE_BUS_OFF &&
+			    new_state < CAN_STATE_BUS_OFF)
+				cf->can_id |= CAN_ERR_RESTARTED;
 		}
 
 		if (new_state == CAN_STATE_BUS_OFF) {
@@ -1028,11 +1033,6 @@ kvaser_usb_hydra_error_frame(struct kvaser_usb_net_priv *priv,
 
 			can_bus_off(netdev);
 		}
-
-		if (priv->can.restart_ms &&
-		    old_state >= CAN_STATE_BUS_OFF &&
-		    new_state < CAN_STATE_BUS_OFF)
-			cf->can_id |= CAN_ERR_RESTARTED;
 	}
 
 	if (!skb) {
-- 
GitLab


From e13fb9b37cc00616b90df2d620f30345b5ada6ff Mon Sep 17 00:00:00 2001
From: Jimmy Assarsson <jimmyassarsson@gmail.com>
Date: Mon, 6 Aug 2018 15:14:49 +0200
Subject: [PATCH 0890/1890] can: kvaser_usb: Fix accessing freed memory in
 kvaser_usb_start_xmit()

The call to can_put_echo_skb() may result in the skb being freed. The skb
is later used in the call to dev->ops->dev_frame_to_cmd().

This is avoided by moving the call to can_put_echo_skb() after
dev->ops->dev_frame_to_cmd().

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Jimmy Assarsson <jimmyassarsson@gmail.com>
Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c
index b939a4c10b840..c89c7d4900d75 100644
--- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c
+++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c
@@ -528,7 +528,6 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb,
 			context = &priv->tx_contexts[i];
 
 			context->echo_index = i;
-			can_put_echo_skb(skb, netdev, context->echo_index);
 			++priv->active_tx_contexts;
 			if (priv->active_tx_contexts >= (int)dev->max_tx_urbs)
 				netif_stop_queue(netdev);
@@ -553,7 +552,6 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb,
 		dev_kfree_skb(skb);
 		spin_lock_irqsave(&priv->tx_contexts_lock, flags);
 
-		can_free_echo_skb(netdev, context->echo_index);
 		context->echo_index = dev->max_tx_urbs;
 		--priv->active_tx_contexts;
 		netif_wake_queue(netdev);
@@ -564,6 +562,8 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb,
 
 	context->priv = priv;
 
+	can_put_echo_skb(skb, netdev, context->echo_index);
+
 	usb_fill_bulk_urb(urb, dev->udev,
 			  usb_sndbulkpipe(dev->udev,
 					  dev->bulk_out->bEndpointAddress),
-- 
GitLab


From 207681fc5f3d5d398f106d1ae0080fc2373f707a Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Wed, 29 Aug 2018 01:46:54 +0000
Subject: [PATCH 0891/1890] can: ucan: remove set but not used variable 'udev'

Fixes gcc '-Wunused-but-set-variable' warning:

drivers/net/can/usb/ucan.c: In function 'ucan_disconnect':
drivers/net/can/usb/ucan.c:1578:21: warning:
 variable 'udev' set but not used [-Wunused-but-set-variable]
  struct usb_device *udev;

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Reviewed-by: Martin Elshuber <martin.elshuber@theobroma-systems.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/ucan.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/can/usb/ucan.c b/drivers/net/can/usb/ucan.c
index 0678a38b1af45..c9fd83e8d9477 100644
--- a/drivers/net/can/usb/ucan.c
+++ b/drivers/net/can/usb/ucan.c
@@ -1575,11 +1575,8 @@ static int ucan_probe(struct usb_interface *intf,
 /* disconnect the device */
 static void ucan_disconnect(struct usb_interface *intf)
 {
-	struct usb_device *udev;
 	struct ucan_priv *up = usb_get_intfdata(intf);
 
-	udev = interface_to_usbdev(intf);
-
 	usb_set_intfdata(intf, NULL);
 
 	if (up) {
-- 
GitLab


From ff1f19d56c200b35eb07cfa6668aa6dcac198cec Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Wed, 29 Aug 2018 01:25:45 +0000
Subject: [PATCH 0892/1890] can: ucan: remove duplicated include from ucan.c

Remove duplicated include.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Reviewed-by: Martin Elshuber <martin.elshuber@theobroma-systems.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/ucan.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/net/can/usb/ucan.c b/drivers/net/can/usb/ucan.c
index c9fd83e8d9477..f3d5bda012a10 100644
--- a/drivers/net/can/usb/ucan.c
+++ b/drivers/net/can/usb/ucan.c
@@ -35,10 +35,6 @@
 #include <linux/slab.h>
 #include <linux/usb.h>
 
-#include <linux/can.h>
-#include <linux/can/dev.h>
-#include <linux/can/error.h>
-
 #define UCAN_DRIVER_NAME "ucan"
 #define UCAN_MAX_RX_URBS 8
 /* the CAN controller needs a while to enable/disable the bus */
-- 
GitLab


From 4f145f14f6b98b5aa0dd91bdae518b3f24f74b37 Mon Sep 17 00:00:00 2001
From: Eugeniu Rosca <rosca.eugeniu@gmail.com>
Date: Mon, 20 Aug 2018 16:49:10 +0200
Subject: [PATCH 0893/1890] dt-bindings: can: rcar_can: document r8a77965
 support

Document the support for rcar_can on R8A77965 SoC devices.
Add R8A77965 to the list of SoCs which require the "assigned-clocks" and
"assigned-clock-rates" properties (thanks, Sergei).

Signed-off-by: Eugeniu Rosca <erosca@de.adit-jv.com>
Reviewed-by: Simon Horman <horms+renesas@verge.net.au>
Reviewed-by: Kieran Bingham <kieran.bingham+renesas@ideasonboard.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 Documentation/devicetree/bindings/net/can/rcar_can.txt | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/can/rcar_can.txt b/Documentation/devicetree/bindings/net/can/rcar_can.txt
index cc4372842bf37..47fc68148f38e 100644
--- a/Documentation/devicetree/bindings/net/can/rcar_can.txt
+++ b/Documentation/devicetree/bindings/net/can/rcar_can.txt
@@ -14,6 +14,7 @@ Required properties:
 	      "renesas,can-r8a7794" if CAN controller is a part of R8A7794 SoC.
 	      "renesas,can-r8a7795" if CAN controller is a part of R8A7795 SoC.
 	      "renesas,can-r8a7796" if CAN controller is a part of R8A7796 SoC.
+	      "renesas,can-r8a77965" if CAN controller is a part of R8A77965 SoC.
 	      "renesas,rcar-gen1-can" for a generic R-Car Gen1 compatible device.
 	      "renesas,rcar-gen2-can" for a generic R-Car Gen2 or RZ/G1
 	      compatible device.
@@ -29,11 +30,10 @@ Required properties:
 - pinctrl-0: pin control group to be used for this controller.
 - pinctrl-names: must be "default".
 
-Required properties for "renesas,can-r8a7795" and "renesas,can-r8a7796"
-compatible:
-In R8A7795 and R8A7796 SoCs, "clkp2" can be CANFD clock. This is a div6 clock
-and can be used by both CAN and CAN FD controller at the same time. It needs to
-be scaled to maximum frequency if any of these controllers use it. This is done
+Required properties for R8A7795, R8A7796 and R8A77965:
+For the denoted SoCs, "clkp2" can be CANFD clock. This is a div6 clock and can
+be used by both CAN and CAN FD controller at the same time. It needs to be
+scaled to maximum frequency if any of these controllers use it. This is done
 using the below properties:
 
 - assigned-clocks: phandle of clkp2(CANFD) clock.
-- 
GitLab


From 68c8d209cd4337da4fa04c672f0b62bb735969bc Mon Sep 17 00:00:00 2001
From: Fabrizio Castro <fabrizio.castro@bp.renesas.com>
Date: Mon, 10 Sep 2018 11:43:13 +0100
Subject: [PATCH 0894/1890] can: rcar_can: Fix erroneous registration

Assigning 2 to "renesas,can-clock-select" tricks the driver into
registering the CAN interface, even though we don't want that.
This patch improves one of the checks to prevent that from happening.

Fixes: 862e2b6af9413b43 ("can: rcar_can: support all input clocks")
Signed-off-by: Fabrizio Castro <fabrizio.castro@bp.renesas.com>
Signed-off-by: Chris Paterson <Chris.Paterson2@renesas.com>
Reviewed-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/rcar/rcar_can.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/can/rcar/rcar_can.c b/drivers/net/can/rcar/rcar_can.c
index 11662f479e760..771a460837397 100644
--- a/drivers/net/can/rcar/rcar_can.c
+++ b/drivers/net/can/rcar/rcar_can.c
@@ -24,6 +24,9 @@
 
 #define RCAR_CAN_DRV_NAME	"rcar_can"
 
+#define RCAR_SUPPORTED_CLOCKS	(BIT(CLKR_CLKP1) | BIT(CLKR_CLKP2) | \
+				 BIT(CLKR_CLKEXT))
+
 /* Mailbox configuration:
  * mailbox 60 - 63 - Rx FIFO mailboxes
  * mailbox 56 - 59 - Tx FIFO mailboxes
@@ -789,7 +792,7 @@ static int rcar_can_probe(struct platform_device *pdev)
 		goto fail_clk;
 	}
 
-	if (clock_select >= ARRAY_SIZE(clock_names)) {
+	if (!(BIT(clock_select) & RCAR_SUPPORTED_CLOCKS)) {
 		err = -EINVAL;
 		dev_err(&pdev->dev, "invalid CAN clock selected\n");
 		goto fail_clk;
-- 
GitLab


From 868b7c0f43e61f227bf3d7f7d6134bb3c67bb0e8 Mon Sep 17 00:00:00 2001
From: Fabrizio Castro <fabrizio.castro@bp.renesas.com>
Date: Mon, 10 Sep 2018 11:43:14 +0100
Subject: [PATCH 0895/1890] dt-bindings: can: rcar_can: Add r8a774a1 support

Document RZ/G2M (r8a774a1) SoC specific bindings.

Signed-off-by: Fabrizio Castro <fabrizio.castro@bp.renesas.com>
Signed-off-by: Chris Paterson <Chris.Paterson2@renesas.com>
Reviewed-by: Biju Das <biju.das@bp.renesas.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 .../devicetree/bindings/net/can/rcar_can.txt   | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/can/rcar_can.txt b/Documentation/devicetree/bindings/net/can/rcar_can.txt
index 47fc68148f38e..9936b9ee67c36 100644
--- a/Documentation/devicetree/bindings/net/can/rcar_can.txt
+++ b/Documentation/devicetree/bindings/net/can/rcar_can.txt
@@ -5,6 +5,7 @@ Required properties:
 - compatible: "renesas,can-r8a7743" if CAN controller is a part of R8A7743 SoC.
 	      "renesas,can-r8a7744" if CAN controller is a part of R8A7744 SoC.
 	      "renesas,can-r8a7745" if CAN controller is a part of R8A7745 SoC.
+	      "renesas,can-r8a774a1" if CAN controller is a part of R8A774A1 SoC.
 	      "renesas,can-r8a7778" if CAN controller is a part of R8A7778 SoC.
 	      "renesas,can-r8a7779" if CAN controller is a part of R8A7779 SoC.
 	      "renesas,can-r8a7790" if CAN controller is a part of R8A7790 SoC.
@@ -18,15 +19,21 @@ Required properties:
 	      "renesas,rcar-gen1-can" for a generic R-Car Gen1 compatible device.
 	      "renesas,rcar-gen2-can" for a generic R-Car Gen2 or RZ/G1
 	      compatible device.
-	      "renesas,rcar-gen3-can" for a generic R-Car Gen3 compatible device.
+	      "renesas,rcar-gen3-can" for a generic R-Car Gen3 or RZ/G2
+	      compatible device.
 	      When compatible with the generic version, nodes must list the
 	      SoC-specific version corresponding to the platform first
 	      followed by the generic version.
 
 - reg: physical base address and size of the R-Car CAN register map.
 - interrupts: interrupt specifier for the sole interrupt.
-- clocks: phandles and clock specifiers for 3 CAN clock inputs.
-- clock-names: 3 clock input name strings: "clkp1", "clkp2", "can_clk".
+- clocks: phandles and clock specifiers for 2 CAN clock inputs for RZ/G2
+	  devices.
+	  phandles and clock specifiers for 3 CAN clock inputs for every other
+	  SoC.
+- clock-names: 2 clock input name strings for RZ/G2: "clkp1", "can_clk".
+	       3 clock input name strings for every other SoC: "clkp1", "clkp2",
+	       "can_clk".
 - pinctrl-0: pin control group to be used for this controller.
 - pinctrl-names: must be "default".
 
@@ -42,8 +49,9 @@ using the below properties:
 Optional properties:
 - renesas,can-clock-select: R-Car CAN Clock Source Select. Valid values are:
 			    <0x0> (default) : Peripheral clock (clkp1)
-			    <0x1> : Peripheral clock (clkp2)
-			    <0x3> : Externally input clock
+			    <0x1> : Peripheral clock (clkp2) (not supported by
+				    RZ/G2 devices)
+			    <0x3> : External input clock
 
 Example
 -------
-- 
GitLab


From f164d0204b1156a7e0d8d1622c1a8d25752befec Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Sat, 27 Oct 2018 10:36:54 +0200
Subject: [PATCH 0896/1890] can: hi311x: Use level-triggered interrupt

If the hi3110 shares the SPI bus with another traffic-intensive device
and packets are received in high volume (by a separate machine sending
with "cangen -g 0 -i -x"), reception stops after a few minutes and the
counter in /proc/interrupts stops incrementing.  Bus state is "active".
Bringing the interface down and back up reconvenes the reception.  The
issue is not observed when the hi3110 is the sole device on the SPI bus.

Using a level-triggered interrupt makes the issue go away and lets the
hi3110 successfully receive 2 GByte over the course of 5 days while a
ks8851 Ethernet chip on the same SPI bus handles 6 GByte of traffic.

Unfortunately the hi3110 datasheet is mum on the trigger type.  The pin
description on page 3 only specifies the polarity (active high):
http://www.holtic.com/documents/371-hi-3110_v-rev-kpdf.do

Cc: Mathias Duckeck <m.duckeck@kunbus.de>
Cc: Akshay Bhat <akshay.bhat@timesys.com>
Cc: Casey Fitzpatrick <casey.fitzpatrick@timesys.com>
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 Documentation/devicetree/bindings/net/can/holt_hi311x.txt | 2 +-
 drivers/net/can/spi/hi311x.c                              | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/can/holt_hi311x.txt b/Documentation/devicetree/bindings/net/can/holt_hi311x.txt
index 903a78da65be2..3a9926f999370 100644
--- a/Documentation/devicetree/bindings/net/can/holt_hi311x.txt
+++ b/Documentation/devicetree/bindings/net/can/holt_hi311x.txt
@@ -17,7 +17,7 @@ Example:
 		reg = <1>;
 		clocks = <&clk32m>;
 		interrupt-parent = <&gpio4>;
-		interrupts = <13 IRQ_TYPE_EDGE_RISING>;
+		interrupts = <13 IRQ_TYPE_LEVEL_HIGH>;
 		vdd-supply = <&reg5v0>;
 		xceiver-supply = <&reg5v0>;
 	};
diff --git a/drivers/net/can/spi/hi311x.c b/drivers/net/can/spi/hi311x.c
index 53e320c92a8be..ddaf46239e39e 100644
--- a/drivers/net/can/spi/hi311x.c
+++ b/drivers/net/can/spi/hi311x.c
@@ -760,7 +760,7 @@ static int hi3110_open(struct net_device *net)
 {
 	struct hi3110_priv *priv = netdev_priv(net);
 	struct spi_device *spi = priv->spi;
-	unsigned long flags = IRQF_ONESHOT | IRQF_TRIGGER_RISING;
+	unsigned long flags = IRQF_ONESHOT | IRQF_TRIGGER_HIGH;
 	int ret;
 
 	ret = open_candev(net);
-- 
GitLab


From 5178b7cd8e42448b1041716f124734eaaa36ca50 Mon Sep 17 00:00:00 2001
From: Pankaj Bansal <pankaj.bansal@nxp.com>
Date: Wed, 1 Aug 2018 19:36:46 +0530
Subject: [PATCH 0897/1890] can: flexcan: Unlock the MB unconditionally

Unlock the MB irrespective of reception method being FIFO or timestamp
based. It is optional but recommended to unlock Mailbox as soon as
possible and make it available for reception.

Reported-by: Alexander Stein <alexander.stein@systec-electronic.com>
Signed-off-by: Pankaj Bansal <pankaj.bansal@nxp.com>
Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/flexcan.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
index 8e972ef086376..0431f8d05518a 100644
--- a/drivers/net/can/flexcan.c
+++ b/drivers/net/can/flexcan.c
@@ -720,9 +720,14 @@ static unsigned int flexcan_mailbox_read(struct can_rx_offload *offload,
 			priv->write(BIT(n - 32), &regs->iflag2);
 	} else {
 		priv->write(FLEXCAN_IFLAG_RX_FIFO_AVAILABLE, &regs->iflag1);
-		priv->read(&regs->timer);
 	}
 
+	/* Read the Free Running Timer. It is optional but recommended
+	 * to unlock Mailbox as soon as possible and make it available
+	 * for reception.
+	 */
+	priv->read(&regs->timer);
+
 	return 1;
 }
 
-- 
GitLab


From cbffaf7aa09edbaea2bc7dc440c945297095e2fd Mon Sep 17 00:00:00 2001
From: Alexander Stein <alexander.stein@systec-electronic.com>
Date: Thu, 11 Oct 2018 17:01:25 +0200
Subject: [PATCH 0898/1890] can: flexcan: Always use last mailbox for TX

Essentially this patch moves the TX mailbox to position 63, regardless
of timestamp based offloading or RX FIFO. So mainly the iflag register
usage regarding TX has changed. The rest is consolidating RX FIFO and
timestamp offloading as they now use both the same TX mailbox.

The reason is a very annoying behavior regarding sending RTR frames when
_not_ using RX FIFO:

If a TX mailbox sent a RTR frame it becomes a RX mailbox. For that
reason flexcan_irq disables the TX mailbox again. But if during the time
the RTR was sent and the TX mailbox is disabled a new CAN frames is
received, it is lost without notice. The reason is that so-called
"Move-in" process starts from the lowest mailbox which happen to be a TX
mailbox set to EMPTY.

Steps to reproduce (I used an imx7d):
1. generate regular bursts of messages
2. send a RTR from flexcan with higher priority than burst messages every
   1ms, e.g. cangen -I 0x100 -L 0 -g 1 -R can0
3. notice a lost message without notification after some seconds

When running an iperf in parallel this problem is occurring even more
frequently. Using filters is not possible as at least one single CAN-ID
is allowed. Handling the TX MB during RX is also not possible as there
is no race-free disable of RX MB.

There is still a slight window when the described problem can occur. But
for that all RX MB must be in use which is essentially next to an
overrun. Still there will be no indication if it ever occurs.

Signed-off-by: Alexander Stein <alexander.stein@systec-electronic.com>
Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/flexcan.c | 67 +++++++++++++++++++--------------------
 1 file changed, 33 insertions(+), 34 deletions(-)

diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
index 0431f8d05518a..677c41701cf3c 100644
--- a/drivers/net/can/flexcan.c
+++ b/drivers/net/can/flexcan.c
@@ -135,13 +135,12 @@
 
 /* FLEXCAN interrupt flag register (IFLAG) bits */
 /* Errata ERR005829 step7: Reserve first valid MB */
-#define FLEXCAN_TX_MB_RESERVED_OFF_FIFO	8
-#define FLEXCAN_TX_MB_OFF_FIFO		9
+#define FLEXCAN_TX_MB_RESERVED_OFF_FIFO		8
 #define FLEXCAN_TX_MB_RESERVED_OFF_TIMESTAMP	0
-#define FLEXCAN_TX_MB_OFF_TIMESTAMP		1
-#define FLEXCAN_RX_MB_OFF_TIMESTAMP_FIRST	(FLEXCAN_TX_MB_OFF_TIMESTAMP + 1)
-#define FLEXCAN_RX_MB_OFF_TIMESTAMP_LAST	63
-#define FLEXCAN_IFLAG_MB(x)		BIT(x)
+#define FLEXCAN_TX_MB				63
+#define FLEXCAN_RX_MB_OFF_TIMESTAMP_FIRST	(FLEXCAN_TX_MB_RESERVED_OFF_TIMESTAMP + 1)
+#define FLEXCAN_RX_MB_OFF_TIMESTAMP_LAST	(FLEXCAN_TX_MB - 1)
+#define FLEXCAN_IFLAG_MB(x)		BIT(x & 0x1f)
 #define FLEXCAN_IFLAG_RX_FIFO_OVERFLOW	BIT(7)
 #define FLEXCAN_IFLAG_RX_FIFO_WARN	BIT(6)
 #define FLEXCAN_IFLAG_RX_FIFO_AVAILABLE	BIT(5)
@@ -737,9 +736,9 @@ static inline u64 flexcan_read_reg_iflag_rx(struct flexcan_priv *priv)
 	struct flexcan_regs __iomem *regs = priv->regs;
 	u32 iflag1, iflag2;
 
-	iflag2 = priv->read(&regs->iflag2) & priv->reg_imask2_default;
-	iflag1 = priv->read(&regs->iflag1) & priv->reg_imask1_default &
+	iflag2 = priv->read(&regs->iflag2) & priv->reg_imask2_default &
 		~FLEXCAN_IFLAG_MB(priv->tx_mb_idx);
+	iflag1 = priv->read(&regs->iflag1) & priv->reg_imask1_default;
 
 	return (u64)iflag2 << 32 | iflag1;
 }
@@ -751,11 +750,9 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id)
 	struct flexcan_priv *priv = netdev_priv(dev);
 	struct flexcan_regs __iomem *regs = priv->regs;
 	irqreturn_t handled = IRQ_NONE;
-	u32 reg_iflag1, reg_esr;
+	u32 reg_iflag2, reg_esr;
 	enum can_state last_state = priv->can.state;
 
-	reg_iflag1 = priv->read(&regs->iflag1);
-
 	/* reception interrupt */
 	if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) {
 		u64 reg_iflag;
@@ -769,6 +766,9 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id)
 				break;
 		}
 	} else {
+		u32 reg_iflag1;
+
+		reg_iflag1 = priv->read(&regs->iflag1);
 		if (reg_iflag1 & FLEXCAN_IFLAG_RX_FIFO_AVAILABLE) {
 			handled = IRQ_HANDLED;
 			can_rx_offload_irq_offload_fifo(&priv->offload);
@@ -784,8 +784,10 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id)
 		}
 	}
 
+	reg_iflag2 = priv->read(&regs->iflag2);
+
 	/* transmission complete interrupt */
-	if (reg_iflag1 & FLEXCAN_IFLAG_MB(priv->tx_mb_idx)) {
+	if (reg_iflag2 & FLEXCAN_IFLAG_MB(priv->tx_mb_idx)) {
 		handled = IRQ_HANDLED;
 		stats->tx_bytes += can_get_echo_skb(dev, 0);
 		stats->tx_packets++;
@@ -794,7 +796,7 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id)
 		/* after sending a RTR frame MB is in RX mode */
 		priv->write(FLEXCAN_MB_CODE_TX_INACTIVE,
 			    &priv->tx_mb->can_ctrl);
-		priv->write(FLEXCAN_IFLAG_MB(priv->tx_mb_idx), &regs->iflag1);
+		priv->write(FLEXCAN_IFLAG_MB(priv->tx_mb_idx), &regs->iflag2);
 		netif_wake_queue(dev);
 	}
 
@@ -936,15 +938,13 @@ static int flexcan_chip_start(struct net_device *dev)
 	reg_mcr &= ~FLEXCAN_MCR_MAXMB(0xff);
 	reg_mcr |= FLEXCAN_MCR_FRZ | FLEXCAN_MCR_HALT | FLEXCAN_MCR_SUPV |
 		FLEXCAN_MCR_WRN_EN | FLEXCAN_MCR_SRX_DIS | FLEXCAN_MCR_IRMQ |
-		FLEXCAN_MCR_IDAM_C;
+		FLEXCAN_MCR_IDAM_C | FLEXCAN_MCR_MAXMB(priv->tx_mb_idx);
 
-	if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) {
+	if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP)
 		reg_mcr &= ~FLEXCAN_MCR_FEN;
-		reg_mcr |= FLEXCAN_MCR_MAXMB(priv->offload.mb_last);
-	} else {
-		reg_mcr |= FLEXCAN_MCR_FEN |
-			FLEXCAN_MCR_MAXMB(priv->tx_mb_idx);
-	}
+	else
+		reg_mcr |= FLEXCAN_MCR_FEN;
+
 	netdev_dbg(dev, "%s: writing mcr=0x%08x", __func__, reg_mcr);
 	priv->write(reg_mcr, &regs->mcr);
 
@@ -987,16 +987,17 @@ static int flexcan_chip_start(struct net_device *dev)
 		priv->write(reg_ctrl2, &regs->ctrl2);
 	}
 
-	/* clear and invalidate all mailboxes first */
-	for (i = priv->tx_mb_idx; i < ARRAY_SIZE(regs->mb); i++) {
-		priv->write(FLEXCAN_MB_CODE_RX_INACTIVE,
-			    &regs->mb[i].can_ctrl);
-	}
-
 	if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) {
-		for (i = priv->offload.mb_first; i <= priv->offload.mb_last; i++)
+		for (i = priv->offload.mb_first; i <= priv->offload.mb_last; i++) {
 			priv->write(FLEXCAN_MB_CODE_RX_EMPTY,
 				    &regs->mb[i].can_ctrl);
+		}
+	} else {
+		/* clear and invalidate unused mailboxes first */
+		for (i = FLEXCAN_TX_MB_RESERVED_OFF_FIFO; i <= ARRAY_SIZE(regs->mb); i++) {
+			priv->write(FLEXCAN_MB_CODE_RX_INACTIVE,
+				    &regs->mb[i].can_ctrl);
+		}
 	}
 
 	/* Errata ERR005829: mark first TX mailbox as INACTIVE */
@@ -1360,17 +1361,15 @@ static int flexcan_probe(struct platform_device *pdev)
 	priv->devtype_data = devtype_data;
 	priv->reg_xceiver = reg_xceiver;
 
-	if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP) {
-		priv->tx_mb_idx = FLEXCAN_TX_MB_OFF_TIMESTAMP;
+	if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP)
 		priv->tx_mb_reserved = &regs->mb[FLEXCAN_TX_MB_RESERVED_OFF_TIMESTAMP];
-	} else {
-		priv->tx_mb_idx = FLEXCAN_TX_MB_OFF_FIFO;
+	else
 		priv->tx_mb_reserved = &regs->mb[FLEXCAN_TX_MB_RESERVED_OFF_FIFO];
-	}
+	priv->tx_mb_idx = FLEXCAN_TX_MB;
 	priv->tx_mb = &regs->mb[priv->tx_mb_idx];
 
-	priv->reg_imask1_default = FLEXCAN_IFLAG_MB(priv->tx_mb_idx);
-	priv->reg_imask2_default = 0;
+	priv->reg_imask1_default = 0;
+	priv->reg_imask2_default = FLEXCAN_IFLAG_MB(priv->tx_mb_idx);
 
 	priv->offload.mailbox_read = flexcan_mailbox_read;
 
-- 
GitLab


From e05237f9da42ee52e73acea0bb082d788e111229 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Fri, 9 Nov 2018 15:01:50 +0100
Subject: [PATCH 0899/1890] can: flexcan: remove not needed struct
 flexcan_priv::tx_mb and struct flexcan_priv::tx_mb_idx

The previous patch changes the TX path to always use the last mailbox
regardless of the used offload scheme (rx-fifo or timestamp based). This
means members "tx_mb" and "tx_mb_idx" of the struct flexcan_priv don't
depend on the offload scheme, so replace them by compile time constants.

Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/flexcan.c | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
index 677c41701cf3c..68b46395c580c 100644
--- a/drivers/net/can/flexcan.c
+++ b/drivers/net/can/flexcan.c
@@ -258,9 +258,7 @@ struct flexcan_priv {
 	struct can_rx_offload offload;
 
 	struct flexcan_regs __iomem *regs;
-	struct flexcan_mb __iomem *tx_mb;
 	struct flexcan_mb __iomem *tx_mb_reserved;
-	u8 tx_mb_idx;
 	u32 reg_ctrl_default;
 	u32 reg_imask1_default;
 	u32 reg_imask2_default;
@@ -514,6 +512,7 @@ static int flexcan_get_berr_counter(const struct net_device *dev,
 static netdev_tx_t flexcan_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	const struct flexcan_priv *priv = netdev_priv(dev);
+	struct flexcan_regs __iomem *regs = priv->regs;
 	struct can_frame *cf = (struct can_frame *)skb->data;
 	u32 can_id;
 	u32 data;
@@ -536,17 +535,17 @@ static netdev_tx_t flexcan_start_xmit(struct sk_buff *skb, struct net_device *de
 
 	if (cf->can_dlc > 0) {
 		data = be32_to_cpup((__be32 *)&cf->data[0]);
-		priv->write(data, &priv->tx_mb->data[0]);
+		priv->write(data, &regs->mb[FLEXCAN_TX_MB].data[0]);
 	}
 	if (cf->can_dlc > 4) {
 		data = be32_to_cpup((__be32 *)&cf->data[4]);
-		priv->write(data, &priv->tx_mb->data[1]);
+		priv->write(data, &regs->mb[FLEXCAN_TX_MB].data[1]);
 	}
 
 	can_put_echo_skb(skb, dev, 0);
 
-	priv->write(can_id, &priv->tx_mb->can_id);
-	priv->write(ctrl, &priv->tx_mb->can_ctrl);
+	priv->write(can_id, &regs->mb[FLEXCAN_TX_MB].can_id);
+	priv->write(ctrl, &regs->mb[FLEXCAN_TX_MB].can_ctrl);
 
 	/* Errata ERR005829 step8:
 	 * Write twice INACTIVE(0x8) code to first MB.
@@ -737,7 +736,7 @@ static inline u64 flexcan_read_reg_iflag_rx(struct flexcan_priv *priv)
 	u32 iflag1, iflag2;
 
 	iflag2 = priv->read(&regs->iflag2) & priv->reg_imask2_default &
-		~FLEXCAN_IFLAG_MB(priv->tx_mb_idx);
+		~FLEXCAN_IFLAG_MB(FLEXCAN_TX_MB);
 	iflag1 = priv->read(&regs->iflag1) & priv->reg_imask1_default;
 
 	return (u64)iflag2 << 32 | iflag1;
@@ -787,7 +786,7 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id)
 	reg_iflag2 = priv->read(&regs->iflag2);
 
 	/* transmission complete interrupt */
-	if (reg_iflag2 & FLEXCAN_IFLAG_MB(priv->tx_mb_idx)) {
+	if (reg_iflag2 & FLEXCAN_IFLAG_MB(FLEXCAN_TX_MB)) {
 		handled = IRQ_HANDLED;
 		stats->tx_bytes += can_get_echo_skb(dev, 0);
 		stats->tx_packets++;
@@ -795,8 +794,8 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id)
 
 		/* after sending a RTR frame MB is in RX mode */
 		priv->write(FLEXCAN_MB_CODE_TX_INACTIVE,
-			    &priv->tx_mb->can_ctrl);
-		priv->write(FLEXCAN_IFLAG_MB(priv->tx_mb_idx), &regs->iflag2);
+			    &regs->mb[FLEXCAN_TX_MB].can_ctrl);
+		priv->write(FLEXCAN_IFLAG_MB(FLEXCAN_TX_MB), &regs->iflag2);
 		netif_wake_queue(dev);
 	}
 
@@ -938,7 +937,7 @@ static int flexcan_chip_start(struct net_device *dev)
 	reg_mcr &= ~FLEXCAN_MCR_MAXMB(0xff);
 	reg_mcr |= FLEXCAN_MCR_FRZ | FLEXCAN_MCR_HALT | FLEXCAN_MCR_SUPV |
 		FLEXCAN_MCR_WRN_EN | FLEXCAN_MCR_SRX_DIS | FLEXCAN_MCR_IRMQ |
-		FLEXCAN_MCR_IDAM_C | FLEXCAN_MCR_MAXMB(priv->tx_mb_idx);
+		FLEXCAN_MCR_IDAM_C | FLEXCAN_MCR_MAXMB(FLEXCAN_TX_MB);
 
 	if (priv->devtype_data->quirks & FLEXCAN_QUIRK_USE_OFF_TIMESTAMP)
 		reg_mcr &= ~FLEXCAN_MCR_FEN;
@@ -1006,7 +1005,7 @@ static int flexcan_chip_start(struct net_device *dev)
 
 	/* mark TX mailbox as INACTIVE */
 	priv->write(FLEXCAN_MB_CODE_TX_INACTIVE,
-		    &priv->tx_mb->can_ctrl);
+		    &regs->mb[FLEXCAN_TX_MB].can_ctrl);
 
 	/* acceptance mask/acceptance code (accept everything) */
 	priv->write(0x0, &regs->rxgmask);
@@ -1365,11 +1364,9 @@ static int flexcan_probe(struct platform_device *pdev)
 		priv->tx_mb_reserved = &regs->mb[FLEXCAN_TX_MB_RESERVED_OFF_TIMESTAMP];
 	else
 		priv->tx_mb_reserved = &regs->mb[FLEXCAN_TX_MB_RESERVED_OFF_FIFO];
-	priv->tx_mb_idx = FLEXCAN_TX_MB;
-	priv->tx_mb = &regs->mb[priv->tx_mb_idx];
 
 	priv->reg_imask1_default = 0;
-	priv->reg_imask2_default = FLEXCAN_IFLAG_MB(priv->tx_mb_idx);
+	priv->reg_imask2_default = FLEXCAN_IFLAG_MB(FLEXCAN_TX_MB);
 
 	priv->offload.mailbox_read = flexcan_mailbox_read;
 
-- 
GitLab


From a4310fa2f24687888ce80fdb0e88583561a23700 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Wed, 31 Oct 2018 10:37:46 +0100
Subject: [PATCH 0900/1890] can: dev: can_get_echo_skb(): factor out non
 sending code to __can_get_echo_skb()

This patch factors out all non sending parts of can_get_echo_skb() into
a seperate function __can_get_echo_skb(), so that it can be re-used in
an upcoming patch.

Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/dev.c   | 36 +++++++++++++++++++++++++-----------
 include/linux/can/dev.h |  1 +
 2 files changed, 26 insertions(+), 11 deletions(-)

diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index 49163570a63af..80530ab37b1e3 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -477,14 +477,7 @@ void can_put_echo_skb(struct sk_buff *skb, struct net_device *dev,
 }
 EXPORT_SYMBOL_GPL(can_put_echo_skb);
 
-/*
- * Get the skb from the stack and loop it back locally
- *
- * The function is typically called when the TX done interrupt
- * is handled in the device driver. The driver must protect
- * access to priv->echo_skb, if necessary.
- */
-unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx)
+struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr)
 {
 	struct can_priv *priv = netdev_priv(dev);
 
@@ -495,13 +488,34 @@ unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx)
 		struct can_frame *cf = (struct can_frame *)skb->data;
 		u8 dlc = cf->can_dlc;
 
-		netif_rx(priv->echo_skb[idx]);
+		*len_ptr = dlc;
 		priv->echo_skb[idx] = NULL;
 
-		return dlc;
+		return skb;
 	}
 
-	return 0;
+	return NULL;
+}
+
+/*
+ * Get the skb from the stack and loop it back locally
+ *
+ * The function is typically called when the TX done interrupt
+ * is handled in the device driver. The driver must protect
+ * access to priv->echo_skb, if necessary.
+ */
+unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx)
+{
+	struct sk_buff *skb;
+	u8 len;
+
+	skb = __can_get_echo_skb(dev, idx, &len);
+	if (!skb)
+		return 0;
+
+	netif_rx(skb);
+
+	return len;
 }
 EXPORT_SYMBOL_GPL(can_get_echo_skb);
 
diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index a83e1f632eb70..f01623aef2f77 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -169,6 +169,7 @@ void can_change_state(struct net_device *dev, struct can_frame *cf,
 
 void can_put_echo_skb(struct sk_buff *skb, struct net_device *dev,
 		      unsigned int idx);
+struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr);
 unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx);
 void can_free_echo_skb(struct net_device *dev, unsigned int idx);
 
-- 
GitLab


From 200f5c49f7a2cd694436bfc6cb0662b794c96736 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Wed, 31 Oct 2018 11:08:21 +0100
Subject: [PATCH 0901/1890] can: dev: __can_get_echo_skb(): replace struct
 can_frame by canfd_frame to access frame length

This patch replaces the use of "struct can_frame::can_dlc" by "struct
canfd_frame::len" to access the frame's length. As it is ensured that
both structures have a compatible memory layout for this member this is
no functional change. Futher, this compatibility is documented in a
comment.

Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/dev.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index 80530ab37b1e3..46cc5fec4043d 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -484,11 +484,14 @@ struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8
 	BUG_ON(idx >= priv->echo_skb_max);
 
 	if (priv->echo_skb[idx]) {
+		/* Using "struct canfd_frame::len" for the frame
+		 * length is supported on both CAN and CANFD frames.
+		 */
 		struct sk_buff *skb = priv->echo_skb[idx];
-		struct can_frame *cf = (struct can_frame *)skb->data;
-		u8 dlc = cf->can_dlc;
+		struct canfd_frame *cf = (struct canfd_frame *)skb->data;
+		u8 len = cf->len;
 
-		*len_ptr = dlc;
+		*len_ptr = len;
 		priv->echo_skb[idx] = NULL;
 
 		return skb;
-- 
GitLab


From e7a6994d043a1e31d5b17706a22ce33d2a3e4cdc Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Wed, 31 Oct 2018 14:05:26 +0100
Subject: [PATCH 0902/1890] can: dev: __can_get_echo_skb(): Don't crash the
 kernel if can_priv::echo_skb is accessed out of bounds

If the "struct can_priv::echo_skb" is accessed out of bounds would lead
to a kernel crash. Better print a sensible warning message instead and
try to recover.

Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/dev.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index 46cc5fec4043d..c05e4d50d43d7 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -481,7 +481,11 @@ struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8
 {
 	struct can_priv *priv = netdev_priv(dev);
 
-	BUG_ON(idx >= priv->echo_skb_max);
+	if (idx >= priv->echo_skb_max) {
+		netdev_err(dev, "%s: BUG! Trying to access can_priv::echo_skb out of bounds (%u/max %u)\n",
+			   __func__, idx, priv->echo_skb_max);
+		return NULL;
+	}
 
 	if (priv->echo_skb[idx]) {
 		/* Using "struct canfd_frame::len" for the frame
-- 
GitLab


From 7da11ba5c5066dadc2e96835a6233d56d7b7764a Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Wed, 31 Oct 2018 14:15:13 +0100
Subject: [PATCH 0903/1890] can: dev: __can_get_echo_skb(): print error
 message, if trying to echo non existing skb

Prior to echoing a successfully transmitted CAN frame (by calling
can_get_echo_skb()), CAN drivers have to put the CAN frame (by calling
can_put_echo_skb() in the transmit function). These put and get function
take an index as parameter, which is used to identify the CAN frame.

A driver calling can_get_echo_skb() with a index not pointing to a skb
is a BUG, so add an appropriate error message.

Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/dev.c | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index c05e4d50d43d7..3b3f88ffab53c 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -480,6 +480,8 @@ EXPORT_SYMBOL_GPL(can_put_echo_skb);
 struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr)
 {
 	struct can_priv *priv = netdev_priv(dev);
+	struct sk_buff *skb = priv->echo_skb[idx];
+	struct canfd_frame *cf;
 
 	if (idx >= priv->echo_skb_max) {
 		netdev_err(dev, "%s: BUG! Trying to access can_priv::echo_skb out of bounds (%u/max %u)\n",
@@ -487,21 +489,20 @@ struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8
 		return NULL;
 	}
 
-	if (priv->echo_skb[idx]) {
-		/* Using "struct canfd_frame::len" for the frame
-		 * length is supported on both CAN and CANFD frames.
-		 */
-		struct sk_buff *skb = priv->echo_skb[idx];
-		struct canfd_frame *cf = (struct canfd_frame *)skb->data;
-		u8 len = cf->len;
-
-		*len_ptr = len;
-		priv->echo_skb[idx] = NULL;
-
-		return skb;
+	if (!skb) {
+		netdev_err(dev, "%s: BUG! Trying to echo non existing skb: can_priv::echo_skb[%u]\n",
+			   __func__, idx);
+		return NULL;
 	}
 
-	return NULL;
+	/* Using "struct canfd_frame::len" for the frame
+	 * length is supported on both CAN and CANFD frames.
+	 */
+	cf = (struct canfd_frame *)skb->data;
+	*len_ptr = cf->len;
+	priv->echo_skb[idx] = NULL;
+
+	return skb;
 }
 
 /*
-- 
GitLab


From 55059f2b7f868cd43b3ad30e28e18347e1b46ace Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Tue, 18 Sep 2018 11:40:38 +0200
Subject: [PATCH 0904/1890] can: rx-offload: introduce
 can_rx_offload_get_echo_skb() and can_rx_offload_queue_sorted() functions

Current CAN framework can't guarantee proper/chronological order
of RX and TX-ECHO messages. To make this possible, drivers should use
this functions instead of can_get_echo_skb().

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/rx-offload.c   | 46 ++++++++++++++++++++++++++++++++++
 include/linux/can/rx-offload.h |  4 +++
 2 files changed, 50 insertions(+)

diff --git a/drivers/net/can/rx-offload.c b/drivers/net/can/rx-offload.c
index c7d05027a7a07..c368686e21645 100644
--- a/drivers/net/can/rx-offload.c
+++ b/drivers/net/can/rx-offload.c
@@ -211,6 +211,52 @@ int can_rx_offload_irq_offload_fifo(struct can_rx_offload *offload)
 }
 EXPORT_SYMBOL_GPL(can_rx_offload_irq_offload_fifo);
 
+int can_rx_offload_queue_sorted(struct can_rx_offload *offload,
+				struct sk_buff *skb, u32 timestamp)
+{
+	struct can_rx_offload_cb *cb;
+	unsigned long flags;
+
+	if (skb_queue_len(&offload->skb_queue) >
+	    offload->skb_queue_len_max)
+		return -ENOMEM;
+
+	cb = can_rx_offload_get_cb(skb);
+	cb->timestamp = timestamp;
+
+	spin_lock_irqsave(&offload->skb_queue.lock, flags);
+	__skb_queue_add_sort(&offload->skb_queue, skb, can_rx_offload_compare);
+	spin_unlock_irqrestore(&offload->skb_queue.lock, flags);
+
+	can_rx_offload_schedule(offload);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(can_rx_offload_queue_sorted);
+
+unsigned int can_rx_offload_get_echo_skb(struct can_rx_offload *offload,
+					 unsigned int idx, u32 timestamp)
+{
+	struct net_device *dev = offload->dev;
+	struct net_device_stats *stats = &dev->stats;
+	struct sk_buff *skb;
+	u8 len;
+	int err;
+
+	skb = __can_get_echo_skb(dev, idx, &len);
+	if (!skb)
+		return 0;
+
+	err = can_rx_offload_queue_sorted(offload, skb, timestamp);
+	if (err) {
+		stats->rx_errors++;
+		stats->tx_fifo_errors++;
+	}
+
+	return len;
+}
+EXPORT_SYMBOL_GPL(can_rx_offload_get_echo_skb);
+
 int can_rx_offload_irq_queue_err_skb(struct can_rx_offload *offload, struct sk_buff *skb)
 {
 	if (skb_queue_len(&offload->skb_queue) >
diff --git a/include/linux/can/rx-offload.h b/include/linux/can/rx-offload.h
index cb31683bbe154..01a7c9e5d8d85 100644
--- a/include/linux/can/rx-offload.h
+++ b/include/linux/can/rx-offload.h
@@ -41,6 +41,10 @@ int can_rx_offload_add_timestamp(struct net_device *dev, struct can_rx_offload *
 int can_rx_offload_add_fifo(struct net_device *dev, struct can_rx_offload *offload, unsigned int weight);
 int can_rx_offload_irq_offload_timestamp(struct can_rx_offload *offload, u64 reg);
 int can_rx_offload_irq_offload_fifo(struct can_rx_offload *offload);
+int can_rx_offload_queue_sorted(struct can_rx_offload *offload,
+				struct sk_buff *skb, u32 timestamp);
+unsigned int can_rx_offload_get_echo_skb(struct can_rx_offload *offload,
+					 unsigned int idx, u32 timestamp);
 int can_rx_offload_irq_queue_err_skb(struct can_rx_offload *offload, struct sk_buff *skb);
 void can_rx_offload_reset(struct can_rx_offload *offload);
 void can_rx_offload_del(struct can_rx_offload *offload);
-- 
GitLab


From ab214c48387aaaadcf8246b4b00d8b78286fde1b Mon Sep 17 00:00:00 2001
From: Vignesh R <vigneshr@ti.com>
Date: Fri, 9 Nov 2018 16:44:10 +0530
Subject: [PATCH 0905/1890] dt-bindings: i2c: omap: Add new compatible for
 AM654 SoCs

AM654 SoCs have same I2C IP as OMAP SoCs. Add new compatible to
handle AM654 SoCs. While at that reformat the existing compatible list
for older SoCs to list one valid compatible per line.

Signed-off-by: Vignesh R <vigneshr@ti.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 Documentation/devicetree/bindings/i2c/i2c-omap.txt | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/i2c/i2c-omap.txt b/Documentation/devicetree/bindings/i2c/i2c-omap.txt
index 7e49839d41249..4b90ba9f31b70 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-omap.txt
+++ b/Documentation/devicetree/bindings/i2c/i2c-omap.txt
@@ -1,8 +1,12 @@
 I2C for OMAP platforms
 
 Required properties :
-- compatible : Must be "ti,omap2420-i2c", "ti,omap2430-i2c", "ti,omap3-i2c"
-  or "ti,omap4-i2c"
+- compatible : Must be
+	"ti,omap2420-i2c" for OMAP2420 SoCs
+	"ti,omap2430-i2c" for OMAP2430 SoCs
+	"ti,omap3-i2c" for OMAP3 SoCs
+	"ti,omap4-i2c" for OMAP4+ SoCs
+	"ti,am654-i2c", "ti,omap4-i2c" for AM654 SoCs
 - ti,hwmods : Must be "i2c<n>", n being the instance number (1-based)
 - #address-cells = <1>;
 - #size-cells = <0>;
-- 
GitLab


From 5b277402deac0691226a947df71c581686bd4020 Mon Sep 17 00:00:00 2001
From: Vignesh R <vigneshr@ti.com>
Date: Fri, 9 Nov 2018 16:44:11 +0530
Subject: [PATCH 0906/1890] i2c: omap: Enable for ARCH_K3

Allow I2C_OMAP to be built for K3 platforms.

Signed-off-by: Vignesh R <vigneshr@ti.com>
Reviewed-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index 56ccb1ea7da5b..77dc94b440117 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -752,7 +752,7 @@ config I2C_OCORES
 
 config I2C_OMAP
 	tristate "OMAP I2C adapter"
-	depends on ARCH_OMAP
+	depends on ARCH_OMAP || ARCH_K3
 	default y if MACH_OMAP_H3 || MACH_OMAP_OSK
 	help
 	  If you say yes to this option, support will be included for the
-- 
GitLab


From 350784e79230f85e9007c4c7d8b2259717d67a4d Mon Sep 17 00:00:00 2001
From: Vignesh R <vigneshr@ti.com>
Date: Fri, 9 Nov 2018 16:44:12 +0530
Subject: [PATCH 0907/1890] MAINTAINERS: Add entry for i2c-omap driver

Add separate entry for i2c-omap and add my name as maintainer for this
driver.

Signed-off-by: Vignesh R <vigneshr@ti.com>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 MAINTAINERS | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index f4855974f3250..259fe9445d2fb 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10784,6 +10784,14 @@ L:	linux-omap@vger.kernel.org
 S:	Maintained
 F:	arch/arm/mach-omap2/omap_hwmod.*
 
+OMAP I2C DRIVER
+M:	Vignesh R <vigneshr@ti.com>
+L:	linux-omap@vger.kernel.org
+L:	linux-i2c@vger.kernel.org
+S:	Maintained
+F:	Documentation/devicetree/bindings/i2c/i2c-omap.txt
+F:	drivers/i2c/busses/i2c-omap.c
+
 OMAP IMAGING SUBSYSTEM (OMAP3 ISP and OMAP4 ISS)
 M:	Laurent Pinchart <laurent.pinchart@ideasonboard.com>
 L:	linux-media@vger.kernel.org
-- 
GitLab


From 848bd3f3de9d44950c00eda6c115e8e9785440da Mon Sep 17 00:00:00 2001
From: Stephen Boyd <swboyd@chromium.org>
Date: Fri, 2 Nov 2018 13:57:32 -0700
Subject: [PATCH 0908/1890] i2c: qcom-geni: Fix runtime PM mismatch with child
 devices

We need to enable runtime PM on this i2c controller before populating
child devices with i2c_add_adapter(). Otherwise, if a child device uses
runtime PM and stays runtime PM enabled we'll get the following warning
at boot.

 Enabling runtime PM for inactive device (a98000.i2c) with active children

[...]

 Call trace:
  pm_runtime_enable+0xd8/0xf8
  geni_i2c_probe+0x440/0x460
  platform_drv_probe+0x74/0xc8
[...]

Let's move the runtime PM enabling and setup to before we add the
adapter, so that this device can respond to runtime PM requests from
children.

Fixes: 37692de5d523 ("i2c: i2c-qcom-geni: Add bus driver for the Qualcomm GENI I2C controller")
Signed-off-by: Stephen Boyd <swboyd@chromium.org>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-qcom-geni.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c
index 527f55c8c4c70..db075bc0d9525 100644
--- a/drivers/i2c/busses/i2c-qcom-geni.c
+++ b/drivers/i2c/busses/i2c-qcom-geni.c
@@ -571,18 +571,19 @@ static int geni_i2c_probe(struct platform_device *pdev)
 
 	dev_dbg(&pdev->dev, "i2c fifo/se-dma mode. fifo depth:%d\n", tx_depth);
 
-	ret = i2c_add_adapter(&gi2c->adap);
-	if (ret) {
-		dev_err(&pdev->dev, "Error adding i2c adapter %d\n", ret);
-		return ret;
-	}
-
 	gi2c->suspended = 1;
 	pm_runtime_set_suspended(gi2c->se.dev);
 	pm_runtime_set_autosuspend_delay(gi2c->se.dev, I2C_AUTO_SUSPEND_DELAY);
 	pm_runtime_use_autosuspend(gi2c->se.dev);
 	pm_runtime_enable(gi2c->se.dev);
 
+	ret = i2c_add_adapter(&gi2c->adap);
+	if (ret) {
+		dev_err(&pdev->dev, "Error adding i2c adapter %d\n", ret);
+		pm_runtime_disable(gi2c->se.dev);
+		return ret;
+	}
+
 	return 0;
 }
 
@@ -590,8 +591,8 @@ static int geni_i2c_remove(struct platform_device *pdev)
 {
 	struct geni_i2c_dev *gi2c = platform_get_drvdata(pdev);
 
-	pm_runtime_disable(gi2c->se.dev);
 	i2c_del_adapter(&gi2c->adap);
+	pm_runtime_disable(gi2c->se.dev);
 	return 0;
 }
 
-- 
GitLab


From 8588eac3ff66e77fb681591714fd76da3a4da80d Mon Sep 17 00:00:00 2001
From: Vignesh R <vigneshr@ti.com>
Date: Thu, 27 Sep 2018 10:31:46 +0530
Subject: [PATCH 0909/1890] arm64: dts: ti: k3-am654: Fix wakeup_uart reg
 address

cbass_wakeup interconnect which is the parent of wakeup_uart node
defines address-cells=1 and size-cells=1, therefore fix up reg property
of wakeup_uart node accordingly. Otherwise, this UART instance fails to
probe if enabled.

Fixes: 4201af2544b3 ("arm64: dts: ti: am654: Add uart nodes")
Signed-off-by: Vignesh R <vigneshr@ti.com>
Reviewed-by: Lokesh Vutla <lokeshvutla@ti.com>
Signed-off-by: Tero Kristo <t-kristo@ti.com>
---
 arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi
index affc3c3093532..8d7b47f9dfbf4 100644
--- a/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi
@@ -36,7 +36,7 @@ k3_reset: reset-controller {
 
 	wkup_uart0: serial@42300000 {
 		compatible = "ti,am654-uart";
-		reg = <0x00 0x42300000 0x00 0x100>;
+		reg = <0x42300000 0x100>;
 		reg-shift = <2>;
 		reg-io-width = <4>;
 		interrupts = <GIC_SPI 697 IRQ_TYPE_LEVEL_HIGH>;
-- 
GitLab


From f068090426ea8d72c408ebd42953a82a88e2282c Mon Sep 17 00:00:00 2001
From: Mathias Nyman <mathias.nyman@linux.intel.com>
Date: Fri, 9 Nov 2018 17:21:17 +0200
Subject: [PATCH 0910/1890] xhci: Fix leaking USB3 shared_hcd at xhci removal

Ensure that the shared_hcd pointer is valid when calling usb_put_hcd()

The shared_hcd is removed and freed in xhci by first calling
usb_remove_hcd(xhci->shared_hcd), and later
usb_put_hcd(xhci->shared_hcd)

Afer commit fe190ed0d602 ("xhci: Do not halt the host until both HCD have
disconnected their devices.") the shared_hcd was never properly put as
xhci->shared_hcd was set to NULL before usb_put_hcd(xhci->shared_hcd) was
called.

shared_hcd (USB3) is removed before primary hcd (USB2).
While removing the primary hcd we might need to handle xhci interrupts
to cleanly remove last USB2 devices, therefore we need to set
xhci->shared_hcd to NULL before removing the primary hcd to let xhci
interrupt handler know shared_hcd is no longer available.

xhci-plat.c, xhci-histb.c and xhci-mtk first create both their hcd's before
adding them. so to keep the correct reverse removal order use a temporary
shared_hcd variable for them.
For more details see commit 4ac53087d6d4 ("usb: xhci: plat: Create both
HCDs before adding them")

Fixes: fe190ed0d602 ("xhci: Do not halt the host until both HCD have disconnected their devices.")
Cc: Joel Stanley <joel@jms.id.au>
Cc: Chunfeng Yun <chunfeng.yun@mediatek.com>
Cc: Thierry Reding <treding@nvidia.com>
Cc: Jianguo Sun <sunjianguo1@huawei.com>
Cc: <stable@vger.kernel.org>
Reported-by: Jack Pham <jackp@codeaurora.org>
Tested-by: Jack Pham <jackp@codeaurora.org>
Tested-by: Peter Chen <peter.chen@nxp.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-histb.c | 6 ++++--
 drivers/usb/host/xhci-mtk.c   | 6 ++++--
 drivers/usb/host/xhci-pci.c   | 1 +
 drivers/usb/host/xhci-plat.c  | 6 ++++--
 drivers/usb/host/xhci-tegra.c | 1 +
 drivers/usb/host/xhci.c       | 2 --
 6 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/drivers/usb/host/xhci-histb.c b/drivers/usb/host/xhci-histb.c
index 27f00160332e2..3c4abb5a1c3fc 100644
--- a/drivers/usb/host/xhci-histb.c
+++ b/drivers/usb/host/xhci-histb.c
@@ -325,14 +325,16 @@ static int xhci_histb_remove(struct platform_device *dev)
 	struct xhci_hcd_histb *histb = platform_get_drvdata(dev);
 	struct usb_hcd *hcd = histb->hcd;
 	struct xhci_hcd	*xhci = hcd_to_xhci(hcd);
+	struct usb_hcd *shared_hcd = xhci->shared_hcd;
 
 	xhci->xhc_state |= XHCI_STATE_REMOVING;
 
-	usb_remove_hcd(xhci->shared_hcd);
+	usb_remove_hcd(shared_hcd);
+	xhci->shared_hcd = NULL;
 	device_wakeup_disable(&dev->dev);
 
 	usb_remove_hcd(hcd);
-	usb_put_hcd(xhci->shared_hcd);
+	usb_put_hcd(shared_hcd);
 
 	xhci_histb_host_disable(histb);
 	usb_put_hcd(hcd);
diff --git a/drivers/usb/host/xhci-mtk.c b/drivers/usb/host/xhci-mtk.c
index 71d0d33c32862..60987c787e44f 100644
--- a/drivers/usb/host/xhci-mtk.c
+++ b/drivers/usb/host/xhci-mtk.c
@@ -590,12 +590,14 @@ static int xhci_mtk_remove(struct platform_device *dev)
 	struct xhci_hcd_mtk *mtk = platform_get_drvdata(dev);
 	struct usb_hcd	*hcd = mtk->hcd;
 	struct xhci_hcd	*xhci = hcd_to_xhci(hcd);
+	struct usb_hcd  *shared_hcd = xhci->shared_hcd;
 
-	usb_remove_hcd(xhci->shared_hcd);
+	usb_remove_hcd(shared_hcd);
+	xhci->shared_hcd = NULL;
 	device_init_wakeup(&dev->dev, false);
 
 	usb_remove_hcd(hcd);
-	usb_put_hcd(xhci->shared_hcd);
+	usb_put_hcd(shared_hcd);
 	usb_put_hcd(hcd);
 	xhci_mtk_sch_exit(mtk);
 	xhci_mtk_clks_disable(mtk);
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 01c57055c0c5b..1fb448cd26678 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -380,6 +380,7 @@ static void xhci_pci_remove(struct pci_dev *dev)
 	if (xhci->shared_hcd) {
 		usb_remove_hcd(xhci->shared_hcd);
 		usb_put_hcd(xhci->shared_hcd);
+		xhci->shared_hcd = NULL;
 	}
 
 	/* Workaround for spurious wakeups at shutdown with HSW */
diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c
index 32b5574ad5c56..ef09cb06212fd 100644
--- a/drivers/usb/host/xhci-plat.c
+++ b/drivers/usb/host/xhci-plat.c
@@ -362,14 +362,16 @@ static int xhci_plat_remove(struct platform_device *dev)
 	struct xhci_hcd	*xhci = hcd_to_xhci(hcd);
 	struct clk *clk = xhci->clk;
 	struct clk *reg_clk = xhci->reg_clk;
+	struct usb_hcd *shared_hcd = xhci->shared_hcd;
 
 	xhci->xhc_state |= XHCI_STATE_REMOVING;
 
-	usb_remove_hcd(xhci->shared_hcd);
+	usb_remove_hcd(shared_hcd);
+	xhci->shared_hcd = NULL;
 	usb_phy_shutdown(hcd->usb_phy);
 
 	usb_remove_hcd(hcd);
-	usb_put_hcd(xhci->shared_hcd);
+	usb_put_hcd(shared_hcd);
 
 	clk_disable_unprepare(clk);
 	clk_disable_unprepare(reg_clk);
diff --git a/drivers/usb/host/xhci-tegra.c b/drivers/usb/host/xhci-tegra.c
index 6b5db344de301..938ff06c03495 100644
--- a/drivers/usb/host/xhci-tegra.c
+++ b/drivers/usb/host/xhci-tegra.c
@@ -1303,6 +1303,7 @@ static int tegra_xusb_remove(struct platform_device *pdev)
 
 	usb_remove_hcd(xhci->shared_hcd);
 	usb_put_hcd(xhci->shared_hcd);
+	xhci->shared_hcd = NULL;
 	usb_remove_hcd(tegra->hcd);
 	usb_put_hcd(tegra->hcd);
 
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 0420eefa647a1..c928dbbff8811 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -719,8 +719,6 @@ static void xhci_stop(struct usb_hcd *hcd)
 
 	/* Only halt host and free memory after both hcds are removed */
 	if (!usb_hcd_is_primary_hcd(hcd)) {
-		/* usb core will free this hcd shortly, unset pointer */
-		xhci->shared_hcd = NULL;
 		mutex_unlock(&xhci->mutex);
 		return;
 	}
-- 
GitLab


From 1245374e9b8340fc255fd51b2015173a83050d03 Mon Sep 17 00:00:00 2001
From: Mathias Nyman <mathias.nyman@linux.intel.com>
Date: Fri, 9 Nov 2018 17:21:18 +0200
Subject: [PATCH 0911/1890] xhci: handle port status events for removed USB3
 hcd

At xhci removal the USB3 hcd (shared_hcd) is removed before the primary
USB2 hcd. Interrupts for port status changes may still occur for USB3
ports after the shared_hcd is freed, causing  NULL pointer dereference.

Check if xhci->shared_hcd is still valid before handing USB3 port events

Cc: <stable@vger.kernel.org>
Reported-by: Peter Chen <peter.chen@nxp.com>
Tested-by: Jack Pham <jackp@codeaurora.org>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-ring.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index a8d92c90fb587..80d464ea5a9a8 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -1556,6 +1556,13 @@ static void handle_port_status(struct xhci_hcd *xhci,
 		goto cleanup;
 	}
 
+	/* We might get interrupts after shared_hcd is removed */
+	if (port->rhub == &xhci->usb3_rhub && xhci->shared_hcd == NULL) {
+		xhci_dbg(xhci, "ignore port event for removed USB3 hcd\n");
+		bogus_port_status = true;
+		goto cleanup;
+	}
+
 	hcd = port->rhub->hcd;
 	bus_state = &xhci->bus_state[hcd_index(hcd)];
 	hcd_portnum = port->hcd_portnum;
-- 
GitLab


From d9193efba84fe4c4aa22a569fade5e6ca971f8af Mon Sep 17 00:00:00 2001
From: Sandeep Singh <sandeep.singh@amd.com>
Date: Fri, 9 Nov 2018 17:21:19 +0200
Subject: [PATCH 0912/1890] xhci: Add check for invalid byte size error when
 UAS devices are connected.

Observed "TRB completion code (27)" error which corresponds to Stopped -
Length Invalid error(xhci spec section 4.17.4) while connecting USB to
SATA bridge.

Looks like this case was not considered when the following patch[1] was
committed. Hence adding this new check which can prevent
the invalid byte size error.

[1] ade2e3a xhci: handle transfer events without TRB pointer

Cc: <stable@vger.kernel.org>
Signed-off-by: Sandeep Singh <sandeep.singh@amd.com>
cc: Nehal Shah <Nehal-bakulchandra.Shah@amd.com>
cc: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-ring.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 80d464ea5a9a8..730a6ecd85fcb 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -2273,6 +2273,7 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 			goto cleanup;
 		case COMP_RING_UNDERRUN:
 		case COMP_RING_OVERRUN:
+		case COMP_STOPPED_LENGTH_INVALID:
 			goto cleanup;
 		default:
 			xhci_err(xhci, "ERROR Transfer event for unknown stream ring slot %u ep %u\n",
-- 
GitLab


From 958c0bd86075d4ef1c936998deefe1947e539240 Mon Sep 17 00:00:00 2001
From: Aaron Ma <aaron.ma@canonical.com>
Date: Fri, 9 Nov 2018 17:21:20 +0200
Subject: [PATCH 0913/1890] usb: xhci: fix uninitialized completion when USB3
 port got wrong status

Realtek USB3.0 Card Reader [0bda:0328] reports wrong port status on
Cannon lake PCH USB3.1 xHCI [8086:a36d] after resume from S3,
after clear port reset it works fine.

Since this device is registered on USB3 roothub at boot,
when port status reports not superspeed, xhci_get_port_status will call
an uninitialized completion in bus_state[0].
Kernel will hang because of NULL pointer.

Restrict the USB2 resume status check in USB2 roothub to fix hang issue.

Cc: stable@vger.kernel.org
Signed-off-by: Aaron Ma <aaron.ma@canonical.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-hub.c  | 2 +-
 drivers/usb/host/xhci-ring.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c
index 12eea73d9f20b..60e4ac7ae4f8d 100644
--- a/drivers/usb/host/xhci-hub.c
+++ b/drivers/usb/host/xhci-hub.c
@@ -876,7 +876,7 @@ static u32 xhci_get_port_status(struct usb_hcd *hcd,
 			status |= USB_PORT_STAT_SUSPEND;
 	}
 	if ((raw_port_status & PORT_PLS_MASK) == XDEV_RESUME &&
-		!DEV_SUPERSPEED_ANY(raw_port_status)) {
+		!DEV_SUPERSPEED_ANY(raw_port_status) && hcd->speed < HCD_USB3) {
 		if ((raw_port_status & PORT_RESET) ||
 				!(raw_port_status & PORT_PE))
 			return 0xffffffff;
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 730a6ecd85fcb..250b758efe9f6 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -1646,7 +1646,7 @@ static void handle_port_status(struct xhci_hcd *xhci,
 	 * RExit to a disconnect state).  If so, let the the driver know it's
 	 * out of the RExit state.
 	 */
-	if (!DEV_SUPERSPEED_ANY(portsc) &&
+	if (!DEV_SUPERSPEED_ANY(portsc) && hcd->speed < HCD_USB3 &&
 			test_and_clear_bit(hcd_portnum,
 				&bus_state->rexit_ports)) {
 		complete(&bus_state->rexit_done[hcd_portnum]);
-- 
GitLab


From a5baeaeabcca3244782a9b6382ebab6f8a58f583 Mon Sep 17 00:00:00 2001
From: Aaron Ma <aaron.ma@canonical.com>
Date: Fri, 9 Nov 2018 17:21:21 +0200
Subject: [PATCH 0914/1890] usb: xhci: fix timeout for transition from RExit to
 U0

This definition is used by msecs_to_jiffies in milliseconds.
According to the comments, max rexit timeout should be 20ms.
Align with the comments to properly calculate the delay.

Verified on Sunrise Point-LP and Cannon Lake.

Cc: stable@vger.kernel.org
Signed-off-by: Aaron Ma <aaron.ma@canonical.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-hub.c | 4 ++--
 drivers/usb/host/xhci.h     | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c
index 60e4ac7ae4f8d..da98a11244e2b 100644
--- a/drivers/usb/host/xhci-hub.c
+++ b/drivers/usb/host/xhci-hub.c
@@ -921,7 +921,7 @@ static u32 xhci_get_port_status(struct usb_hcd *hcd,
 			time_left = wait_for_completion_timeout(
 					&bus_state->rexit_done[wIndex],
 					msecs_to_jiffies(
-						XHCI_MAX_REXIT_TIMEOUT));
+						XHCI_MAX_REXIT_TIMEOUT_MS));
 			spin_lock_irqsave(&xhci->lock, flags);
 
 			if (time_left) {
@@ -935,7 +935,7 @@ static u32 xhci_get_port_status(struct usb_hcd *hcd,
 			} else {
 				int port_status = readl(port->addr);
 				xhci_warn(xhci, "Port resume took longer than %i msec, port status = 0x%x\n",
-						XHCI_MAX_REXIT_TIMEOUT,
+						XHCI_MAX_REXIT_TIMEOUT_MS,
 						port_status);
 				status |= USB_PORT_STAT_SUSPEND;
 				clear_bit(wIndex, &bus_state->rexit_ports);
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index bf0b3692dc9a1..5f0c4f197f134 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1680,7 +1680,7 @@ struct xhci_bus_state {
  * It can take up to 20 ms to transition from RExit to U0 on the
  * Intel Lynx Point LP xHCI host.
  */
-#define	XHCI_MAX_REXIT_TIMEOUT	(20 * 1000)
+#define	XHCI_MAX_REXIT_TIMEOUT_MS	20
 
 static inline unsigned int hcd_index(struct usb_hcd *hcd)
 {
-- 
GitLab


From 11644a7659529730eaf2f166efaabe7c3dc7af8c Mon Sep 17 00:00:00 2001
From: "Cherian, George" <George.Cherian@cavium.com>
Date: Fri, 9 Nov 2018 17:21:22 +0200
Subject: [PATCH 0915/1890] xhci: Add quirk to workaround the errata seen on
 Cavium Thunder-X2 Soc

Implement workaround for ThunderX2 Errata-129 (documented in
CN99XX Known Issues" available at Cavium support site).
As per ThunderX2errata-129, USB 2 device may come up as USB 1
if a connection to a USB 1 device is followed by another connection to
a USB 2 device, the link will come up as USB 1 for the USB 2 device.

Resolution: Reset the PHY after the USB 1 device is disconnected.
The PHY reset sequence is done using private registers in XHCI register
space. After the PHY is reset we check for the PLL lock status and retry
the operation if it fails. From our tests, retrying 4 times is sufficient.

Add a new quirk flag XHCI_RESET_PLL_ON_DISCONNECT to invoke the workaround
in handle_xhci_port_status().

Cc: stable@vger.kernel.org
Signed-off-by: George Cherian <george.cherian@cavium.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-pci.c  |  5 +++++
 drivers/usb/host/xhci-ring.c | 35 ++++++++++++++++++++++++++++++++++-
 drivers/usb/host/xhci.h      |  1 +
 3 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 1fb448cd26678..a9515265db4db 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -248,6 +248,11 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
 	if (pdev->vendor == PCI_VENDOR_ID_TI && pdev->device == 0x8241)
 		xhci->quirks |= XHCI_LIMIT_ENDPOINT_INTERVAL_7;
 
+	if ((pdev->vendor == PCI_VENDOR_ID_BROADCOM ||
+	     pdev->vendor == PCI_VENDOR_ID_CAVIUM) &&
+	     pdev->device == 0x9026)
+		xhci->quirks |= XHCI_RESET_PLL_ON_DISCONNECT;
+
 	if (xhci->quirks & XHCI_RESET_ON_RESUME)
 		xhci_dbg_trace(xhci, trace_xhci_dbg_quirks,
 				"QUIRK: Resetting on resume");
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 250b758efe9f6..65750582133f6 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -1521,6 +1521,35 @@ static void handle_device_notification(struct xhci_hcd *xhci,
 		usb_wakeup_notification(udev->parent, udev->portnum);
 }
 
+/*
+ * Quirk hanlder for errata seen on Cavium ThunderX2 processor XHCI
+ * Controller.
+ * As per ThunderX2errata-129 USB 2 device may come up as USB 1
+ * If a connection to a USB 1 device is followed by another connection
+ * to a USB 2 device.
+ *
+ * Reset the PHY after the USB device is disconnected if device speed
+ * is less than HCD_USB3.
+ * Retry the reset sequence max of 4 times checking the PLL lock status.
+ *
+ */
+static void xhci_cavium_reset_phy_quirk(struct xhci_hcd *xhci)
+{
+	struct usb_hcd *hcd = xhci_to_hcd(xhci);
+	u32 pll_lock_check;
+	u32 retry_count = 4;
+
+	do {
+		/* Assert PHY reset */
+		writel(0x6F, hcd->regs + 0x1048);
+		udelay(10);
+		/* De-assert the PHY reset */
+		writel(0x7F, hcd->regs + 0x1048);
+		udelay(200);
+		pll_lock_check = readl(hcd->regs + 0x1070);
+	} while (!(pll_lock_check & 0x1) && --retry_count);
+}
+
 static void handle_port_status(struct xhci_hcd *xhci,
 		union xhci_trb *event)
 {
@@ -1654,8 +1683,12 @@ static void handle_port_status(struct xhci_hcd *xhci,
 		goto cleanup;
 	}
 
-	if (hcd->speed < HCD_USB3)
+	if (hcd->speed < HCD_USB3) {
 		xhci_test_and_clear_bit(xhci, port, PORT_PLC);
+		if ((xhci->quirks & XHCI_RESET_PLL_ON_DISCONNECT) &&
+		    (portsc & PORT_CSC) && !(portsc & PORT_CONNECT))
+			xhci_cavium_reset_phy_quirk(xhci);
+	}
 
 cleanup:
 	/* Update event ring dequeue pointer before dropping the lock */
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index 5f0c4f197f134..260b259b72bcb 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1849,6 +1849,7 @@ struct xhci_hcd {
 #define XHCI_INTEL_USB_ROLE_SW	BIT_ULL(31)
 #define XHCI_ZERO_64B_REGS	BIT_ULL(32)
 #define XHCI_DEFAULT_PM_RUNTIME_ALLOW	BIT_ULL(33)
+#define XHCI_RESET_PLL_ON_DISCONNECT	BIT_ULL(34)
 
 	unsigned int		num_active_eps;
 	unsigned int		limit_active_eps;
-- 
GitLab


From ed72bc8bcb9277061e753faf300b20f97323761c Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Tue, 18 Sep 2018 11:40:39 +0200
Subject: [PATCH 0916/1890] can: flexcan: handle tx-complete CAN frames via
 rx-offload infrastructure

Current flexcan driver will put TX-ECHO in regular unsorted way, in
this case TX-ECHO can come after the response to the same TXed message.
In some cases, for example for J1939 stack, things will break.
This patch is using new rx-offload API to put the messages just in the
right place.

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/flexcan.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
index 68b46395c580c..41a175f80c4b5 100644
--- a/drivers/net/can/flexcan.c
+++ b/drivers/net/can/flexcan.c
@@ -787,8 +787,11 @@ static irqreturn_t flexcan_irq(int irq, void *dev_id)
 
 	/* transmission complete interrupt */
 	if (reg_iflag2 & FLEXCAN_IFLAG_MB(FLEXCAN_TX_MB)) {
+		u32 reg_ctrl = priv->read(&regs->mb[FLEXCAN_TX_MB].can_ctrl);
+
 		handled = IRQ_HANDLED;
-		stats->tx_bytes += can_get_echo_skb(dev, 0);
+		stats->tx_bytes += can_rx_offload_get_echo_skb(&priv->offload,
+							       0, reg_ctrl << 16);
 		stats->tx_packets++;
 		can_led_event(dev, CAN_LED_EVENT_TX);
 
-- 
GitLab


From 4530ec36bb1e0d24f41c33229694adacda3d5d89 Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Tue, 18 Sep 2018 11:40:40 +0200
Subject: [PATCH 0917/1890] can: rx-offload: rename
 can_rx_offload_irq_queue_err_skb() to can_rx_offload_queue_tail()

This function has nothing todo with error.

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/flexcan.c      | 4 ++--
 drivers/net/can/rx-offload.c   | 5 +++--
 include/linux/can/rx-offload.h | 3 ++-
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
index 41a175f80c4b5..5923bd0ec1183 100644
--- a/drivers/net/can/flexcan.c
+++ b/drivers/net/can/flexcan.c
@@ -610,7 +610,7 @@ static void flexcan_irq_bus_err(struct net_device *dev, u32 reg_esr)
 	if (tx_errors)
 		dev->stats.tx_errors++;
 
-	can_rx_offload_irq_queue_err_skb(&priv->offload, skb);
+	can_rx_offload_queue_tail(&priv->offload, skb);
 }
 
 static void flexcan_irq_state(struct net_device *dev, u32 reg_esr)
@@ -650,7 +650,7 @@ static void flexcan_irq_state(struct net_device *dev, u32 reg_esr)
 	if (unlikely(new_state == CAN_STATE_BUS_OFF))
 		can_bus_off(dev);
 
-	can_rx_offload_irq_queue_err_skb(&priv->offload, skb);
+	can_rx_offload_queue_tail(&priv->offload, skb);
 }
 
 static inline struct flexcan_priv *rx_offload_to_priv(struct can_rx_offload *offload)
diff --git a/drivers/net/can/rx-offload.c b/drivers/net/can/rx-offload.c
index c368686e21645..2ce4fa8698c73 100644
--- a/drivers/net/can/rx-offload.c
+++ b/drivers/net/can/rx-offload.c
@@ -257,7 +257,8 @@ unsigned int can_rx_offload_get_echo_skb(struct can_rx_offload *offload,
 }
 EXPORT_SYMBOL_GPL(can_rx_offload_get_echo_skb);
 
-int can_rx_offload_irq_queue_err_skb(struct can_rx_offload *offload, struct sk_buff *skb)
+int can_rx_offload_queue_tail(struct can_rx_offload *offload,
+			      struct sk_buff *skb)
 {
 	if (skb_queue_len(&offload->skb_queue) >
 	    offload->skb_queue_len_max)
@@ -268,7 +269,7 @@ int can_rx_offload_irq_queue_err_skb(struct can_rx_offload *offload, struct sk_b
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(can_rx_offload_irq_queue_err_skb);
+EXPORT_SYMBOL_GPL(can_rx_offload_queue_tail);
 
 static int can_rx_offload_init_queue(struct net_device *dev, struct can_rx_offload *offload, unsigned int weight)
 {
diff --git a/include/linux/can/rx-offload.h b/include/linux/can/rx-offload.h
index 01a7c9e5d8d85..8268811a697e2 100644
--- a/include/linux/can/rx-offload.h
+++ b/include/linux/can/rx-offload.h
@@ -45,7 +45,8 @@ int can_rx_offload_queue_sorted(struct can_rx_offload *offload,
 				struct sk_buff *skb, u32 timestamp);
 unsigned int can_rx_offload_get_echo_skb(struct can_rx_offload *offload,
 					 unsigned int idx, u32 timestamp);
-int can_rx_offload_irq_queue_err_skb(struct can_rx_offload *offload, struct sk_buff *skb);
+int can_rx_offload_queue_tail(struct can_rx_offload *offload,
+			      struct sk_buff *skb);
 void can_rx_offload_reset(struct can_rx_offload *offload);
 void can_rx_offload_del(struct can_rx_offload *offload);
 void can_rx_offload_enable(struct can_rx_offload *offload);
-- 
GitLab


From d788905f68fd4714c82936f6f7f1d3644d7ae7ef Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Tue, 18 Sep 2018 11:40:41 +0200
Subject: [PATCH 0918/1890] can: flexcan: use can_rx_offload_queue_sorted() for
 flexcan_irq_bus_*()

Currently, in case of bus error, driver will generate error message and put
in the tail of the message queue. To avoid confusions, this change should
place the bus related messages in proper order.

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/flexcan.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
index 5923bd0ec1183..75ce11395ee81 100644
--- a/drivers/net/can/flexcan.c
+++ b/drivers/net/can/flexcan.c
@@ -561,9 +561,13 @@ static netdev_tx_t flexcan_start_xmit(struct sk_buff *skb, struct net_device *de
 static void flexcan_irq_bus_err(struct net_device *dev, u32 reg_esr)
 {
 	struct flexcan_priv *priv = netdev_priv(dev);
+	struct flexcan_regs __iomem *regs = priv->regs;
 	struct sk_buff *skb;
 	struct can_frame *cf;
 	bool rx_errors = false, tx_errors = false;
+	u32 timestamp;
+
+	timestamp = priv->read(&regs->timer) << 16;
 
 	skb = alloc_can_err_skb(dev, &cf);
 	if (unlikely(!skb))
@@ -610,17 +614,21 @@ static void flexcan_irq_bus_err(struct net_device *dev, u32 reg_esr)
 	if (tx_errors)
 		dev->stats.tx_errors++;
 
-	can_rx_offload_queue_tail(&priv->offload, skb);
+	can_rx_offload_queue_sorted(&priv->offload, skb, timestamp);
 }
 
 static void flexcan_irq_state(struct net_device *dev, u32 reg_esr)
 {
 	struct flexcan_priv *priv = netdev_priv(dev);
+	struct flexcan_regs __iomem *regs = priv->regs;
 	struct sk_buff *skb;
 	struct can_frame *cf;
 	enum can_state new_state, rx_state, tx_state;
 	int flt;
 	struct can_berr_counter bec;
+	u32 timestamp;
+
+	timestamp = priv->read(&regs->timer) << 16;
 
 	flt = reg_esr & FLEXCAN_ESR_FLT_CONF_MASK;
 	if (likely(flt == FLEXCAN_ESR_FLT_CONF_ACTIVE)) {
@@ -650,7 +658,7 @@ static void flexcan_irq_state(struct net_device *dev, u32 reg_esr)
 	if (unlikely(new_state == CAN_STATE_BUS_OFF))
 		can_bus_off(dev);
 
-	can_rx_offload_queue_tail(&priv->offload, skb);
+	can_rx_offload_queue_sorted(&priv->offload, skb, timestamp);
 }
 
 static inline struct flexcan_priv *rx_offload_to_priv(struct can_rx_offload *offload)
-- 
GitLab


From eb6984fa4ce2837dcb1f66720a600f31b0bb3739 Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@virtuozzo.com>
Date: Fri, 9 Nov 2018 11:34:40 -0500
Subject: [PATCH 0919/1890] ext4: missing !bh check in ext4_xattr_inode_write()

According to Ted Ts'o ext4_getblk() called in ext4_xattr_inode_write()
should not return bh = NULL

The only time that bh could be NULL, then, would be in the case of
something really going wrong; a programming error elsewhere (perhaps a
wild pointer dereference) or I/O error causing on-disk file system
corruption (although that would be highly unlikely given that we had
*just* allocated the blocks and so the metadata blocks in question
probably would still be in the cache).

Fixes: e50e5129f384 ("ext4: xattr-in-inode support")
Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org # 4.13
---
 fs/ext4/xattr.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 0b96886835262..7643d52c776c6 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1384,6 +1384,12 @@ static int ext4_xattr_inode_write(handle_t *handle, struct inode *ea_inode,
 		bh = ext4_getblk(handle, ea_inode, block, 0);
 		if (IS_ERR(bh))
 			return PTR_ERR(bh);
+		if (!bh) {
+			WARN_ON_ONCE(1);
+			EXT4_ERROR_INODE(ea_inode,
+					 "ext4_getblk() return bh = NULL");
+			return -EFSCORRUPTED;
+		}
 		ret = ext4_journal_get_write_access(handle, bh);
 		if (ret)
 			goto out;
-- 
GitLab


From c71bcdcb42a7493348d3b45dee8139843bf45efc Mon Sep 17 00:00:00 2001
From: Ajay Gupta <ajayg@nvidia.com>
Date: Fri, 26 Oct 2018 09:36:58 -0700
Subject: [PATCH 0920/1890] i2c: add i2c bus driver for NVIDIA GPU

Latest NVIDIA GPU card has USB Type-C interface. There is a
Type-C controller which can be accessed over I2C.

This driver adds I2C bus driver to communicate with Type-C controller.
I2C client driver will be part of USB Type-C UCSI driver.

Signed-off-by: Ajay Gupta <ajayg@nvidia.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
[wsa: kept Makefile sorting]
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 Documentation/i2c/busses/i2c-nvidia-gpu |  18 ++
 MAINTAINERS                             |   7 +
 drivers/i2c/busses/Kconfig              |   9 +
 drivers/i2c/busses/Makefile             |   1 +
 drivers/i2c/busses/i2c-nvidia-gpu.c     | 368 ++++++++++++++++++++++++
 5 files changed, 403 insertions(+)
 create mode 100644 Documentation/i2c/busses/i2c-nvidia-gpu
 create mode 100644 drivers/i2c/busses/i2c-nvidia-gpu.c

diff --git a/Documentation/i2c/busses/i2c-nvidia-gpu b/Documentation/i2c/busses/i2c-nvidia-gpu
new file mode 100644
index 0000000000000..31884d2b2eb5a
--- /dev/null
+++ b/Documentation/i2c/busses/i2c-nvidia-gpu
@@ -0,0 +1,18 @@
+Kernel driver i2c-nvidia-gpu
+
+Datasheet: not publicly available.
+
+Authors:
+	Ajay Gupta <ajayg@nvidia.com>
+
+Description
+-----------
+
+i2c-nvidia-gpu is a driver for I2C controller included in NVIDIA Turing
+and later GPUs and it is used to communicate with Type-C controller on GPUs.
+
+If your 'lspci -v' listing shows something like the following,
+
+01:00.3 Serial bus controller [0c80]: NVIDIA Corporation Device 1ad9 (rev a1)
+
+then this driver should support the I2C controller of your GPU.
diff --git a/MAINTAINERS b/MAINTAINERS
index 259fe9445d2fb..1835d3ffd7ea3 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6861,6 +6861,13 @@ L:	linux-acpi@vger.kernel.org
 S:	Maintained
 F:	drivers/i2c/i2c-core-acpi.c
 
+I2C CONTROLLER DRIVER FOR NVIDIA GPU
+M:	Ajay Gupta <ajayg@nvidia.com>
+L:	linux-i2c@vger.kernel.org
+S:	Maintained
+F:	Documentation/i2c/busses/i2c-nvidia-gpu
+F:	drivers/i2c/busses/i2c-nvidia-gpu.c
+
 I2C MUXES
 M:	Peter Rosin <peda@axentia.se>
 L:	linux-i2c@vger.kernel.org
diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index 77dc94b440117..f2c6819712013 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -224,6 +224,15 @@ config I2C_NFORCE2_S4985
 	  This driver can also be built as a module.  If so, the module
 	  will be called i2c-nforce2-s4985.
 
+config I2C_NVIDIA_GPU
+	tristate "NVIDIA GPU I2C controller"
+	depends on PCI
+	help
+	  If you say yes to this option, support will be included for the
+	  NVIDIA GPU I2C controller which is used to communicate with the GPU's
+	  Type-C controller. This driver can also be built as a module called
+	  i2c-nvidia-gpu.
+
 config I2C_SIS5595
 	tristate "SiS 5595"
 	depends on PCI
diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile
index 18b26af82b1c5..5f0cb6915969a 100644
--- a/drivers/i2c/busses/Makefile
+++ b/drivers/i2c/busses/Makefile
@@ -19,6 +19,7 @@ obj-$(CONFIG_I2C_ISCH)		+= i2c-isch.o
 obj-$(CONFIG_I2C_ISMT)		+= i2c-ismt.o
 obj-$(CONFIG_I2C_NFORCE2)	+= i2c-nforce2.o
 obj-$(CONFIG_I2C_NFORCE2_S4985)	+= i2c-nforce2-s4985.o
+obj-$(CONFIG_I2C_NVIDIA_GPU)	+= i2c-nvidia-gpu.o
 obj-$(CONFIG_I2C_PIIX4)		+= i2c-piix4.o
 obj-$(CONFIG_I2C_SIS5595)	+= i2c-sis5595.o
 obj-$(CONFIG_I2C_SIS630)	+= i2c-sis630.o
diff --git a/drivers/i2c/busses/i2c-nvidia-gpu.c b/drivers/i2c/busses/i2c-nvidia-gpu.c
new file mode 100644
index 0000000000000..744f5e42636b2
--- /dev/null
+++ b/drivers/i2c/busses/i2c-nvidia-gpu.c
@@ -0,0 +1,368 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Nvidia GPU I2C controller Driver
+ *
+ * Copyright (C) 2018 NVIDIA Corporation. All rights reserved.
+ * Author: Ajay Gupta <ajayg@nvidia.com>
+ */
+#include <linux/delay.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/pm.h>
+#include <linux/pm_runtime.h>
+
+#include <asm/unaligned.h>
+
+/* I2C definitions */
+#define I2C_MST_CNTL				0x00
+#define I2C_MST_CNTL_GEN_START			BIT(0)
+#define I2C_MST_CNTL_GEN_STOP			BIT(1)
+#define I2C_MST_CNTL_CMD_READ			(1 << 2)
+#define I2C_MST_CNTL_CMD_WRITE			(2 << 2)
+#define I2C_MST_CNTL_BURST_SIZE_SHIFT		6
+#define I2C_MST_CNTL_GEN_NACK			BIT(28)
+#define I2C_MST_CNTL_STATUS			GENMASK(30, 29)
+#define I2C_MST_CNTL_STATUS_OKAY		(0 << 29)
+#define I2C_MST_CNTL_STATUS_NO_ACK		(1 << 29)
+#define I2C_MST_CNTL_STATUS_TIMEOUT		(2 << 29)
+#define I2C_MST_CNTL_STATUS_BUS_BUSY		(3 << 29)
+#define I2C_MST_CNTL_CYCLE_TRIGGER		BIT(31)
+
+#define I2C_MST_ADDR				0x04
+
+#define I2C_MST_I2C0_TIMING				0x08
+#define I2C_MST_I2C0_TIMING_SCL_PERIOD_100KHZ		0x10e
+#define I2C_MST_I2C0_TIMING_TIMEOUT_CLK_CNT		16
+#define I2C_MST_I2C0_TIMING_TIMEOUT_CLK_CNT_MAX		255
+#define I2C_MST_I2C0_TIMING_TIMEOUT_CHECK		BIT(24)
+
+#define I2C_MST_DATA					0x0c
+
+#define I2C_MST_HYBRID_PADCTL				0x20
+#define I2C_MST_HYBRID_PADCTL_MODE_I2C			BIT(0)
+#define I2C_MST_HYBRID_PADCTL_I2C_SCL_INPUT_RCV		BIT(14)
+#define I2C_MST_HYBRID_PADCTL_I2C_SDA_INPUT_RCV		BIT(15)
+
+struct gpu_i2c_dev {
+	struct device *dev;
+	void __iomem *regs;
+	struct i2c_adapter adapter;
+	struct i2c_board_info *gpu_ccgx_ucsi;
+};
+
+static void gpu_enable_i2c_bus(struct gpu_i2c_dev *i2cd)
+{
+	u32 val;
+
+	/* enable I2C */
+	val = readl(i2cd->regs + I2C_MST_HYBRID_PADCTL);
+	val |= I2C_MST_HYBRID_PADCTL_MODE_I2C |
+		I2C_MST_HYBRID_PADCTL_I2C_SCL_INPUT_RCV |
+		I2C_MST_HYBRID_PADCTL_I2C_SDA_INPUT_RCV;
+	writel(val, i2cd->regs + I2C_MST_HYBRID_PADCTL);
+
+	/* enable 100KHZ mode */
+	val = I2C_MST_I2C0_TIMING_SCL_PERIOD_100KHZ;
+	val |= (I2C_MST_I2C0_TIMING_TIMEOUT_CLK_CNT_MAX
+	    << I2C_MST_I2C0_TIMING_TIMEOUT_CLK_CNT);
+	val |= I2C_MST_I2C0_TIMING_TIMEOUT_CHECK;
+	writel(val, i2cd->regs + I2C_MST_I2C0_TIMING);
+}
+
+static int gpu_i2c_check_status(struct gpu_i2c_dev *i2cd)
+{
+	unsigned long target = jiffies + msecs_to_jiffies(1000);
+	u32 val;
+
+	do {
+		val = readl(i2cd->regs + I2C_MST_CNTL);
+		if (!(val & I2C_MST_CNTL_CYCLE_TRIGGER))
+			break;
+		if ((val & I2C_MST_CNTL_STATUS) !=
+				I2C_MST_CNTL_STATUS_BUS_BUSY)
+			break;
+		usleep_range(500, 600);
+	} while (time_is_after_jiffies(target));
+
+	if (time_is_before_jiffies(target)) {
+		dev_err(i2cd->dev, "i2c timeout error %x\n", val);
+		return -ETIME;
+	}
+
+	val = readl(i2cd->regs + I2C_MST_CNTL);
+	switch (val & I2C_MST_CNTL_STATUS) {
+	case I2C_MST_CNTL_STATUS_OKAY:
+		return 0;
+	case I2C_MST_CNTL_STATUS_NO_ACK:
+		return -EIO;
+	case I2C_MST_CNTL_STATUS_TIMEOUT:
+		return -ETIME;
+	default:
+		return 0;
+	}
+}
+
+static int gpu_i2c_read(struct gpu_i2c_dev *i2cd, u8 *data, u16 len)
+{
+	int status;
+	u32 val;
+
+	val = I2C_MST_CNTL_GEN_START | I2C_MST_CNTL_CMD_READ |
+		(len << I2C_MST_CNTL_BURST_SIZE_SHIFT) |
+		I2C_MST_CNTL_CYCLE_TRIGGER | I2C_MST_CNTL_GEN_NACK;
+	writel(val, i2cd->regs + I2C_MST_CNTL);
+
+	status = gpu_i2c_check_status(i2cd);
+	if (status < 0)
+		return status;
+
+	val = readl(i2cd->regs + I2C_MST_DATA);
+	switch (len) {
+	case 1:
+		data[0] = val;
+		break;
+	case 2:
+		put_unaligned_be16(val, data);
+		break;
+	case 3:
+		put_unaligned_be16(val >> 8, data);
+		data[2] = val;
+		break;
+	case 4:
+		put_unaligned_be32(val, data);
+		break;
+	default:
+		break;
+	}
+	return status;
+}
+
+static int gpu_i2c_start(struct gpu_i2c_dev *i2cd)
+{
+	writel(I2C_MST_CNTL_GEN_START, i2cd->regs + I2C_MST_CNTL);
+	return gpu_i2c_check_status(i2cd);
+}
+
+static int gpu_i2c_stop(struct gpu_i2c_dev *i2cd)
+{
+	writel(I2C_MST_CNTL_GEN_STOP, i2cd->regs + I2C_MST_CNTL);
+	return gpu_i2c_check_status(i2cd);
+}
+
+static int gpu_i2c_write(struct gpu_i2c_dev *i2cd, u8 data)
+{
+	u32 val;
+
+	writel(data, i2cd->regs + I2C_MST_DATA);
+
+	val = I2C_MST_CNTL_CMD_WRITE | (1 << I2C_MST_CNTL_BURST_SIZE_SHIFT);
+	writel(val, i2cd->regs + I2C_MST_CNTL);
+
+	return gpu_i2c_check_status(i2cd);
+}
+
+static int gpu_i2c_master_xfer(struct i2c_adapter *adap,
+			       struct i2c_msg *msgs, int num)
+{
+	struct gpu_i2c_dev *i2cd = i2c_get_adapdata(adap);
+	int status, status2;
+	int i, j;
+
+	/*
+	 * The controller supports maximum 4 byte read due to known
+	 * limitation of sending STOP after every read.
+	 */
+	for (i = 0; i < num; i++) {
+		if (msgs[i].flags & I2C_M_RD) {
+			/* program client address before starting read */
+			writel(msgs[i].addr, i2cd->regs + I2C_MST_ADDR);
+			/* gpu_i2c_read has implicit start */
+			status = gpu_i2c_read(i2cd, msgs[i].buf, msgs[i].len);
+			if (status < 0)
+				goto stop;
+		} else {
+			u8 addr = i2c_8bit_addr_from_msg(msgs + i);
+
+			status = gpu_i2c_start(i2cd);
+			if (status < 0) {
+				if (i == 0)
+					return status;
+				goto stop;
+			}
+
+			status = gpu_i2c_write(i2cd, addr);
+			if (status < 0)
+				goto stop;
+
+			for (j = 0; j < msgs[i].len; j++) {
+				status = gpu_i2c_write(i2cd, msgs[i].buf[j]);
+				if (status < 0)
+					goto stop;
+			}
+		}
+	}
+	status = gpu_i2c_stop(i2cd);
+	if (status < 0)
+		return status;
+
+	return i;
+stop:
+	status2 = gpu_i2c_stop(i2cd);
+	if (status2 < 0)
+		dev_err(i2cd->dev, "i2c stop failed %d\n", status2);
+	return status;
+}
+
+static const struct i2c_adapter_quirks gpu_i2c_quirks = {
+	.max_read_len = 4,
+	.flags = I2C_AQ_COMB_WRITE_THEN_READ,
+};
+
+static u32 gpu_i2c_functionality(struct i2c_adapter *adap)
+{
+	return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
+}
+
+static const struct i2c_algorithm gpu_i2c_algorithm = {
+	.master_xfer	= gpu_i2c_master_xfer,
+	.functionality	= gpu_i2c_functionality,
+};
+
+/*
+ * This driver is for Nvidia GPU cards with USB Type-C interface.
+ * We want to identify the cards using vendor ID and class code only
+ * to avoid dependency of adding product id for any new card which
+ * requires this driver.
+ * Currently there is no class code defined for UCSI device over PCI
+ * so using UNKNOWN class for now and it will be updated when UCSI
+ * over PCI gets a class code.
+ * There is no other NVIDIA cards with UNKNOWN class code. Even if the
+ * driver gets loaded for an undesired card then eventually i2c_read()
+ * (initiated from UCSI i2c_client) will timeout or UCSI commands will
+ * timeout.
+ */
+#define PCI_CLASS_SERIAL_UNKNOWN	0x0c80
+static const struct pci_device_id gpu_i2c_ids[] = {
+	{ PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
+		PCI_CLASS_SERIAL_UNKNOWN << 8, 0xffffff00},
+	{ }
+};
+MODULE_DEVICE_TABLE(pci, gpu_i2c_ids);
+
+static int gpu_populate_client(struct gpu_i2c_dev *i2cd, int irq)
+{
+	struct i2c_client *ccgx_client;
+
+	i2cd->gpu_ccgx_ucsi = devm_kzalloc(i2cd->dev,
+					   sizeof(*i2cd->gpu_ccgx_ucsi),
+					   GFP_KERNEL);
+	if (!i2cd->gpu_ccgx_ucsi)
+		return -ENOMEM;
+
+	strlcpy(i2cd->gpu_ccgx_ucsi->type, "ccgx-ucsi",
+		sizeof(i2cd->gpu_ccgx_ucsi->type));
+	i2cd->gpu_ccgx_ucsi->addr = 0x8;
+	i2cd->gpu_ccgx_ucsi->irq = irq;
+	ccgx_client = i2c_new_device(&i2cd->adapter, i2cd->gpu_ccgx_ucsi);
+	if (!ccgx_client)
+		return -ENODEV;
+
+	return 0;
+}
+
+static int gpu_i2c_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	struct gpu_i2c_dev *i2cd;
+	int status;
+
+	i2cd = devm_kzalloc(&pdev->dev, sizeof(*i2cd), GFP_KERNEL);
+	if (!i2cd)
+		return -ENOMEM;
+
+	i2cd->dev = &pdev->dev;
+	dev_set_drvdata(&pdev->dev, i2cd);
+
+	status = pcim_enable_device(pdev);
+	if (status < 0) {
+		dev_err(&pdev->dev, "pcim_enable_device failed %d\n", status);
+		return status;
+	}
+
+	pci_set_master(pdev);
+
+	i2cd->regs = pcim_iomap(pdev, 0, 0);
+	if (!i2cd->regs) {
+		dev_err(&pdev->dev, "pcim_iomap failed\n");
+		return -ENOMEM;
+	}
+
+	status = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI);
+	if (status < 0) {
+		dev_err(&pdev->dev, "pci_alloc_irq_vectors err %d\n", status);
+		return status;
+	}
+
+	gpu_enable_i2c_bus(i2cd);
+
+	i2c_set_adapdata(&i2cd->adapter, i2cd);
+	i2cd->adapter.owner = THIS_MODULE;
+	strlcpy(i2cd->adapter.name, "NVIDIA GPU I2C adapter",
+		sizeof(i2cd->adapter.name));
+	i2cd->adapter.algo = &gpu_i2c_algorithm;
+	i2cd->adapter.quirks = &gpu_i2c_quirks;
+	i2cd->adapter.dev.parent = &pdev->dev;
+	status = i2c_add_adapter(&i2cd->adapter);
+	if (status < 0)
+		goto free_irq_vectors;
+
+	status = gpu_populate_client(i2cd, pdev->irq);
+	if (status < 0) {
+		dev_err(&pdev->dev, "gpu_populate_client failed %d\n", status);
+		goto del_adapter;
+	}
+
+	return 0;
+
+del_adapter:
+	i2c_del_adapter(&i2cd->adapter);
+free_irq_vectors:
+	pci_free_irq_vectors(pdev);
+	return status;
+}
+
+static void gpu_i2c_remove(struct pci_dev *pdev)
+{
+	struct gpu_i2c_dev *i2cd = dev_get_drvdata(&pdev->dev);
+
+	i2c_del_adapter(&i2cd->adapter);
+	pci_free_irq_vectors(pdev);
+}
+
+static int gpu_i2c_resume(struct device *dev)
+{
+	struct gpu_i2c_dev *i2cd = dev_get_drvdata(dev);
+
+	gpu_enable_i2c_bus(i2cd);
+	return 0;
+}
+
+UNIVERSAL_DEV_PM_OPS(gpu_i2c_driver_pm, NULL, gpu_i2c_resume, NULL);
+
+static struct pci_driver gpu_i2c_driver = {
+	.name		= "nvidia-gpu",
+	.id_table	= gpu_i2c_ids,
+	.probe		= gpu_i2c_probe,
+	.remove		= gpu_i2c_remove,
+	.driver		= {
+		.pm	= &gpu_i2c_driver_pm,
+	},
+};
+
+module_pci_driver(gpu_i2c_driver);
+
+MODULE_AUTHOR("Ajay Gupta <ajayg@nvidia.com>");
+MODULE_DESCRIPTION("Nvidia GPU I2C controller Driver");
+MODULE_LICENSE("GPL v2");
-- 
GitLab


From caccdcc5dbec0dd9643fe1667893e78631a4d38e Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Fri, 9 Nov 2018 17:54:38 +0100
Subject: [PATCH 0921/1890] i2c: nvidia-gpu: make pm_ops static

sparse rightfully says:

warning: symbol 'gpu_i2c_driver_pm' was not declared. Should it be static?

Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-nvidia-gpu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-nvidia-gpu.c b/drivers/i2c/busses/i2c-nvidia-gpu.c
index 744f5e42636b2..8822357bca0c3 100644
--- a/drivers/i2c/busses/i2c-nvidia-gpu.c
+++ b/drivers/i2c/busses/i2c-nvidia-gpu.c
@@ -349,7 +349,7 @@ static int gpu_i2c_resume(struct device *dev)
 	return 0;
 }
 
-UNIVERSAL_DEV_PM_OPS(gpu_i2c_driver_pm, NULL, gpu_i2c_resume, NULL);
+static UNIVERSAL_DEV_PM_OPS(gpu_i2c_driver_pm, NULL, gpu_i2c_resume, NULL);
 
 static struct pci_driver gpu_i2c_driver = {
 	.name		= "nvidia-gpu",
-- 
GitLab


From 23d8003907d094f77cf959228e2248d6db819fa7 Mon Sep 17 00:00:00 2001
From: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
Date: Fri, 9 Nov 2018 11:00:12 +0200
Subject: [PATCH 0922/1890] drm/dp_mst: Check if primary mstb is null

Unfortunately drm_dp_get_mst_branch_device which is called from both
drm_dp_mst_handle_down_rep and drm_dp_mst_handle_up_rep seem to rely
on that mgr->mst_primary is not NULL, which seem to be wrong as it can be
cleared with simultaneous mode set, if probing fails or in other case.
mgr->lock mutex doesn't protect against that as it might just get
assigned to NULL right before, not simultaneously.

There are currently bugs 107738, 108616 bugs which crash in
drm_dp_get_mst_branch_device, caused by this issue.

v2: Refactored the code, as it was nicely noticed.
    Fixed Bugzilla bug numbers(second was 108616, but not 108816)
    and added links.

[changed title and added stable cc]
Signed-off-by: Lyude Paul <lyude@redhat.com>
Signed-off-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
Cc: stable@vger.kernel.org
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108616
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107738
Link: https://patchwork.freedesktop.org/patch/msgid/20181109090012.24438-1-stanislav.lisovskiy@intel.com
---
 drivers/gpu/drm/drm_dp_mst_topology.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
index 5ff1d79b86c4a..0e0df398222d1 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -1275,6 +1275,9 @@ static struct drm_dp_mst_branch *drm_dp_get_mst_branch_device(struct drm_dp_mst_
 	mutex_lock(&mgr->lock);
 	mstb = mgr->mst_primary;
 
+	if (!mstb)
+		goto out;
+
 	for (i = 0; i < lct - 1; i++) {
 		int shift = (i % 2) ? 0 : 4;
 		int port_num = (rad[i / 2] >> shift) & 0xf;
-- 
GitLab


From 641a41dbba217ee5bd26abe6be77f8cead9cd00e Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Date: Tue, 30 Oct 2018 15:13:35 +0900
Subject: [PATCH 0923/1890] serial: sh-sci: Fix could not remove
 dev_attr_rx_fifo_timeout

This patch fixes an issue that the sci_remove() could not remove
dev_attr_rx_fifo_timeout because uart_remove_one_port() set
the port->port.type to PORT_UNKNOWN.

Reported-by: Hiromitsu Yamasaki <hiromitsu.yamasaki.ym@renesas.com>
Fixes: 5d23188a473d ("serial: sh-sci: make RX FIFO parameters tunable via sysfs")
Cc: <stable@vger.kernel.org> # v4.11+
Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Reviewed-by: Ulrich Hecht <uli+renesas@fpond.eu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/sh-sci.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
index e19bfbba8a019..effba6ce0caa2 100644
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -3102,6 +3102,7 @@ static struct uart_driver sci_uart_driver = {
 static int sci_remove(struct platform_device *dev)
 {
 	struct sci_port *port = platform_get_drvdata(dev);
+	unsigned int type = port->port.type;	/* uart_remove_... clears it */
 
 	sci_ports_in_use &= ~BIT(port->port.line);
 	uart_remove_one_port(&sci_uart_driver, &port->port);
@@ -3112,8 +3113,7 @@ static int sci_remove(struct platform_device *dev)
 		sysfs_remove_file(&dev->dev.kobj,
 				  &dev_attr_rx_fifo_trigger.attr);
 	}
-	if (port->port.type == PORT_SCIFA || port->port.type == PORT_SCIFB ||
-	    port->port.type == PORT_HSCIF) {
+	if (type == PORT_SCIFA || type == PORT_SCIFB || type == PORT_HSCIF) {
 		sysfs_remove_file(&dev->dev.kobj,
 				  &dev_attr_rx_fifo_timeout.attr);
 	}
-- 
GitLab


From 247c554a14aa16ca08f4ed4d9eb39a2389f69d1d Mon Sep 17 00:00:00 2001
From: Ajay Gupta <ajayg@nvidia.com>
Date: Fri, 26 Oct 2018 09:36:59 -0700
Subject: [PATCH 0924/1890] usb: typec: ucsi: add support for Cypress CCGx

Latest NVIDIA GPU cards have a Cypress CCGx Type-C controller
over I2C interface.

This UCSI I2C driver uses I2C bus driver interface for communicating
with Type-C controller.

Signed-off-by: Ajay Gupta <ajayg@nvidia.com>
Acked-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/usb/typec/ucsi/Kconfig    |  10 +
 drivers/usb/typec/ucsi/Makefile   |   2 +
 drivers/usb/typec/ucsi/ucsi_ccg.c | 307 ++++++++++++++++++++++++++++++
 3 files changed, 319 insertions(+)
 create mode 100644 drivers/usb/typec/ucsi/ucsi_ccg.c

diff --git a/drivers/usb/typec/ucsi/Kconfig b/drivers/usb/typec/ucsi/Kconfig
index e36d6c73c4a41..78118883f96c8 100644
--- a/drivers/usb/typec/ucsi/Kconfig
+++ b/drivers/usb/typec/ucsi/Kconfig
@@ -23,6 +23,16 @@ config TYPEC_UCSI
 
 if TYPEC_UCSI
 
+config UCSI_CCG
+	tristate "UCSI Interface Driver for Cypress CCGx"
+	depends on I2C
+	help
+	  This driver enables UCSI support on platforms that expose a
+	  Cypress CCGx Type-C controller over I2C interface.
+
+	  To compile the driver as a module, choose M here: the module will be
+	  called ucsi_ccg.
+
 config UCSI_ACPI
 	tristate "UCSI ACPI Interface Driver"
 	depends on ACPI
diff --git a/drivers/usb/typec/ucsi/Makefile b/drivers/usb/typec/ucsi/Makefile
index 7afbea5122077..2f4900b26210e 100644
--- a/drivers/usb/typec/ucsi/Makefile
+++ b/drivers/usb/typec/ucsi/Makefile
@@ -8,3 +8,5 @@ typec_ucsi-y			:= ucsi.o
 typec_ucsi-$(CONFIG_TRACING)	+= trace.o
 
 obj-$(CONFIG_UCSI_ACPI)		+= ucsi_acpi.o
+
+obj-$(CONFIG_UCSI_CCG)		+= ucsi_ccg.o
diff --git a/drivers/usb/typec/ucsi/ucsi_ccg.c b/drivers/usb/typec/ucsi/ucsi_ccg.c
new file mode 100644
index 0000000000000..de8a43bdff689
--- /dev/null
+++ b/drivers/usb/typec/ucsi/ucsi_ccg.c
@@ -0,0 +1,307 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * UCSI driver for Cypress CCGx Type-C controller
+ *
+ * Copyright (C) 2017-2018 NVIDIA Corporation. All rights reserved.
+ * Author: Ajay Gupta <ajayg@nvidia.com>
+ *
+ * Some code borrowed from drivers/usb/typec/ucsi/ucsi_acpi.c
+ */
+#include <linux/acpi.h>
+#include <linux/delay.h>
+#include <linux/i2c.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+
+#include <asm/unaligned.h>
+#include "ucsi.h"
+
+struct ucsi_ccg {
+	struct device *dev;
+	struct ucsi *ucsi;
+	struct ucsi_ppm ppm;
+	struct i2c_client *client;
+};
+
+#define CCGX_RAB_INTR_REG			0x06
+#define CCGX_RAB_UCSI_CONTROL			0x39
+#define CCGX_RAB_UCSI_CONTROL_START		BIT(0)
+#define CCGX_RAB_UCSI_CONTROL_STOP		BIT(1)
+#define CCGX_RAB_UCSI_DATA_BLOCK(offset)	(0xf000 | ((offset) & 0xff))
+
+static int ccg_read(struct ucsi_ccg *uc, u16 rab, u8 *data, u32 len)
+{
+	struct i2c_client *client = uc->client;
+	const struct i2c_adapter_quirks *quirks = client->adapter->quirks;
+	unsigned char buf[2];
+	struct i2c_msg msgs[] = {
+		{
+			.addr	= client->addr,
+			.flags  = 0x0,
+			.len	= sizeof(buf),
+			.buf	= buf,
+		},
+		{
+			.addr	= client->addr,
+			.flags  = I2C_M_RD,
+			.buf	= data,
+		},
+	};
+	u32 rlen, rem_len = len, max_read_len = len;
+	int status;
+
+	/* check any max_read_len limitation on i2c adapter */
+	if (quirks && quirks->max_read_len)
+		max_read_len = quirks->max_read_len;
+
+	while (rem_len > 0) {
+		msgs[1].buf = &data[len - rem_len];
+		rlen = min_t(u16, rem_len, max_read_len);
+		msgs[1].len = rlen;
+		put_unaligned_le16(rab, buf);
+		status = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs));
+		if (status < 0) {
+			dev_err(uc->dev, "i2c_transfer failed %d\n", status);
+			return status;
+		}
+		rab += rlen;
+		rem_len -= rlen;
+	}
+
+	return 0;
+}
+
+static int ccg_write(struct ucsi_ccg *uc, u16 rab, u8 *data, u32 len)
+{
+	struct i2c_client *client = uc->client;
+	unsigned char *buf;
+	struct i2c_msg msgs[] = {
+		{
+			.addr	= client->addr,
+			.flags  = 0x0,
+		}
+	};
+	int status;
+
+	buf = kzalloc(len + sizeof(rab), GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	put_unaligned_le16(rab, buf);
+	memcpy(buf + sizeof(rab), data, len);
+
+	msgs[0].len = len + sizeof(rab);
+	msgs[0].buf = buf;
+
+	status = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs));
+	if (status < 0) {
+		dev_err(uc->dev, "i2c_transfer failed %d\n", status);
+		kfree(buf);
+		return status;
+	}
+
+	kfree(buf);
+	return 0;
+}
+
+static int ucsi_ccg_init(struct ucsi_ccg *uc)
+{
+	unsigned int count = 10;
+	u8 data;
+	int status;
+
+	data = CCGX_RAB_UCSI_CONTROL_STOP;
+	status = ccg_write(uc, CCGX_RAB_UCSI_CONTROL, &data, sizeof(data));
+	if (status < 0)
+		return status;
+
+	data = CCGX_RAB_UCSI_CONTROL_START;
+	status = ccg_write(uc, CCGX_RAB_UCSI_CONTROL, &data, sizeof(data));
+	if (status < 0)
+		return status;
+
+	/*
+	 * Flush CCGx RESPONSE queue by acking interrupts. Above ucsi control
+	 * register write will push response which must be cleared.
+	 */
+	do {
+		status = ccg_read(uc, CCGX_RAB_INTR_REG, &data, sizeof(data));
+		if (status < 0)
+			return status;
+
+		if (!data)
+			return 0;
+
+		status = ccg_write(uc, CCGX_RAB_INTR_REG, &data, sizeof(data));
+		if (status < 0)
+			return status;
+
+		usleep_range(10000, 11000);
+	} while (--count);
+
+	return -ETIMEDOUT;
+}
+
+static int ucsi_ccg_send_data(struct ucsi_ccg *uc)
+{
+	u8 *ppm = (u8 *)uc->ppm.data;
+	int status;
+	u16 rab;
+
+	rab = CCGX_RAB_UCSI_DATA_BLOCK(offsetof(struct ucsi_data, message_out));
+	status = ccg_write(uc, rab, ppm +
+			   offsetof(struct ucsi_data, message_out),
+			   sizeof(uc->ppm.data->message_out));
+	if (status < 0)
+		return status;
+
+	rab = CCGX_RAB_UCSI_DATA_BLOCK(offsetof(struct ucsi_data, ctrl));
+	return ccg_write(uc, rab, ppm + offsetof(struct ucsi_data, ctrl),
+			 sizeof(uc->ppm.data->ctrl));
+}
+
+static int ucsi_ccg_recv_data(struct ucsi_ccg *uc)
+{
+	u8 *ppm = (u8 *)uc->ppm.data;
+	int status;
+	u16 rab;
+
+	rab = CCGX_RAB_UCSI_DATA_BLOCK(offsetof(struct ucsi_data, cci));
+	status = ccg_read(uc, rab, ppm + offsetof(struct ucsi_data, cci),
+			  sizeof(uc->ppm.data->cci));
+	if (status < 0)
+		return status;
+
+	rab = CCGX_RAB_UCSI_DATA_BLOCK(offsetof(struct ucsi_data, message_in));
+	return ccg_read(uc, rab, ppm + offsetof(struct ucsi_data, message_in),
+			sizeof(uc->ppm.data->message_in));
+}
+
+static int ucsi_ccg_ack_interrupt(struct ucsi_ccg *uc)
+{
+	int status;
+	unsigned char data;
+
+	status = ccg_read(uc, CCGX_RAB_INTR_REG, &data, sizeof(data));
+	if (status < 0)
+		return status;
+
+	return ccg_write(uc, CCGX_RAB_INTR_REG, &data, sizeof(data));
+}
+
+static int ucsi_ccg_sync(struct ucsi_ppm *ppm)
+{
+	struct ucsi_ccg *uc = container_of(ppm, struct ucsi_ccg, ppm);
+	int status;
+
+	status = ucsi_ccg_recv_data(uc);
+	if (status < 0)
+		return status;
+
+	/* ack interrupt to allow next command to run */
+	return ucsi_ccg_ack_interrupt(uc);
+}
+
+static int ucsi_ccg_cmd(struct ucsi_ppm *ppm, struct ucsi_control *ctrl)
+{
+	struct ucsi_ccg *uc = container_of(ppm, struct ucsi_ccg, ppm);
+
+	ppm->data->ctrl.raw_cmd = ctrl->raw_cmd;
+	return ucsi_ccg_send_data(uc);
+}
+
+static irqreturn_t ccg_irq_handler(int irq, void *data)
+{
+	struct ucsi_ccg *uc = data;
+
+	ucsi_notify(uc->ucsi);
+
+	return IRQ_HANDLED;
+}
+
+static int ucsi_ccg_probe(struct i2c_client *client,
+			  const struct i2c_device_id *id)
+{
+	struct device *dev = &client->dev;
+	struct ucsi_ccg *uc;
+	int status;
+	u16 rab;
+
+	uc = devm_kzalloc(dev, sizeof(*uc), GFP_KERNEL);
+	if (!uc)
+		return -ENOMEM;
+
+	uc->ppm.data = devm_kzalloc(dev, sizeof(struct ucsi_data), GFP_KERNEL);
+	if (!uc->ppm.data)
+		return -ENOMEM;
+
+	uc->ppm.cmd = ucsi_ccg_cmd;
+	uc->ppm.sync = ucsi_ccg_sync;
+	uc->dev = dev;
+	uc->client = client;
+
+	/* reset ccg device and initialize ucsi */
+	status = ucsi_ccg_init(uc);
+	if (status < 0) {
+		dev_err(uc->dev, "ucsi_ccg_init failed - %d\n", status);
+		return status;
+	}
+
+	status = devm_request_threaded_irq(dev, client->irq, NULL,
+					   ccg_irq_handler,
+					   IRQF_ONESHOT | IRQF_TRIGGER_HIGH,
+					   dev_name(dev), uc);
+	if (status < 0) {
+		dev_err(uc->dev, "request_threaded_irq failed - %d\n", status);
+		return status;
+	}
+
+	uc->ucsi = ucsi_register_ppm(dev, &uc->ppm);
+	if (IS_ERR(uc->ucsi)) {
+		dev_err(uc->dev, "ucsi_register_ppm failed\n");
+		return PTR_ERR(uc->ucsi);
+	}
+
+	rab = CCGX_RAB_UCSI_DATA_BLOCK(offsetof(struct ucsi_data, version));
+	status = ccg_read(uc, rab, (u8 *)(uc->ppm.data) +
+			  offsetof(struct ucsi_data, version),
+			  sizeof(uc->ppm.data->version));
+	if (status < 0) {
+		ucsi_unregister_ppm(uc->ucsi);
+		return status;
+	}
+
+	i2c_set_clientdata(client, uc);
+	return 0;
+}
+
+static int ucsi_ccg_remove(struct i2c_client *client)
+{
+	struct ucsi_ccg *uc = i2c_get_clientdata(client);
+
+	ucsi_unregister_ppm(uc->ucsi);
+
+	return 0;
+}
+
+static const struct i2c_device_id ucsi_ccg_device_id[] = {
+	{"ccgx-ucsi", 0},
+	{}
+};
+MODULE_DEVICE_TABLE(i2c, ucsi_ccg_device_id);
+
+static struct i2c_driver ucsi_ccg_driver = {
+	.driver = {
+		.name = "ucsi_ccg",
+	},
+	.probe = ucsi_ccg_probe,
+	.remove = ucsi_ccg_remove,
+	.id_table = ucsi_ccg_device_id,
+};
+
+module_i2c_driver(ucsi_ccg_driver);
+
+MODULE_AUTHOR("Ajay Gupta <ajayg@nvidia.com>");
+MODULE_DESCRIPTION("UCSI driver for Cypress CCGx Type-C controller");
+MODULE_LICENSE("GPL v2");
-- 
GitLab


From 15035388439f892017d38b05214d3cda6578af64 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Fri, 9 Nov 2018 15:22:07 -0500
Subject: [PATCH 0925/1890] x86/cpu/vmware: Do not trace vmware_sched_clock()

When running function tracing on a Linux guest running on VMware
Workstation, the guest would crash. This is due to tracing of the
sched_clock internal call of the VMware vmware_sched_clock(), which
causes an infinite recursion within the tracing code (clock calls must
not be traced).

Make vmware_sched_clock() not traced by ftrace.

Fixes: 80e9a4f21fd7c ("x86/vmware: Add paravirt sched clock")
Reported-by: GwanYeong Kim <gy741.kim@gmail.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
CC: Alok Kataria <akataria@vmware.com>
CC: GwanYeong Kim <gy741.kim@gmail.com>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Ingo Molnar <mingo@kernel.org>
Cc: stable@vger.kernel.org
CC: Thomas Gleixner <tglx@linutronix.de>
CC: virtualization@lists.linux-foundation.org
CC: x86-ml <x86@kernel.org>
Link: http://lkml.kernel.org/r/20181109152207.4d3e7d70@gandalf.local.home
---
 arch/x86/kernel/cpu/vmware.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index d9ab49bed8afc..0eda91f8eeace 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -77,7 +77,7 @@ static __init int setup_vmw_sched_clock(char *s)
 }
 early_param("no-vmw-sched-clock", setup_vmw_sched_clock);
 
-static unsigned long long vmware_sched_clock(void)
+static unsigned long long notrace vmware_sched_clock(void)
 {
 	unsigned long long ns;
 
-- 
GitLab


From 1aefa98b010e9cc7a07046cbcb1237ddad85b708 Mon Sep 17 00:00:00 2001
From: Vinod Koul <vkoul@kernel.org>
Date: Fri, 9 Nov 2018 15:20:54 +0530
Subject: [PATCH 0926/1890] clk: qcom: gcc: Fix board clock node name

Device tree node name are not supposed to have "_" in them so fix the
node name use of xo_board to xo-board

Fixes: 652f1813c113 ("clk: qcom: gcc: Add global clock controller driver for QCS404")
Signed-off-by: Vinod Koul <vkoul@kernel.org>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 drivers/clk/qcom/gcc-qcs404.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clk/qcom/gcc-qcs404.c b/drivers/clk/qcom/gcc-qcs404.c
index e4ca6a45f3139..ef1b267cb058a 100644
--- a/drivers/clk/qcom/gcc-qcs404.c
+++ b/drivers/clk/qcom/gcc-qcs404.c
@@ -265,7 +265,7 @@ static struct clk_fixed_factor cxo = {
 	.div = 1,
 	.hw.init = &(struct clk_init_data){
 		.name = "cxo",
-		.parent_names = (const char *[]){ "xo_board" },
+		.parent_names = (const char *[]){ "xo-board" },
 		.num_parents = 1,
 		.ops = &clk_fixed_factor_ops,
 	},
-- 
GitLab


From 5f8d3ab136d0ccb59c4d628d8f85e0d8f2761d07 Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Fri, 2 Nov 2018 14:45:32 -0700
Subject: [PATCH 0927/1890] arm64: dts: sdm845-mtp: Reserve reserved gpios

With the introduction of commit 3edfb7bd76bd ("gpiolib: Show correct
direction from the beginning") the gpiolib will attempt to read the
direction of all pins, which triggers a read from protected register
regions.

The pins 0 through 3 and 81 through 84 are protected, so mark these as
reserved.

Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Reviewed-by: Stephen Boyd <sboyd@kernel.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Andy Gross <andy.gross@linaro.org>
---
 arch/arm64/boot/dts/qcom/sdm845-mtp.dts | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/sdm845-mtp.dts b/arch/arm64/boot/dts/qcom/sdm845-mtp.dts
index eedfaf8922e2a..d667eee4e6d03 100644
--- a/arch/arm64/boot/dts/qcom/sdm845-mtp.dts
+++ b/arch/arm64/boot/dts/qcom/sdm845-mtp.dts
@@ -352,6 +352,10 @@ &qupv3_id_1 {
 	status = "okay";
 };
 
+&tlmm {
+	gpio-reserved-ranges = <0 4>, <81 4>;
+};
+
 &uart9 {
 	status = "okay";
 };
-- 
GitLab


From 9134586715e389fe90d9d28cb37a668f374d686a Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Mon, 29 Oct 2018 22:45:54 -0700
Subject: [PATCH 0928/1890] arm64: dts: qcom: msm8998: Reserve gpio ranges on
 MTP

GPIOs 0 through 3 and 81 through 84 are configured to not be accessible
from the application CPUs. Mark them as reserved to allow the MSM8998
MTP to boot after the introduction of 3edfb7bd76bd ("gpiolib: Show
correct direction from the beginning").

Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Reviewed-by: Jeffrey Hugo <jhugo@codeaurora.org>
Signed-off-by: Andy Gross <andy.gross@linaro.org>
---
 arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi b/arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi
index b4276da1fb0d9..11fd1fe8bdb52 100644
--- a/arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi
@@ -241,3 +241,7 @@ vreg_bob: bob {
 		};
 	};
 };
+
+&tlmm {
+	gpio-reserved-ranges = <0 4>, <81 4>;
+};
-- 
GitLab


From 35e8e8b45d31bec34379dd36e7b71448e003efb2 Mon Sep 17 00:00:00 2001
From: Igor Russkikh <Igor.Russkikh@aquantia.com>
Date: Fri, 9 Nov 2018 11:53:56 +0000
Subject: [PATCH 0929/1890] net: aquantia: synchronized flow control between
 mac/phy

Flow control statuses were not synchronized between blocks,
that caused packets/link drop on some corner cases, when
MAC sent PFC although Phy was not expecting these to come.

Driver should readout the negotiated FC from phy and
configure RX block accordigly.

This is done on each link change event with information from FW.

Fixes: 288551de45aa ("net: aquantia: Implement rx/tx flow control ethtools callback")
Signed-off-by: Igor Russkikh <igor.russkikh@aquantia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/aquantia/atlantic/aq_ethtool.c   |  8 +++----
 .../net/ethernet/aquantia/atlantic/aq_hw.h    |  3 +++
 .../net/ethernet/aquantia/atlantic/aq_nic.c   | 14 ++++++++++++-
 .../aquantia/atlantic/hw_atl/hw_atl_b0.c      | 12 ++++++++---
 .../atlantic/hw_atl/hw_atl_utils_fw2x.c       | 21 +++++++++++++++++++
 5 files changed, 50 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
index 6a633c70f603d..99ef1daaa4d80 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
@@ -407,13 +407,13 @@ static void aq_ethtool_get_pauseparam(struct net_device *ndev,
 				      struct ethtool_pauseparam *pause)
 {
 	struct aq_nic_s *aq_nic = netdev_priv(ndev);
+	u32 fc = aq_nic->aq_nic_cfg.flow_control;
 
 	pause->autoneg = 0;
 
-	if (aq_nic->aq_hw->aq_nic_cfg->flow_control & AQ_NIC_FC_RX)
-		pause->rx_pause = 1;
-	if (aq_nic->aq_hw->aq_nic_cfg->flow_control & AQ_NIC_FC_TX)
-		pause->tx_pause = 1;
+	pause->rx_pause = !!(fc & AQ_NIC_FC_RX);
+	pause->tx_pause = !!(fc & AQ_NIC_FC_TX);
+
 }
 
 static int aq_ethtool_set_pauseparam(struct net_device *ndev,
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
index e8689241204e9..7ec8d24b2b0b8 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
@@ -204,6 +204,7 @@ struct aq_hw_ops {
 
 	int (*hw_get_fw_version)(struct aq_hw_s *self, u32 *fw_version);
 
+	int (*hw_set_fc)(struct aq_hw_s *self, u32 fc, u32 tc);
 };
 
 struct aq_fw_ops {
@@ -226,6 +227,8 @@ struct aq_fw_ops {
 
 	int (*update_stats)(struct aq_hw_s *self);
 
+	u32 (*get_flow_control)(struct aq_hw_s *self, u32 *fcmode);
+
 	int (*set_flow_control)(struct aq_hw_s *self);
 
 	int (*set_power)(struct aq_hw_s *self, unsigned int power_state,
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index 5fed244466871..0011a3f2f6727 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -124,6 +124,7 @@ void aq_nic_cfg_start(struct aq_nic_s *self)
 static int aq_nic_update_link_status(struct aq_nic_s *self)
 {
 	int err = self->aq_fw_ops->update_link_status(self->aq_hw);
+	u32 fc = 0;
 
 	if (err)
 		return err;
@@ -133,6 +134,15 @@ static int aq_nic_update_link_status(struct aq_nic_s *self)
 			AQ_CFG_DRV_NAME, self->link_status.mbps,
 			self->aq_hw->aq_link_status.mbps);
 		aq_nic_update_interrupt_moderation_settings(self);
+
+		/* Driver has to update flow control settings on RX block
+		 * on any link event.
+		 * We should query FW whether it negotiated FC.
+		 */
+		if (self->aq_fw_ops->get_flow_control)
+			self->aq_fw_ops->get_flow_control(self->aq_hw, &fc);
+		if (self->aq_hw_ops->hw_set_fc)
+			self->aq_hw_ops->hw_set_fc(self->aq_hw, fc, 0);
 	}
 
 	self->link_status = self->aq_hw->aq_link_status;
@@ -772,7 +782,9 @@ void aq_nic_get_link_ksettings(struct aq_nic_s *self,
 		ethtool_link_ksettings_add_link_mode(cmd, advertising,
 						     Pause);
 
-	if (self->aq_nic_cfg.flow_control & AQ_NIC_FC_TX)
+	/* Asym is when either RX or TX, but not both */
+	if (!!(self->aq_nic_cfg.flow_control & AQ_NIC_FC_TX) ^
+	    !!(self->aq_nic_cfg.flow_control & AQ_NIC_FC_RX))
 		ethtool_link_ksettings_add_link_mode(cmd, advertising,
 						     Asym_Pause);
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
index 76d25d594a0f6..119265762b0c4 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
@@ -100,12 +100,17 @@ static int hw_atl_b0_hw_reset(struct aq_hw_s *self)
 	return err;
 }
 
+static int hw_atl_b0_set_fc(struct aq_hw_s *self, u32 fc, u32 tc)
+{
+	hw_atl_rpb_rx_xoff_en_per_tc_set(self, !!(fc & AQ_NIC_FC_RX), tc);
+	return 0;
+}
+
 static int hw_atl_b0_hw_qos_set(struct aq_hw_s *self)
 {
 	u32 tc = 0U;
 	u32 buff_size = 0U;
 	unsigned int i_priority = 0U;
-	bool is_rx_flow_control = false;
 
 	/* TPS Descriptor rate init */
 	hw_atl_tps_tx_pkt_shed_desc_rate_curr_time_res_set(self, 0x0U);
@@ -138,7 +143,6 @@ static int hw_atl_b0_hw_qos_set(struct aq_hw_s *self)
 
 	/* QoS Rx buf size per TC */
 	tc = 0;
-	is_rx_flow_control = (AQ_NIC_FC_RX & self->aq_nic_cfg->flow_control);
 	buff_size = HW_ATL_B0_RXBUF_MAX;
 
 	hw_atl_rpb_rx_pkt_buff_size_per_tc_set(self, buff_size, tc);
@@ -150,7 +154,8 @@ static int hw_atl_b0_hw_qos_set(struct aq_hw_s *self)
 						   (buff_size *
 						   (1024U / 32U) * 50U) /
 						   100U, tc);
-	hw_atl_rpb_rx_xoff_en_per_tc_set(self, is_rx_flow_control ? 1U : 0U, tc);
+
+	hw_atl_b0_set_fc(self, self->aq_nic_cfg->flow_control, tc);
 
 	/* QoS 802.1p priority -> TC mapping */
 	for (i_priority = 8U; i_priority--;)
@@ -963,4 +968,5 @@ const struct aq_hw_ops hw_atl_ops_b0 = {
 	.hw_get_regs                 = hw_atl_utils_hw_get_regs,
 	.hw_get_hw_stats             = hw_atl_utils_get_hw_stats,
 	.hw_get_fw_version           = hw_atl_utils_get_fw_version,
+	.hw_set_fc                   = hw_atl_b0_set_fc,
 };
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
index 096ca5730887c..7de3220d9cab7 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
@@ -30,6 +30,8 @@
 #define HW_ATL_FW2X_MPI_STATE_ADDR	0x370
 #define HW_ATL_FW2X_MPI_STATE2_ADDR	0x374
 
+#define HW_ATL_FW2X_CAP_PAUSE            BIT(CAPS_HI_PAUSE)
+#define HW_ATL_FW2X_CAP_ASYM_PAUSE       BIT(CAPS_HI_ASYMMETRIC_PAUSE)
 #define HW_ATL_FW2X_CAP_SLEEP_PROXY      BIT(CAPS_HI_SLEEP_PROXY)
 #define HW_ATL_FW2X_CAP_WOL              BIT(CAPS_HI_WOL)
 
@@ -451,6 +453,24 @@ static int aq_fw2x_set_flow_control(struct aq_hw_s *self)
 	return 0;
 }
 
+static u32 aq_fw2x_get_flow_control(struct aq_hw_s *self, u32 *fcmode)
+{
+	u32 mpi_state = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_STATE2_ADDR);
+
+	if (mpi_state & HW_ATL_FW2X_CAP_PAUSE)
+		if (mpi_state & HW_ATL_FW2X_CAP_ASYM_PAUSE)
+			*fcmode = AQ_NIC_FC_RX;
+		else
+			*fcmode = AQ_NIC_FC_RX | AQ_NIC_FC_TX;
+	else
+		if (mpi_state & HW_ATL_FW2X_CAP_ASYM_PAUSE)
+			*fcmode = AQ_NIC_FC_TX;
+		else
+			*fcmode = 0;
+
+	return 0;
+}
+
 const struct aq_fw_ops aq_fw_2x_ops = {
 	.init = aq_fw2x_init,
 	.deinit = aq_fw2x_deinit,
@@ -465,4 +485,5 @@ const struct aq_fw_ops aq_fw_2x_ops = {
 	.set_eee_rate = aq_fw2x_set_eee_rate,
 	.get_eee_rate = aq_fw2x_get_eee_rate,
 	.set_flow_control = aq_fw2x_set_flow_control,
+	.get_flow_control = aq_fw2x_get_flow_control
 };
-- 
GitLab


From 7a1bb49461b12b2e6332a4d054256835f45203f3 Mon Sep 17 00:00:00 2001
From: Dmitry Bogdanov <dmitry.bogdanov@aquantia.com>
Date: Fri, 9 Nov 2018 11:53:57 +0000
Subject: [PATCH 0930/1890] net: aquantia: fix potential IOMMU fault after
 driver unbind

IOMMU fault may occurr on unbind/bind or if_down/if_up sequence.

Although driver disables the rings on down, this is not enough.
Due to internal HW design, during subsequent initialization
NIC sometimes may reuse RX descriptors cache and write to the
host memory from the descriptor cache.
That's get catched by IOMMU on host.

This patch invalidates the descriptor cache in NIC on interface down
to prevent writing to the cached descriptors and to the memory pointed
in those descriptors.

Signed-off-by: Dmitry Bogdanov <dmitry.bogdanov@aquantia.com>
Signed-off-by: Igor Russkikh <igor.russkikh@aquantia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../aquantia/atlantic/hw_atl/hw_atl_b0.c       |  6 ++++++
 .../aquantia/atlantic/hw_atl/hw_atl_llh.c      |  8 ++++++++
 .../aquantia/atlantic/hw_atl/hw_atl_llh.h      |  3 +++
 .../atlantic/hw_atl/hw_atl_llh_internal.h      | 18 ++++++++++++++++++
 4 files changed, 35 insertions(+)

diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
index 119265762b0c4..3aec56623bf55 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
@@ -920,6 +920,12 @@ static int hw_atl_b0_hw_interrupt_moderation_set(struct aq_hw_s *self)
 static int hw_atl_b0_hw_stop(struct aq_hw_s *self)
 {
 	hw_atl_b0_hw_irq_disable(self, HW_ATL_B0_INT_MASK);
+
+	/* Invalidate Descriptor Cache to prevent writing to the cached
+	 * descriptors and to the data pointer of those descriptors
+	 */
+	hw_atl_rdm_rx_dma_desc_cache_init_set(self, 1);
+
 	return aq_hw_err_from_flags(self);
 }
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c
index be0a3a90dfad6..5502ec5f0f699 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c
@@ -619,6 +619,14 @@ void hw_atl_rpb_rx_flow_ctl_mode_set(struct aq_hw_s *aq_hw, u32 rx_flow_ctl_mode
 			    HW_ATL_RPB_RX_FC_MODE_SHIFT, rx_flow_ctl_mode);
 }
 
+void hw_atl_rdm_rx_dma_desc_cache_init_set(struct aq_hw_s *aq_hw, u32 init)
+{
+	aq_hw_write_reg_bit(aq_hw, HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_ADR,
+			    HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_MSK,
+			    HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_SHIFT,
+			    init);
+}
+
 void hw_atl_rpb_rx_pkt_buff_size_per_tc_set(struct aq_hw_s *aq_hw,
 					    u32 rx_pkt_buff_size_per_tc, u32 buffer)
 {
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h
index 7056c7342afcf..41f239928c157 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h
@@ -325,6 +325,9 @@ void hw_atl_rpb_rx_pkt_buff_size_per_tc_set(struct aq_hw_s *aq_hw,
 					    u32 rx_pkt_buff_size_per_tc,
 					    u32 buffer);
 
+/* set rdm rx dma descriptor cache init */
+void hw_atl_rdm_rx_dma_desc_cache_init_set(struct aq_hw_s *aq_hw, u32 init);
+
 /* set rx xoff enable (per tc) */
 void hw_atl_rpb_rx_xoff_en_per_tc_set(struct aq_hw_s *aq_hw, u32 rx_xoff_en_per_tc,
 				      u32 buffer);
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h
index 716674a9b729e..a715fa317b1c8 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h
@@ -293,6 +293,24 @@
 /* default value of bitfield desc{d}_reset */
 #define HW_ATL_RDM_DESCDRESET_DEFAULT 0x0
 
+/* rdm_desc_init_i bitfield definitions
+ * preprocessor definitions for the bitfield rdm_desc_init_i.
+ * port="pif_rdm_desc_init_i"
+ */
+
+/* register address for bitfield rdm_desc_init_i */
+#define HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_ADR 0x00005a00
+/* bitmask for bitfield rdm_desc_init_i */
+#define HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_MSK 0xffffffff
+/* inverted bitmask for bitfield rdm_desc_init_i */
+#define HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_MSKN 0x00000000
+/* lower bit position of bitfield  rdm_desc_init_i */
+#define HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_SHIFT 0
+/* width of bitfield rdm_desc_init_i */
+#define HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_WIDTH 32
+/* default value of bitfield rdm_desc_init_i */
+#define HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_DEFAULT 0x0
+
 /* rx int_desc_wrb_en bitfield definitions
  * preprocessor definitions for the bitfield "int_desc_wrb_en".
  * port="pif_rdm_int_desc_wrb_en_i"
-- 
GitLab


From bfaa9f8553d5c20703781e63f4fc8cb4792f18fd Mon Sep 17 00:00:00 2001
From: Igor Russkikh <Igor.Russkikh@aquantia.com>
Date: Fri, 9 Nov 2018 11:53:59 +0000
Subject: [PATCH 0931/1890] net: aquantia: fixed enable unicast on 32 macvlan

Fixed a condition mistake due to which macvlans unicast
item number 32 was not added in the unicast filter.

The consequence is that when exactly 32 macvlans are created
on NIC, the last created macvlan receives no traffic because
its MAC was not registered in HW.

Fixes: 94b3b542303f ("net: aquantia: vlan unicast address list correct handling")
Signed-off-by: Igor Russkikh <igor.russkikh@aquantia.com>
Tested-by: Nikita Danilov <nikita.danilov@aquantia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index 0011a3f2f6727..b5e7c98f424ca 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -600,7 +600,7 @@ int aq_nic_set_multicast_list(struct aq_nic_s *self, struct net_device *ndev)
 		}
 	}
 
-	if (i > 0 && i < AQ_HW_MULTICAST_ADDRESS_MAX) {
+	if (i > 0 && i <= AQ_HW_MULTICAST_ADDRESS_MAX) {
 		packet_filter |= IFF_MULTICAST;
 		self->mc_list.count = i;
 		self->aq_hw_ops->hw_multicast_list_set(self->aq_hw,
-- 
GitLab


From ad703c2b9127f9acdef697ec4755f6da4beaa266 Mon Sep 17 00:00:00 2001
From: Dmitry Bogdanov <dmitry.bogdanov@aquantia.com>
Date: Fri, 9 Nov 2018 11:54:01 +0000
Subject: [PATCH 0932/1890] net: aquantia: invalid checksumm offload
 implementation

Packets with marked invalid IP/UDP/TCP checksums were considered as good
by the driver. The error was in a logic, processing offload bits in
RX descriptor.

Signed-off-by: Igor Russkikh <igor.russkikh@aquantia.com>
Signed-off-by: Dmitry Bogdanov <dmitry.bogdanov@aquantia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/aquantia/atlantic/aq_ring.c  | 35 +++++++++++-------
 .../aquantia/atlantic/hw_atl/hw_atl_b0.c      | 36 +++++++++----------
 2 files changed, 41 insertions(+), 30 deletions(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
index 3db91446cc677..74550ccc7a20f 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
@@ -172,6 +172,27 @@ bool aq_ring_tx_clean(struct aq_ring_s *self)
 	return !!budget;
 }
 
+static void aq_rx_checksum(struct aq_ring_s *self,
+			   struct aq_ring_buff_s *buff,
+			   struct sk_buff *skb)
+{
+	if (!(self->aq_nic->ndev->features & NETIF_F_RXCSUM))
+		return;
+
+	if (unlikely(buff->is_cso_err)) {
+		++self->stats.rx.errors;
+		skb->ip_summed = CHECKSUM_NONE;
+		return;
+	}
+	if (buff->is_ip_cso) {
+		__skb_incr_checksum_unnecessary(skb);
+		if (buff->is_udp_cso || buff->is_tcp_cso)
+			__skb_incr_checksum_unnecessary(skb);
+	} else {
+		skb->ip_summed = CHECKSUM_NONE;
+	}
+}
+
 #define AQ_SKB_ALIGN SKB_DATA_ALIGN(sizeof(struct skb_shared_info))
 int aq_ring_rx_clean(struct aq_ring_s *self,
 		     struct napi_struct *napi,
@@ -267,18 +288,8 @@ int aq_ring_rx_clean(struct aq_ring_s *self,
 		}
 
 		skb->protocol = eth_type_trans(skb, ndev);
-		if (unlikely(buff->is_cso_err)) {
-			++self->stats.rx.errors;
-			skb->ip_summed = CHECKSUM_NONE;
-		} else {
-			if (buff->is_ip_cso) {
-				__skb_incr_checksum_unnecessary(skb);
-				if (buff->is_udp_cso || buff->is_tcp_cso)
-					__skb_incr_checksum_unnecessary(skb);
-			} else {
-				skb->ip_summed = CHECKSUM_NONE;
-			}
-		}
+
+		aq_rx_checksum(self, buff, skb);
 
 		skb_set_hash(skb, buff->rss_hash,
 			     buff->is_hash_l4 ? PKT_HASH_TYPE_L4 :
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
index 3aec56623bf55..179ce12fe4d8d 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
@@ -660,9 +660,9 @@ static int hw_atl_b0_hw_ring_rx_receive(struct aq_hw_s *self,
 		struct hw_atl_rxd_wb_s *rxd_wb = (struct hw_atl_rxd_wb_s *)
 			&ring->dx_ring[ring->hw_head * HW_ATL_B0_RXD_SIZE];
 
-		unsigned int is_err = 1U;
 		unsigned int is_rx_check_sum_enabled = 0U;
 		unsigned int pkt_type = 0U;
+		u8 rx_stat = 0U;
 
 		if (!(rxd_wb->status & 0x1U)) { /* RxD is not done */
 			break;
@@ -670,35 +670,35 @@ static int hw_atl_b0_hw_ring_rx_receive(struct aq_hw_s *self,
 
 		buff = &ring->buff_ring[ring->hw_head];
 
-		is_err = (0x0000003CU & rxd_wb->status);
+		rx_stat = (0x0000003CU & rxd_wb->status) >> 2;
 
 		is_rx_check_sum_enabled = (rxd_wb->type) & (0x3U << 19);
-		is_err &= ~0x20U; /* exclude validity bit */
 
 		pkt_type = 0xFFU & (rxd_wb->type >> 4);
 
-		if (is_rx_check_sum_enabled) {
-			if (0x0U == (pkt_type & 0x3U))
-				buff->is_ip_cso = (is_err & 0x08U) ? 0U : 1U;
+		if (is_rx_check_sum_enabled & BIT(0) &&
+		    (0x0U == (pkt_type & 0x3U)))
+			buff->is_ip_cso = (rx_stat & BIT(1)) ? 0U : 1U;
 
+		if (is_rx_check_sum_enabled & BIT(1)) {
 			if (0x4U == (pkt_type & 0x1CU))
-				buff->is_udp_cso = buff->is_cso_err ? 0U : 1U;
+				buff->is_udp_cso = (rx_stat & BIT(2)) ? 0U :
+						   !!(rx_stat & BIT(3));
 			else if (0x0U == (pkt_type & 0x1CU))
-				buff->is_tcp_cso = buff->is_cso_err ? 0U : 1U;
-
-			/* Checksum offload workaround for small packets */
-			if (rxd_wb->pkt_len <= 60) {
-				buff->is_ip_cso = 0U;
-				buff->is_cso_err = 0U;
-			}
+				buff->is_tcp_cso = (rx_stat & BIT(2)) ? 0U :
+						   !!(rx_stat & BIT(3));
+		}
+		buff->is_cso_err = !!(rx_stat & 0x6);
+		/* Checksum offload workaround for small packets */
+		if (unlikely(rxd_wb->pkt_len <= 60)) {
+			buff->is_ip_cso = 0U;
+			buff->is_cso_err = 0U;
 		}
-
-		is_err &= ~0x18U;
 
 		dma_unmap_page(ndev, buff->pa, buff->len, DMA_FROM_DEVICE);
 
-		if (is_err || rxd_wb->type & 0x1000U) {
-			/* status error or DMA error */
+		if ((rx_stat & BIT(0)) || rxd_wb->type & 0x1000U) {
+			/* MAC error or DMA error */
 			buff->is_error = 1U;
 		} else {
 			if (self->aq_nic_cfg->is_rss) {
-- 
GitLab


From bbb67a44baf973da734b9fd61cba4211da240751 Mon Sep 17 00:00:00 2001
From: Dmitry Bogdanov <dmitry.bogdanov@aquantia.com>
Date: Fri, 9 Nov 2018 11:54:03 +0000
Subject: [PATCH 0933/1890] net: aquantia: allow rx checksum offload
 configuration

RX Checksum offloads could not be configured and ignored netdev features
flag for checksumming.

Signed-off-by: Igor Russkikh <igor.russkikh@aquantia.com>
Signed-off-by: Dmitry Bogdanov <dmitry.bogdanov@aquantia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/aquantia/atlantic/aq_hw.h         |  3 +++
 drivers/net/ethernet/aquantia/atlantic/aq_main.c       | 10 ++++++++--
 drivers/net/ethernet/aquantia/atlantic/aq_nic.c        |  2 +-
 drivers/net/ethernet/aquantia/atlantic/aq_nic.h        |  2 +-
 .../net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c  |  7 +++++--
 5 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
index 7ec8d24b2b0b8..a1e70da358ca6 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
@@ -204,6 +204,9 @@ struct aq_hw_ops {
 
 	int (*hw_get_fw_version)(struct aq_hw_s *self, u32 *fw_version);
 
+	int (*hw_set_offload)(struct aq_hw_s *self,
+			      struct aq_nic_cfg_s *aq_nic_cfg);
+
 	int (*hw_set_fc)(struct aq_hw_s *self, u32 fc, u32 tc);
 };
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.c b/drivers/net/ethernet/aquantia/atlantic/aq_main.c
index e3ae29e523f0e..7c07eef275eb8 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_main.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.c
@@ -99,8 +99,11 @@ static int aq_ndev_set_features(struct net_device *ndev,
 	struct aq_nic_s *aq_nic = netdev_priv(ndev);
 	struct aq_nic_cfg_s *aq_cfg = aq_nic_get_cfg(aq_nic);
 	bool is_lro = false;
+	int err = 0;
+
+	aq_cfg->features = features;
 
-	if (aq_cfg->hw_features & NETIF_F_LRO) {
+	if (aq_cfg->aq_hw_caps->hw_features & NETIF_F_LRO) {
 		is_lro = features & NETIF_F_LRO;
 
 		if (aq_cfg->is_lro != is_lro) {
@@ -112,8 +115,11 @@ static int aq_ndev_set_features(struct net_device *ndev,
 			}
 		}
 	}
+	if ((aq_nic->ndev->features ^ features) & NETIF_F_RXCSUM)
+		err = aq_nic->aq_hw_ops->hw_set_offload(aq_nic->aq_hw,
+							aq_cfg);
 
-	return 0;
+	return err;
 }
 
 static int aq_ndev_set_mac_address(struct net_device *ndev, void *addr)
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index b5e7c98f424ca..7abdc09524259 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -118,7 +118,7 @@ void aq_nic_cfg_start(struct aq_nic_s *self)
 	}
 
 	cfg->link_speed_msk &= cfg->aq_hw_caps->link_speed_msk;
-	cfg->hw_features = cfg->aq_hw_caps->hw_features;
+	cfg->features = cfg->aq_hw_caps->hw_features;
 }
 
 static int aq_nic_update_link_status(struct aq_nic_s *self)
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
index c1582f4e8e1b5..44ec47a3d60a5 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
@@ -23,7 +23,7 @@ struct aq_vec_s;
 
 struct aq_nic_cfg_s {
 	const struct aq_hw_caps_s *aq_hw_caps;
-	u64 hw_features;
+	u64 features;
 	u32 rxds;		/* rx ring size, descriptors # */
 	u32 txds;		/* tx ring size, descriptors # */
 	u32 vecs;		/* vecs==allocated irqs */
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
index 179ce12fe4d8d..f02592f43fe36 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
@@ -234,8 +234,10 @@ static int hw_atl_b0_hw_offload_set(struct aq_hw_s *self,
 	hw_atl_tpo_tcp_udp_crc_offload_en_set(self, 1);
 
 	/* RX checksums offloads*/
-	hw_atl_rpo_ipv4header_crc_offload_en_set(self, 1);
-	hw_atl_rpo_tcp_udp_crc_offload_en_set(self, 1);
+	hw_atl_rpo_ipv4header_crc_offload_en_set(self, !!(aq_nic_cfg->features &
+						 NETIF_F_RXCSUM));
+	hw_atl_rpo_tcp_udp_crc_offload_en_set(self, !!(aq_nic_cfg->features &
+					      NETIF_F_RXCSUM));
 
 	/* LSO offloads*/
 	hw_atl_tdm_large_send_offload_en_set(self, 0xFFFFFFFFU);
@@ -974,5 +976,6 @@ const struct aq_hw_ops hw_atl_ops_b0 = {
 	.hw_get_regs                 = hw_atl_utils_hw_get_regs,
 	.hw_get_hw_stats             = hw_atl_utils_get_hw_stats,
 	.hw_get_fw_version           = hw_atl_utils_get_fw_version,
+	.hw_set_offload              = hw_atl_b0_hw_offload_set,
 	.hw_set_fc                   = hw_atl_b0_set_fc,
 };
-- 
GitLab


From d02854dc1999ed3e7fd79ec700c64ac23ac0c458 Mon Sep 17 00:00:00 2001
From: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Date: Fri, 9 Nov 2018 18:56:27 -0700
Subject: [PATCH 0934/1890] net: qualcomm: rmnet: Fix incorrect assignment of
 real_dev

A null dereference was observed when a sysctl was being set
from userspace and rmnet was stuck trying to complete some actions
in the NETDEV_REGISTER callback. This is because the real_dev is set
only after the device registration handler completes.

sysctl call stack -

<6> Unable to handle kernel NULL pointer dereference at
    virtual address 00000108
<2> pc : rmnet_vnd_get_iflink+0x1c/0x28
<2> lr : dev_get_iflink+0x2c/0x40
<2>  rmnet_vnd_get_iflink+0x1c/0x28
<2>  inet6_fill_ifinfo+0x15c/0x234
<2>  inet6_ifinfo_notify+0x68/0xd4
<2>  ndisc_ifinfo_sysctl_change+0x1b8/0x234
<2>  proc_sys_call_handler+0xac/0x100
<2>  proc_sys_write+0x3c/0x4c
<2>  __vfs_write+0x54/0x14c
<2>  vfs_write+0xcc/0x188
<2>  SyS_write+0x60/0xc0
<2>  el0_svc_naked+0x34/0x38

device register call stack -

<2>  notifier_call_chain+0x84/0xbc
<2>  raw_notifier_call_chain+0x38/0x48
<2>  call_netdevice_notifiers_info+0x40/0x70
<2>  call_netdevice_notifiers+0x38/0x60
<2>  register_netdevice+0x29c/0x3d8
<2>  rmnet_vnd_newlink+0x68/0xe8
<2>  rmnet_newlink+0xa0/0x160
<2>  rtnl_newlink+0x57c/0x6c8
<2>  rtnetlink_rcv_msg+0x1dc/0x328
<2>  netlink_rcv_skb+0xac/0x118
<2>  rtnetlink_rcv+0x24/0x30
<2>  netlink_unicast+0x158/0x1f0
<2>  netlink_sendmsg+0x32c/0x338
<2>  sock_sendmsg+0x44/0x60
<2>  SyS_sendto+0x150/0x1ac
<2>  el0_svc_naked+0x34/0x38

Fixes: b752eff5be24 ("net: qualcomm: rmnet: Implement ndo_get_iflink")
Signed-off-by: Sean Tranchetti <stranche@codeaurora.org>
Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
index 0afc3d335d562..d11c16aeb19ad 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
@@ -234,7 +234,7 @@ int rmnet_vnd_newlink(u8 id, struct net_device *rmnet_dev,
 		      struct net_device *real_dev,
 		      struct rmnet_endpoint *ep)
 {
-	struct rmnet_priv *priv;
+	struct rmnet_priv *priv = netdev_priv(rmnet_dev);
 	int rc;
 
 	if (ep->egress_dev)
@@ -247,6 +247,8 @@ int rmnet_vnd_newlink(u8 id, struct net_device *rmnet_dev,
 	rmnet_dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
 	rmnet_dev->hw_features |= NETIF_F_SG;
 
+	priv->real_dev = real_dev;
+
 	rc = register_netdevice(rmnet_dev);
 	if (!rc) {
 		ep->egress_dev = rmnet_dev;
@@ -255,9 +257,7 @@ int rmnet_vnd_newlink(u8 id, struct net_device *rmnet_dev,
 
 		rmnet_dev->rtnl_link_ops = &rmnet_link_ops;
 
-		priv = netdev_priv(rmnet_dev);
 		priv->mux_id = id;
-		priv->real_dev = real_dev;
 
 		netdev_dbg(rmnet_dev, "rmnet dev created\n");
 	}
-- 
GitLab


From 62230715fd2453b3ba948c9d83cfb3ada9169169 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=EB=B0=B0=EC=84=9D=EC=A7=84?= <soukjin.bae@samsung.com>
Date: Fri, 9 Nov 2018 16:53:06 -0800
Subject: [PATCH 0935/1890] flow_dissector: do not dissect l4 ports for
 fragments
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Only first fragment has the sport/dport information,
not the following ones.

If we want consistent hash for all fragments, we need to
ignore ports even for first fragment.

This bug is visible for IPv6 traffic, if incoming fragments
do not have a flow label, since skb_get_hash() will give
different results for first fragment and following ones.

It is also visible if any routing rule wants dissection
and sport or dport.

See commit 5e5d6fed3741 ("ipv6: route: dissect flow
in input path if fib rules need it") for details.

[edumazet] rewrote the changelog completely.

Fixes: 06635a35d13d ("flow_dissect: use programable dissector in skb_flow_dissect and friends")
Signed-off-by: ë°°ì„ì§„ <soukjin.bae@samsung.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/flow_dissector.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 676f3ad629f95..588f475019d47 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -1166,8 +1166,8 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
 		break;
 	}
 
-	if (dissector_uses_key(flow_dissector,
-			       FLOW_DISSECTOR_KEY_PORTS)) {
+	if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS) &&
+	    !(key_control->flags & FLOW_DIS_IS_FRAGMENT)) {
 		key_ports = skb_flow_dissector_target(flow_dissector,
 						      FLOW_DISSECTOR_KEY_PORTS,
 						      target_container);
-- 
GitLab


From fbd1d5245372e48b494120a30fe0b34b304576c4 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Fri, 9 Nov 2018 17:37:20 +0100
Subject: [PATCH 0936/1890] net: mvneta: correct typo

The reserved variable should be named reserved1.

Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvneta.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 5bfd349bf41ac..3ba672e9e353d 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -494,7 +494,7 @@ struct mvneta_port {
 #if defined(__LITTLE_ENDIAN)
 struct mvneta_tx_desc {
 	u32  command;		/* Options used by HW for packet transmitting.*/
-	u16  reserverd1;	/* csum_l4 (for future use)		*/
+	u16  reserved1;		/* csum_l4 (for future use)		*/
 	u16  data_size;		/* Data size of transmitted packet in bytes */
 	u32  buf_phys_addr;	/* Physical addr of transmitted buffer	*/
 	u32  reserved2;		/* hw_cmd - (for future use, PMT)	*/
@@ -519,7 +519,7 @@ struct mvneta_rx_desc {
 #else
 struct mvneta_tx_desc {
 	u16  data_size;		/* Data size of transmitted packet in bytes */
-	u16  reserverd1;	/* csum_l4 (for future use)		*/
+	u16  reserved1;		/* csum_l4 (for future use)		*/
 	u32  command;		/* Options used by HW for packet transmitting.*/
 	u32  reserved2;		/* hw_cmd - (for future use, PMT)	*/
 	u32  buf_phys_addr;	/* Physical addr of transmitted buffer	*/
-- 
GitLab


From c4b7d1ba7d263b74bb72e9325262a67139605cde Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Sat, 10 Nov 2018 04:13:24 +0000
Subject: [PATCH 0937/1890] sysv: return 'err' instead of 0 in
 __sysv_write_inode

Fixes gcc '-Wunused-but-set-variable' warning:

fs/sysv/inode.c: In function '__sysv_write_inode':
fs/sysv/inode.c:239:6: warning:
 variable 'err' set but not used [-Wunused-but-set-variable]

__sysv_write_inode should return 'err' instead of 0

Fixes: 05459ca81ac3 ("repair sysv_write_inode(), switch sysv to simple_fsync()")
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/sysv/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 499a20a5a0107..273736f41be38 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -275,7 +275,7 @@ static int __sysv_write_inode(struct inode *inode, int wait)
                 }
         }
 	brelse(bh);
-	return 0;
+	return err;
 }
 
 int sysv_write_inode(struct inode *inode, struct writeback_control *wbc)
-- 
GitLab


From de7b75d82f70c5469675b99ad632983c50b6f7e7 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Fri, 9 Nov 2018 15:58:40 -0700
Subject: [PATCH 0938/1890] floppy: fix race condition in
 __floppy_read_block_0()

LKP recently reported a hang at bootup in the floppy code:

[  245.678853] INFO: task mount:580 blocked for more than 120 seconds.
[  245.679906]       Tainted: G                T 4.19.0-rc6-00172-ga9f38e1 #1
[  245.680959] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[  245.682181] mount           D 6372   580      1 0x00000004
[  245.683023] Call Trace:
[  245.683425]  __schedule+0x2df/0x570
[  245.683975]  schedule+0x2d/0x80
[  245.684476]  schedule_timeout+0x19d/0x330
[  245.685090]  ? wait_for_common+0xa5/0x170
[  245.685735]  wait_for_common+0xac/0x170
[  245.686339]  ? do_sched_yield+0x90/0x90
[  245.686935]  wait_for_completion+0x12/0x20
[  245.687571]  __floppy_read_block_0+0xfb/0x150
[  245.688244]  ? floppy_resume+0x40/0x40
[  245.688844]  floppy_revalidate+0x20f/0x240
[  245.689486]  check_disk_change+0x43/0x60
[  245.690087]  floppy_open+0x1ea/0x360
[  245.690653]  __blkdev_get+0xb4/0x4d0
[  245.691212]  ? blkdev_get+0x1db/0x370
[  245.691777]  blkdev_get+0x1f3/0x370
[  245.692351]  ? path_put+0x15/0x20
[  245.692871]  ? lookup_bdev+0x4b/0x90
[  245.693539]  blkdev_get_by_path+0x3d/0x80
[  245.694165]  mount_bdev+0x2a/0x190
[  245.694695]  squashfs_mount+0x10/0x20
[  245.695271]  ? squashfs_alloc_inode+0x30/0x30
[  245.695960]  mount_fs+0xf/0x90
[  245.696451]  vfs_kern_mount+0x43/0x130
[  245.697036]  do_mount+0x187/0xc40
[  245.697563]  ? memdup_user+0x28/0x50
[  245.698124]  ksys_mount+0x60/0xc0
[  245.698639]  sys_mount+0x19/0x20
[  245.699167]  do_int80_syscall_32+0x61/0x130
[  245.699813]  entry_INT80_32+0xc7/0xc7

showing that we never complete that read request. The reason is that
the completion setup is racy - it initializes the completion event
AFTER submitting the IO, which means that the IO could complete
before/during the init. If it does, we are passing garbage to
complete() and we may sleep forever waiting for the event to
occur.

Fixes: 7b7b68bba5ef ("floppy: bail out in open() if drive is not responding to block0 read")
Reviewed-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/floppy.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index a8cfa011c2848..fb23578e9a416 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -4148,10 +4148,11 @@ static int __floppy_read_block_0(struct block_device *bdev, int drive)
 	bio.bi_end_io = floppy_rb0_cb;
 	bio_set_op_attrs(&bio, REQ_OP_READ, 0);
 
+	init_completion(&cbdata.complete);
+
 	submit_bio(&bio);
 	process_fd_request();
 
-	init_completion(&cbdata.complete);
 	wait_for_completion(&cbdata.complete);
 
 	__free_page(page);
-- 
GitLab


From 3fa58dcab50a0aa16817f16a8d38aee869eb3fb9 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Thu, 1 Nov 2018 00:30:22 -0700
Subject: [PATCH 0939/1890] acpi, nfit: Fix ARS overflow continuation

When the platform BIOS is unable to report all the media error records
it requires the OS to restart the scrub at a prescribed location. The
driver detects the overflow condition, but then fails to report it to
the ARS state machine after reaping the records. Propagate -ENOSPC
correctly to continue the ARS operation.

Cc: <stable@vger.kernel.org>
Fixes: 1cf03c00e7c1 ("nfit: scrub and register regions in a workqueue")
Reported-by: Jacek Zloch <jacek.zloch@intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/acpi/nfit/core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index f8c638f3c946d..5970b8f5f7680 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -2928,9 +2928,9 @@ static int acpi_nfit_query_poison(struct acpi_nfit_desc *acpi_desc)
 		return rc;
 
 	if (ars_status_process_records(acpi_desc))
-		return -ENOMEM;
+		dev_err(acpi_desc->dev, "Failed to process ARS records\n");
 
-	return 0;
+	return rc;
 }
 
 static int ars_register(struct acpi_nfit_desc *acpi_desc,
-- 
GitLab


From 2121db09630113e67b51ae78c18115f1858f648a Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Sat, 3 Nov 2018 17:53:09 -0700
Subject: [PATCH 0940/1890] Revert "acpi, nfit: Further restrict userspace ARS
 start requests"

The following lockdep splat results from acquiring the init_mutex in
acpi_nfit_clear_to_send():

 WARNING: possible circular locking dependency detected
 lt-daxdev-error/7216 is trying to acquire lock:
 00000000f694db15 (&acpi_desc->init_mutex){+.+.}, at: acpi_nfit_clear_to_send+0x27/0x80 [nfit]

 but task is already holding lock:
 00000000182298f2 (&nvdimm_bus->reconfig_mutex){+.+.}, at: __nd_ioctl+0x457/0x610 [libnvdimm]

 which lock already depends on the new lock.


 the existing dependency chain (in reverse order) is:

 -> #1 (&nvdimm_bus->reconfig_mutex){+.+.}:
        nvdimm_badblocks_populate+0x41/0x150 [libnvdimm]
        nd_region_notify+0x95/0xb0 [libnvdimm]
        nd_device_notify+0x40/0x50 [libnvdimm]
        ars_complete+0x7f/0xd0 [nfit]
        acpi_nfit_scrub+0xbb/0x410 [nfit]
        process_one_work+0x22b/0x5c0
        worker_thread+0x3c/0x390
        kthread+0x11e/0x140
        ret_from_fork+0x3a/0x50

 -> #0 (&acpi_desc->init_mutex){+.+.}:
        __mutex_lock+0x83/0x980
        acpi_nfit_clear_to_send+0x27/0x80 [nfit]
        __nd_ioctl+0x474/0x610 [libnvdimm]
        nd_ioctl+0xa4/0xb0 [libnvdimm]
        do_vfs_ioctl+0xa5/0x6e0
        ksys_ioctl+0x70/0x80
        __x64_sys_ioctl+0x16/0x20
        do_syscall_64+0x60/0x210
        entry_SYSCALL_64_after_hwframe+0x49/0xbe

New infrastructure is needed to be able to perform this check without
acquiring the lock.

Fixes: 594861215c83 ("acpi, nfit: Further restrict userspace ARS start")
Cc: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/acpi/nfit/core.c | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 5970b8f5f7680..14d9f5bea0151 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -3341,8 +3341,6 @@ static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
 		struct nvdimm *nvdimm, unsigned int cmd)
 {
 	struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
-	struct nfit_spa *nfit_spa;
-	int rc = 0;
 
 	if (nvdimm)
 		return 0;
@@ -3355,17 +3353,10 @@ static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
 	 * just needs guarantees that any ARS it initiates are not
 	 * interrupted by any intervening start requests from userspace.
 	 */
-	mutex_lock(&acpi_desc->init_mutex);
-	list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
-		if (acpi_desc->scrub_spa
-				|| test_bit(ARS_REQ_SHORT, &nfit_spa->ars_state)
-				|| test_bit(ARS_REQ_LONG, &nfit_spa->ars_state)) {
-			rc = -EBUSY;
-			break;
-		}
-	mutex_unlock(&acpi_desc->init_mutex);
+	if (work_busy(&acpi_desc->dwork.work))
+		return -EBUSY;
 
-	return rc;
+	return 0;
 }
 
 int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc,
-- 
GitLab


From 63c82997f5c0f3e1b914af43d82f712a86bc5f3a Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Fri, 9 Nov 2018 21:06:26 -0800
Subject: [PATCH 0941/1890] net: sched: cls_flower: validate nested
 enc_opts_policy to avoid warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

TCA_FLOWER_KEY_ENC_OPTS and TCA_FLOWER_KEY_ENC_OPTS_MASK can only
currently contain further nested attributes, which are parsed by
hand, so the policy is never actually used resulting in a W=1
build warning:

net/sched/cls_flower.c:492:1: warning: â€˜enc_opts_policyâ€™ defined but not used [-Wunused-const-variable=]
 enc_opts_policy[TCA_FLOWER_KEY_ENC_OPTS_MAX + 1] = {

Add the validation anyway to avoid potential bugs when other
attributes are added and to make the attribute structure slightly
more clear.  Validation will also set extact to point to bad
attribute on error.

Fixes: 0a6e77784f49 ("net/sched: allow flower to match tunnel options")
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Simon Horman <simon.horman@netronome.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_flower.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 9aada2d0ef065..c6c327874abcc 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -709,11 +709,23 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
 			  struct netlink_ext_ack *extack)
 {
 	const struct nlattr *nla_enc_key, *nla_opt_key, *nla_opt_msk = NULL;
-	int option_len, key_depth, msk_depth = 0;
+	int err, option_len, key_depth, msk_depth = 0;
+
+	err = nla_validate_nested(tb[TCA_FLOWER_KEY_ENC_OPTS],
+				  TCA_FLOWER_KEY_ENC_OPTS_MAX,
+				  enc_opts_policy, extack);
+	if (err)
+		return err;
 
 	nla_enc_key = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS]);
 
 	if (tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]) {
+		err = nla_validate_nested(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK],
+					  TCA_FLOWER_KEY_ENC_OPTS_MAX,
+					  enc_opts_policy, extack);
+		if (err)
+			return err;
+
 		nla_opt_msk = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]);
 		msk_depth = nla_len(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]);
 	}
-- 
GitLab


From 6bbe4385d035c6fac56f840a59861a0310ce137b Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 5 Nov 2018 17:19:36 +0900
Subject: [PATCH 0942/1890] kconfig: merge_config: avoid false positive matches
 from comment lines

The current SED_CONFIG_EXP could match to comment lines in config
fragment files, especially when CONFIG_PREFIX_ is empty. For example,
Buildroot uses empty prefixing; starting symbols with BR2_ is just
convention.

Make the sed expression more robust against false positives from
comment lines. The new sed expression matches to only valid patterns.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Reviewed-by: Petr Vorel <petr.vorel@gmail.com>
Reviewed-by: Arnout Vandecappelle (Essensium/Mind) <arnout@mind.be>
---
 scripts/kconfig/merge_config.sh | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/scripts/kconfig/merge_config.sh b/scripts/kconfig/merge_config.sh
index da66e7742282a..0ef906499646b 100755
--- a/scripts/kconfig/merge_config.sh
+++ b/scripts/kconfig/merge_config.sh
@@ -102,7 +102,8 @@ if [ ! -r "$INITFILE" ]; then
 fi
 
 MERGE_LIST=$*
-SED_CONFIG_EXP="s/^\(# \)\{0,1\}\(${CONFIG_PREFIX}[a-zA-Z0-9_]*\)[= ].*/\2/p"
+SED_CONFIG_EXP1="s/^\(${CONFIG_PREFIX}[a-zA-Z0-9_]*\)=.*/\1/p"
+SED_CONFIG_EXP2="s/^# \(${CONFIG_PREFIX}[a-zA-Z0-9_]*\) is not set$/\1/p"
 
 TMP_FILE=$(mktemp ./.tmp.config.XXXXXXXXXX)
 
@@ -116,7 +117,7 @@ for MERGE_FILE in $MERGE_LIST ; do
 		echo "The merge file '$MERGE_FILE' does not exist.  Exit." >&2
 		exit 1
 	fi
-	CFG_LIST=$(sed -n "$SED_CONFIG_EXP" $MERGE_FILE)
+	CFG_LIST=$(sed -n -e "$SED_CONFIG_EXP1" -e "$SED_CONFIG_EXP2" $MERGE_FILE)
 
 	for CFG in $CFG_LIST ; do
 		grep -q -w $CFG $TMP_FILE || continue
@@ -159,7 +160,7 @@ make KCONFIG_ALLCONFIG=$TMP_FILE $OUTPUT_ARG $ALLTARGET
 
 
 # Check all specified config values took (might have missed-dependency issues)
-for CFG in $(sed -n "$SED_CONFIG_EXP" $TMP_FILE); do
+for CFG in $(sed -n -e "$SED_CONFIG_EXP1" -e "$SED_CONFIG_EXP2" $TMP_FILE); do
 
 	REQUESTED_VAL=$(grep -w -e "$CFG" $TMP_FILE)
 	ACTUAL_VAL=$(grep -w -e "$CFG" "$KCONFIG_CONFIG")
-- 
GitLab


From bbcde0a7241261cd0ca8d8e6b94a4113a4b71443 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 6 Nov 2018 13:18:05 +0900
Subject: [PATCH 0943/1890] kbuild: deb-pkg: fix too low build version number

Since commit b41d920acff8 ("kbuild: deb-pkg: split generating packaging
and build"), the build version of the kernel contained in a deb package
is too low by 1.

Prior to the bad commit, the kernel was built first, then the number
in .version file was read out, and written into the debian control file.

Now, the debian control file is created before the kernel is actually
compiled, which is causing the version number mismatch.

Let the mkdebian script pass KBUILD_BUILD_VERSION=${revision} to require
the build system to use the specified version number.

Fixes: b41d920acff8 ("kbuild: deb-pkg: split generating packaging and build")
Reported-by: Doug Smythies <dsmythies@telus.net>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Tested-by: Doug Smythies <dsmythies@telus.net>
---
 scripts/package/mkdebian | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/scripts/package/mkdebian b/scripts/package/mkdebian
index 663a7f343b42c..edcad61fe3cda 100755
--- a/scripts/package/mkdebian
+++ b/scripts/package/mkdebian
@@ -88,6 +88,7 @@ set_debarch() {
 version=$KERNELRELEASE
 if [ -n "$KDEB_PKGVERSION" ]; then
 	packageversion=$KDEB_PKGVERSION
+	revision=${packageversion##*-}
 else
 	revision=$(cat .version 2>/dev/null||echo 1)
 	packageversion=$version-$revision
@@ -205,10 +206,12 @@ cat <<EOF > debian/rules
 #!$(command -v $MAKE) -f
 
 build:
-	\$(MAKE) KERNELRELEASE=${version} ARCH=${ARCH} KBUILD_SRC=
+	\$(MAKE) KERNELRELEASE=${version} ARCH=${ARCH} \
+	KBUILD_BUILD_VERSION=${revision} KBUILD_SRC=
 
 binary-arch:
-	\$(MAKE) KERNELRELEASE=${version} ARCH=${ARCH} KBUILD_SRC= intdeb-pkg
+	\$(MAKE) KERNELRELEASE=${version} ARCH=${ARCH} \
+	KBUILD_BUILD_VERSION=${revision} KBUILD_SRC= intdeb-pkg
 
 clean:
 	rm -rf debian/*tmp debian/files
-- 
GitLab


From 8ef14c2c41d962756d314f1d7dc972b0ea7a180f Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Tue, 6 Nov 2018 10:10:38 -0800
Subject: [PATCH 0944/1890] Revert "scripts/setlocalversion: git: Make -dirty
 check more robust"

This reverts commit 6147b1cf19651c7de297e69108b141fb30aa2349.

The reverted patch results in attempted write access to the source
repository, even if that repository is mounted read-only.

Output from "strace git status -uno --porcelain":

getcwd("/tmp/linux-test", 129)          = 16
open("/tmp/linux-test/.git/index.lock", O_RDWR|O_CREAT|O_EXCL|O_CLOEXEC, 0666) =
	-1 EROFS (Read-only file system)

While git appears to be able to handle this situation, a monitored
build environment (such as the one used for Chrome OS kernel builds)
may detect it and bail out with an access violation error. On top of
that, the attempted write access suggests that git _will_ write to the
file even if a build output directory is specified. Users may have the
reasonable expectation that the source repository remains untouched in
that situation.

Fixes: 6147b1cf19651 ("scripts/setlocalversion: git: Make -dirty check more robust"
Cc: Genki Sky <sky@genki.is>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Reviewed-by: Brian Norris <briannorris@chromium.org>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 scripts/setlocalversion | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/setlocalversion b/scripts/setlocalversion
index 79f7dd57d571e..71f39410691b6 100755
--- a/scripts/setlocalversion
+++ b/scripts/setlocalversion
@@ -74,7 +74,7 @@ scm_version()
 		fi
 
 		# Check for uncommitted changes
-		if git status -uno --porcelain | grep -qv '^.. scripts/package'; then
+		if git diff-index --name-only HEAD | grep -qv "^scripts/package"; then
 			printf '%s' -dirty
 		fi
 
-- 
GitLab


From d5615e472d23e854e96192103b6ae7977e705f01 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Wed, 7 Nov 2018 08:36:46 -0600
Subject: [PATCH 0945/1890] builddeb: Fix inclusion of dtbs in debian package
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 37c8a5fafa3b ("kbuild: consolidate Devicetree dtb build rules")
moved the location of 'dtbs_install' target which caused dtbs to not be
installed when building debian package with 'bindeb-pkg' target. Update
the builddeb script to use the same logic that determines if there's a
'dtbs_install' target which is presence of the arch dts directory. Also,
use CONFIG_OF_EARLY_FLATTREE instead of CONFIG_OF as that's a better
indication of whether we are building dtbs.

This commit will also have the side effect of installing dtbs on any
arch that has dts files. Previously, it was dependent on whether the
arch defined 'dtbs_install'.

Fixes: 37c8a5fafa3b ("kbuild: consolidate Devicetree dtb build rules")
Reported-by: Nuno GonÃ§alves <nunojpg@gmail.com>
Signed-off-by: Rob Herring <robh@kernel.org>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 scripts/package/builddeb | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/package/builddeb b/scripts/package/builddeb
index 0b31f4f1f92cf..f43a274f4f1d5 100755
--- a/scripts/package/builddeb
+++ b/scripts/package/builddeb
@@ -83,9 +83,9 @@ else
 fi
 cp "$($MAKE -s -f $srctree/Makefile image_name)" "$tmpdir/$installed_image_path"
 
-if grep -q "^CONFIG_OF=y" $KCONFIG_CONFIG ; then
+if grep -q "^CONFIG_OF_EARLY_FLATTREE=y" $KCONFIG_CONFIG ; then
 	# Only some architectures with OF support have this target
-	if grep -q dtbs_install "${srctree}/arch/$SRCARCH/Makefile"; then
+	if [ -d "${srctree}/arch/$SRCARCH/boot/dts" ]; then
 		$MAKE KBUILD_SRC= INSTALL_DTBS_PATH="$tmpdir/usr/lib/$packagename" dtbs_install
 	fi
 fi
-- 
GitLab


From e670de54c813b5bc3672dd1c67871dc60e9206f4 Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Thu, 18 Oct 2018 05:09:30 +0000
Subject: [PATCH 0946/1890] Drivers: hv: kvp: Fix the recent regression caused
 by incorrect clean-up

In kvp_send_key(), we do need call process_ib_ipinfo() if
message->kvp_hdr.operation is KVP_OP_GET_IP_INFO, because it turns out
the userland hv_kvp_daemon needs the info of operation, adapter_id and
addr_family. With the incorrect fc62c3b1977d, the host can't get the
VM's IP via KVP.

And, fc62c3b1977d added a "break;", but actually forgot to initialize
the key_size/value in the case of KVP_OP_SET, so the default key_size of
0 is passed to the kvp daemon, and the pool files
/var/lib/hyperv/.kvp_pool_* can't be updated.

This patch effectively rolls back the previous fc62c3b1977d, and
correctly fixes the "this statement may fall through" warnings.

This patch is tested on WS 2012 R2 and 2016.

Fixes: fc62c3b1977d ("Drivers: hv: kvp: Fix two "this statement may fall through" warnings")
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Cc: K. Y. Srinivasan <kys@microsoft.com>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Cc: <Stable@vger.kernel.org>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/hv_kvp.c | 26 ++++++++++++++++++++++----
 1 file changed, 22 insertions(+), 4 deletions(-)

diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c
index a7513a8a8e372..d6106e1a0d4af 100644
--- a/drivers/hv/hv_kvp.c
+++ b/drivers/hv/hv_kvp.c
@@ -353,6 +353,9 @@ static void process_ib_ipinfo(void *in_msg, void *out_msg, int op)
 
 		out->body.kvp_ip_val.dhcp_enabled = in->kvp_ip_val.dhcp_enabled;
 
+		/* fallthrough */
+
+	case KVP_OP_GET_IP_INFO:
 		utf16s_to_utf8s((wchar_t *)in->kvp_ip_val.adapter_id,
 				MAX_ADAPTER_ID_SIZE,
 				UTF16_LITTLE_ENDIAN,
@@ -405,7 +408,11 @@ kvp_send_key(struct work_struct *dummy)
 		process_ib_ipinfo(in_msg, message, KVP_OP_SET_IP_INFO);
 		break;
 	case KVP_OP_GET_IP_INFO:
-		/* We only need to pass on message->kvp_hdr.operation.  */
+		/*
+		 * We only need to pass on the info of operation, adapter_id
+		 * and addr_family to the userland kvp daemon.
+		 */
+		process_ib_ipinfo(in_msg, message, KVP_OP_GET_IP_INFO);
 		break;
 	case KVP_OP_SET:
 		switch (in_msg->body.kvp_set.data.value_type) {
@@ -446,9 +453,9 @@ kvp_send_key(struct work_struct *dummy)
 
 		}
 
-		break;
-
-	case KVP_OP_GET:
+		/*
+		 * The key is always a string - utf16 encoding.
+		 */
 		message->body.kvp_set.data.key_size =
 			utf16s_to_utf8s(
 			(wchar_t *)in_msg->body.kvp_set.data.key,
@@ -456,6 +463,17 @@ kvp_send_key(struct work_struct *dummy)
 			UTF16_LITTLE_ENDIAN,
 			message->body.kvp_set.data.key,
 			HV_KVP_EXCHANGE_MAX_KEY_SIZE - 1) + 1;
+
+		break;
+
+	case KVP_OP_GET:
+		message->body.kvp_get.data.key_size =
+			utf16s_to_utf8s(
+			(wchar_t *)in_msg->body.kvp_get.data.key,
+			in_msg->body.kvp_get.data.key_size,
+			UTF16_LITTLE_ENDIAN,
+			message->body.kvp_get.data.key,
+			HV_KVP_EXCHANGE_MAX_KEY_SIZE - 1) + 1;
 		break;
 
 	case KVP_OP_DELETE:
-- 
GitLab


From fee05f455ceb5c670cbe48e2f9454ebc4a388554 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Tue, 16 Oct 2018 12:59:44 +0200
Subject: [PATCH 0947/1890] drivers/misc/sgi-gru: fix Spectre v1 vulnerability

req.gid can be indirectly controlled by user-space, hence leading to
a potential exploitation of the Spectre variant 1 vulnerability.

This issue was detected with the help of Smatch:

vers/misc/sgi-gru/grukdump.c:200 gru_dump_chiplet_request() warn:
potential spectre issue 'gru_base' [w]

Fix this by sanitizing req.gid before calling macro GID_TO_GRU, which
uses it to index gru_base.

Notice that given that speculation windows are large, the policy is
to kill the speculation on the first load and not worry if it can be
completed with a dependent load/store [1].

[1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2

Cc: stable@vger.kernel.org
Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/sgi-gru/grukdump.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/misc/sgi-gru/grukdump.c b/drivers/misc/sgi-gru/grukdump.c
index 313da31502626..1540a7785e147 100644
--- a/drivers/misc/sgi-gru/grukdump.c
+++ b/drivers/misc/sgi-gru/grukdump.c
@@ -27,6 +27,9 @@
 #include <linux/delay.h>
 #include <linux/bitops.h>
 #include <asm/uv/uv_hub.h>
+
+#include <linux/nospec.h>
+
 #include "gru.h"
 #include "grutables.h"
 #include "gruhandles.h"
@@ -196,6 +199,7 @@ int gru_dump_chiplet_request(unsigned long arg)
 	/* Currently, only dump by gid is implemented */
 	if (req.gid >= gru_max_gids)
 		return -EINVAL;
+	req.gid = array_index_nospec(req.gid, gru_max_gids);
 
 	gru = GID_TO_GRU(req.gid);
 	ubuf = req.buf;
-- 
GitLab


From 7c97301285b62a41d6bceded7d964085fc8cc50f Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <natechancellor@gmail.com>
Date: Wed, 17 Oct 2018 10:09:02 -0700
Subject: [PATCH 0948/1890] misc: atmel-ssc: Fix section annotation on
 atmel_ssc_get_driver_data

After building the kernel with Clang, the following section mismatch
warning appears:

WARNING: vmlinux.o(.text+0x3bf19a6): Section mismatch in reference from
the function ssc_probe() to the function
.init.text:atmel_ssc_get_driver_data()
The function ssc_probe() references
the function __init atmel_ssc_get_driver_data().
This is often because ssc_probe lacks a __init
annotation or the annotation of atmel_ssc_get_driver_data is wrong.

Remove __init from atmel_ssc_get_driver_data to get rid of the mismatch.

Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/atmel-ssc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/atmel-ssc.c b/drivers/misc/atmel-ssc.c
index b2a0340f277e2..d8e3cc2dc7470 100644
--- a/drivers/misc/atmel-ssc.c
+++ b/drivers/misc/atmel-ssc.c
@@ -132,7 +132,7 @@ static const struct of_device_id atmel_ssc_dt_ids[] = {
 MODULE_DEVICE_TABLE(of, atmel_ssc_dt_ids);
 #endif
 
-static inline const struct atmel_ssc_platform_data * __init
+static inline const struct atmel_ssc_platform_data *
 	atmel_ssc_get_driver_data(struct platform_device *pdev)
 {
 	if (pdev->dev.of_node) {
-- 
GitLab


From 0749aa25af82c690395a96e799cd2c6e54c459cf Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Date: Tue, 6 Nov 2018 15:41:41 +0000
Subject: [PATCH 0949/1890] nvmem: core: fix regression in of_nvmem_cell_get()

NVMEM DT support seems to be totally broken after
commit e888d445ac33 ("nvmem: resolve cells from DT at registration time")
Fix this!

Index used in of_nvmem_cell_get() to find cell is specific to
consumer, It can not be used for searching the cell in provider.
Use device_node instead of this to find the matching cell in device
tree case.

Fixes: e888d445ac33 ("nvmem: resolve cells from DT at registration time")
Reported-by: Niklas Cassel <niklas.cassel@linaro.org>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Tested-by: Niklas Cassel <niklas.cassel@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/nvmem/core.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c
index 9b18ce90f9073..27f67dfa649d0 100644
--- a/drivers/nvmem/core.c
+++ b/drivers/nvmem/core.c
@@ -44,6 +44,7 @@ struct nvmem_cell {
 	int			bytes;
 	int			bit_offset;
 	int			nbits;
+	struct device_node	*np;
 	struct nvmem_device	*nvmem;
 	struct list_head	node;
 };
@@ -298,6 +299,7 @@ static void nvmem_cell_drop(struct nvmem_cell *cell)
 	mutex_lock(&nvmem_mutex);
 	list_del(&cell->node);
 	mutex_unlock(&nvmem_mutex);
+	of_node_put(cell->np);
 	kfree(cell->name);
 	kfree(cell);
 }
@@ -530,6 +532,7 @@ static int nvmem_add_cells_from_of(struct nvmem_device *nvmem)
 			return -ENOMEM;
 
 		cell->nvmem = nvmem;
+		cell->np = of_node_get(child);
 		cell->offset = be32_to_cpup(addr++);
 		cell->bytes = be32_to_cpup(addr);
 		cell->name = kasprintf(GFP_KERNEL, "%pOFn", child);
@@ -960,14 +963,13 @@ nvmem_cell_get_from_lookup(struct device *dev, const char *con_id)
 
 #if IS_ENABLED(CONFIG_OF)
 static struct nvmem_cell *
-nvmem_find_cell_by_index(struct nvmem_device *nvmem, int index)
+nvmem_find_cell_by_node(struct nvmem_device *nvmem, struct device_node *np)
 {
 	struct nvmem_cell *cell = NULL;
-	int i = 0;
 
 	mutex_lock(&nvmem_mutex);
 	list_for_each_entry(cell, &nvmem->cells, node) {
-		if (index == i++)
+		if (np == cell->np)
 			break;
 	}
 	mutex_unlock(&nvmem_mutex);
@@ -1011,7 +1013,7 @@ struct nvmem_cell *of_nvmem_cell_get(struct device_node *np, const char *id)
 	if (IS_ERR(nvmem))
 		return ERR_CAST(nvmem);
 
-	cell = nvmem_find_cell_by_index(nvmem, index);
+	cell = nvmem_find_cell_by_node(nvmem, cell_np);
 	if (!cell) {
 		__nvmem_device_put(nvmem);
 		return ERR_PTR(-ENOENT);
-- 
GitLab


From 8bb0a88600f0267cfcc245d34f8c4abe8c282713 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 19 Oct 2018 13:58:01 +0100
Subject: [PATCH 0950/1890] test_firmware: fix error return getting clobbered

In the case where eq->fw->size > PAGE_SIZE the error return rc
is being set to EINVAL however this is being overwritten to
rc = req->fw->size because the error exit path via label 'out' is
not being taken.  Fix this by adding the jump to the error exit
path 'out'.

Detected by CoverityScan, CID#1453465 ("Unused value")

Fixes: c92316bf8e94 ("test_firmware: add batched firmware tests")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 lib/test_firmware.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/test_firmware.c b/lib/test_firmware.c
index b984806d7d7bb..7cab9a9869ace 100644
--- a/lib/test_firmware.c
+++ b/lib/test_firmware.c
@@ -837,6 +837,7 @@ static ssize_t read_firmware_show(struct device *dev,
 	if (req->fw->size > PAGE_SIZE) {
 		pr_err("Testing interface must use PAGE_SIZE firmware for now\n");
 		rc = -EINVAL;
+		goto out;
 	}
 	memcpy(buf, req->fw->data, req->fw->size);
 
-- 
GitLab


From 432798195bbce1f8cd33d1c0284d0538835e25fb Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 26 Oct 2018 10:19:51 +0300
Subject: [PATCH 0951/1890] uio: Fix an Oops on load

I was trying to solve a double free but I introduced a more serious
NULL dereference bug.  The problem is that if there is an IRQ which
triggers immediately, then we need "info->uio_dev" but it's not set yet.

This patch puts the original initialization back to how it was and just
sets info->uio_dev to NULL on the error path so it should solve both
the Oops and the double free.

Fixes: f019f07ecf6a ("uio: potential double frees if __uio_register_device() fails")
Reported-by: Mathias Thore <Mathias.Thore@infinera.com>
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Cc: stable <stable@vger.kernel.org>
Tested-by: Mathias Thore <Mathias.Thore@infinera.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/uio/uio.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
index 85644669fbe7b..0a357db4b31b3 100644
--- a/drivers/uio/uio.c
+++ b/drivers/uio/uio.c
@@ -961,6 +961,8 @@ int __uio_register_device(struct module *owner,
 	if (ret)
 		goto err_uio_dev_add_attributes;
 
+	info->uio_dev = idev;
+
 	if (info->irq && (info->irq != UIO_IRQ_CUSTOM)) {
 		/*
 		 * Note that we deliberately don't use devm_request_irq
@@ -972,11 +974,12 @@ int __uio_register_device(struct module *owner,
 		 */
 		ret = request_irq(info->irq, uio_interrupt,
 				  info->irq_flags, info->name, idev);
-		if (ret)
+		if (ret) {
+			info->uio_dev = NULL;
 			goto err_request_irq;
+		}
 	}
 
-	info->uio_dev = idev;
 	return 0;
 
 err_request_irq:
-- 
GitLab


From 7ab412d33b4c7ff3e0148d3db25dd861edd1283d Mon Sep 17 00:00:00 2001
From: Jon Maloy <donmalo99@gmail.com>
Date: Sat, 10 Nov 2018 17:30:24 -0500
Subject: [PATCH 0952/1890] tipc: fix link re-establish failure

When a link failure is detected locally, the link is reset, the flag
link->in_session is set to false, and a RESET_MSG with the 'stopping'
bit set is sent to the peer.

The purpose of this bit is to inform the peer that this endpoint just
is going down, and that the peer should handle the reception of this
particular RESET message as a local failure. This forces the peer to
accept another RESET or ACTIVATE message from this endpoint before it
can re-establish the link. This again is necessary to ensure that
link session numbers are properly exchanged before the link comes up
again.

If a failure is detected locally at the same time at the peer endpoint
this will do the same, which is also a correct behavior.

However, when receiving such messages, the endpoints will not
distinguish between 'stopping' RESETs and ordinary ones when it comes
to updating session numbers. Both endpoints will copy the received
session number and set their 'in_session' flags to true at the
reception, while they are still expecting another RESET from the
peer before they can go ahead and re-establish. This is contradictory,
since, after applying the validation check referred to below, the
'in_session' flag will cause rejection of all such messages, and the
link will never come up again.

We now fix this by not only handling received RESET/STOPPING messages
as a local failure, but also by omitting to set a new session number
and the 'in_session' flag in such cases.

Fixes: 7ea817f4e832 ("tipc: check session number before accepting link protocol messages")
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/link.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/net/tipc/link.c b/net/tipc/link.c
index 201c3b5bc96be..836727e363c46 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1594,14 +1594,17 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
 		if (in_range(peers_prio, l->priority + 1, TIPC_MAX_LINK_PRI))
 			l->priority = peers_prio;
 
-		/* ACTIVATE_MSG serves as PEER_RESET if link is already down */
-		if (msg_peer_stopping(hdr))
+		/* If peer is going down we want full re-establish cycle */
+		if (msg_peer_stopping(hdr)) {
 			rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
-		else if ((mtyp == RESET_MSG) || !link_is_up(l))
+			break;
+		}
+		/* ACTIVATE_MSG serves as PEER_RESET if link is already down */
+		if (mtyp == RESET_MSG || !link_is_up(l))
 			rc = tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT);
 
 		/* ACTIVATE_MSG takes up link if it was already locally reset */
-		if ((mtyp == ACTIVATE_MSG) && (l->state == LINK_ESTABLISHING))
+		if (mtyp == ACTIVATE_MSG && l->state == LINK_ESTABLISHING)
 			rc = TIPC_LINK_UP_EVT;
 
 		l->peer_session = msg_session(hdr);
-- 
GitLab


From a9049ff9214da68df1179a7d5e36b43479abc9b8 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Sun, 11 Nov 2018 00:41:10 +0100
Subject: [PATCH 0953/1890] net: dsa: mv88e6xxx: Fix clearing of stats counters

The mv88e6161 would sometime fail to probe with a timeout waiting for
the switch to complete an operation. This operation is supposed to
clear the statistics counters. However, due to a read/modify/write,
without the needed mask, the operation actually carried out was more
random, with invalid parameters, resulting in the switch not
responding. We need to preserve the histogram mode bits, so apply a
mask to keep them.

Reported-by: Chris Healy <Chris.Healy@zii.aero>
Fixes: 40cff8fca9e3 ("net: dsa: mv88e6xxx: Fix stats histogram mode")
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/global1.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/dsa/mv88e6xxx/global1.c b/drivers/net/dsa/mv88e6xxx/global1.c
index d721ccf7d8bed..38e399e0f30e1 100644
--- a/drivers/net/dsa/mv88e6xxx/global1.c
+++ b/drivers/net/dsa/mv88e6xxx/global1.c
@@ -567,6 +567,8 @@ int mv88e6xxx_g1_stats_clear(struct mv88e6xxx_chip *chip)
 	if (err)
 		return err;
 
+	/* Keep the histogram mode bits */
+	val &= MV88E6XXX_G1_STATS_OP_HIST_RX_TX;
 	val |= MV88E6XXX_G1_STATS_OP_BUSY | MV88E6XXX_G1_STATS_OP_FLUSH_ALL;
 
 	err = mv88e6xxx_g1_write(chip, MV88E6XXX_G1_STATS_OP, val);
-- 
GitLab


From 7236ead1b14923f3ba35cd29cce13246be83f451 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 10 Nov 2018 16:22:29 -0800
Subject: [PATCH 0954/1890] act_mirred: clear skb->tstamp on redirect

If sch_fq is used at ingress, skbs that might have been
timestamped by net_timestamp_set() if a packet capture
is requesting timestamps could be delayed by arbitrary
amount of time, since sch_fq time base is MONOTONIC.

Fix this problem by moving code from sch_netem.c to act_mirred.c.

Fixes: fb420d5d91c1 ("tcp/fq: move back to CLOCK_MONOTONIC")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_mirred.c | 3 ++-
 net/sched/sch_netem.c  | 9 ---------
 2 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 1dae5f2b358fc..c8cf4d10c4355 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -258,7 +258,8 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
 	if (is_redirect) {
 		skb2->tc_redirected = 1;
 		skb2->tc_from_ingress = skb2->tc_at_ingress;
-
+		if (skb2->tc_from_ingress)
+			skb2->tstamp = 0;
 		/* let's the caller reinsert the packet, if possible */
 		if (use_reinsert) {
 			res->ingress = want_ingress;
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 57b3ad9394ad7..2c38e3d079246 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -648,15 +648,6 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
 			 */
 			skb->dev = qdisc_dev(sch);
 
-#ifdef CONFIG_NET_CLS_ACT
-			/*
-			 * If it's at ingress let's pretend the delay is
-			 * from the network (tstamp will be updated).
-			 */
-			if (skb->tc_redirected && skb->tc_from_ingress)
-				skb->tstamp = 0;
-#endif
-
 			if (q->slot.slot_next) {
 				q->slot.packets_left--;
 				q->slot.bytes_left -= qdisc_pkt_len(skb);
-- 
GitLab


From a66d972465d15b1d89281258805eb8b47d66bd36 Mon Sep 17 00:00:00 2001
From: Alexey Brodkin <alexey.brodkin@synopsys.com>
Date: Wed, 31 Oct 2018 18:25:47 +0300
Subject: [PATCH 0955/1890] devres: Align data[] to ARCH_KMALLOC_MINALIGN

Initially we bumped into problem with 32-bit aligned atomic64_t
on ARC, see [1]. And then during quite lengthly discussion Peter Z.
mentioned ARCH_KMALLOC_MINALIGN which IMHO makes perfect sense.
If allocation is done by plain kmalloc() obtained buffer will be
ARCH_KMALLOC_MINALIGN aligned and then why buffer obtained via
devm_kmalloc() should have any other alignment?

This way we at least get the same behavior for both types of
allocation.

[1] http://lists.infradead.org/pipermail/linux-snps-arc/2018-July/004009.html
[2] http://lists.infradead.org/pipermail/linux-snps-arc/2018-July/004036.html

Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: David Laight <David.Laight@ACULAB.COM>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Greg KH <greg@kroah.com>
Cc: <stable@vger.kernel.org> # 4.8+
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/devres.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/base/devres.c b/drivers/base/devres.c
index 4aaf00d2098b4..e038e2b3b7ea4 100644
--- a/drivers/base/devres.c
+++ b/drivers/base/devres.c
@@ -26,8 +26,14 @@ struct devres_node {
 
 struct devres {
 	struct devres_node		node;
-	/* -- 3 pointers */
-	unsigned long long		data[];	/* guarantee ull alignment */
+	/*
+	 * Some archs want to perform DMA into kmalloc caches
+	 * and need a guaranteed alignment larger than
+	 * the alignment of a 64-bit integer.
+	 * Thus we use ARCH_KMALLOC_MINALIGN here and get exactly the same
+	 * buffer alignment as if it was allocated by plain kmalloc().
+	 */
+	u8 __aligned(ARCH_KMALLOC_MINALIGN) data[];
 };
 
 struct devres_group {
-- 
GitLab


From cd56a5141331abfe218d744a3d66e1788135d482 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Sat, 10 Nov 2018 23:28:06 +0000
Subject: [PATCH 0956/1890] staging: mt7621-pinctrl: fix uninitialized variable
 ngroups

Currently the for_each_node_with_property loop us incrementing variable
ngroups however it was not initialized and hence will contain garbage.
Fix this by initializing ngroups to zero.

Detected with static analysis with cppcheck:

drivers/staging/mt7621-pinctrl/pinctrl-rt2880.c:89]: (error) Uninitialized
variable: ngroups

Fixes: e12a1a6e087b ("staging: mt7621-pinctrl: refactor rt2880_pinctrl_dt_node_to_map function")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Reviewed-by: Sergio Paracuellos <sergio.paracuellos@gmail.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/mt7621-pinctrl/pinctrl-rt2880.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/mt7621-pinctrl/pinctrl-rt2880.c b/drivers/staging/mt7621-pinctrl/pinctrl-rt2880.c
index b8566ed898f15..aa98fbb170139 100644
--- a/drivers/staging/mt7621-pinctrl/pinctrl-rt2880.c
+++ b/drivers/staging/mt7621-pinctrl/pinctrl-rt2880.c
@@ -82,7 +82,7 @@ static int rt2880_pinctrl_dt_node_to_map(struct pinctrl_dev *pctrldev,
 	struct property *prop;
 	const char *function_name, *group_name;
 	int ret;
-	int ngroups;
+	int ngroups = 0;
 	unsigned int reserved_maps = 0;
 
 	for_each_node_with_property(np_config, "group")
-- 
GitLab


From 5a96b2d38dc054c0bbcbcd585b116566cbd877fe Mon Sep 17 00:00:00 2001
From: Ben Wolsieffer <benwolsieffer@gmail.com>
Date: Sat, 3 Nov 2018 19:32:20 -0400
Subject: [PATCH 0957/1890] staging: vchiq_arm: fix compat
 VCHIQ_IOC_AWAIT_COMPLETION

The compatibility ioctl wrapper for VCHIQ_IOC_AWAIT_COMPLETION assumes that
the native ioctl always uses a message buffer and decrements msgbufcount.
Certain message types do not use a message buffer and in this case
msgbufcount is not decremented, and completion->header for the message is
NULL. Because the wrapper unconditionally decrements msgbufcount, the
calling process may assume that a message buffer has been used even when
it has not.

This results in a memory leak in the userspace code that interfaces with
this driver. When msgbufcount is decremented, the userspace code assumes
that the buffer can be freed though the reference in completion->header,
which cannot happen when the reference is NULL.

This patch causes the wrapper to only decrement msgbufcount when the
native ioctl decrements it. Note that we cannot simply copy the native
ioctl's value of msgbufcount, because the wrapper only retrieves messages
from the native ioctl one at a time, while userspace may request multiple
messages.

See https://github.com/raspberrypi/linux/pull/2703 for more discussion of
this patch.

Fixes: 5569a1260933 ("staging: vchiq_arm: Add compatibility wrappers for ioctls")
Signed-off-by: Ben Wolsieffer <benwolsieffer@gmail.com>
Acked-by: Stefan Wahren <stefan.wahren@i2se.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../staging/vc04_services/interface/vchiq_arm/vchiq_arm.c  | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
index ea789376de0f8..45de21c210c1c 100644
--- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
+++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
@@ -1795,6 +1795,7 @@ vchiq_compat_ioctl_await_completion(struct file *file,
 	struct vchiq_await_completion32 args32;
 	struct vchiq_completion_data32 completion32;
 	unsigned int *msgbufcount32;
+	unsigned int msgbufcount_native;
 	compat_uptr_t msgbuf32;
 	void *msgbuf;
 	void **msgbufptr;
@@ -1906,7 +1907,11 @@ vchiq_compat_ioctl_await_completion(struct file *file,
 			 sizeof(completion32)))
 		return -EFAULT;
 
-	args32.msgbufcount--;
+	if (get_user(msgbufcount_native, &args->msgbufcount))
+		return -EFAULT;
+
+	if (!msgbufcount_native)
+		args32.msgbufcount--;
 
 	msgbufcount32 =
 		&((struct vchiq_await_completion32 __user *)arg)->msgbufcount;
-- 
GitLab


From ccda4af0f4b92f7b4c308d3acc262f4a7e3affad Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 11 Nov 2018 17:12:31 -0600
Subject: [PATCH 0958/1890] Linux 4.20-rc2

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 9fce8b91c15f6..2f36db8978953 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 4
 PATCHLEVEL = 20
 SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc2
 NAME = "People's Front"
 
 # *DOCUMENTATION*
-- 
GitLab


From c8b00bb742dd036388f37d019dbb9db177f3e66c Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Thu, 1 Nov 2018 16:21:05 +1100
Subject: [PATCH 0959/1890] powerpc/mm/64s: Fix preempt warning in
 slb_allocate_kernel()

With preempt enabled we see warnings in do_slb_fault():

  BUG: using smp_processor_id() in preemptible [00000000] code: kworker/u33:0/98
  futex hash table entries: 4096 (order: 3, 524288 bytes)
  caller is do_slb_fault+0x204/0x230
  CPU: 5 PID: 98 Comm: kworker/u33:0 Not tainted 4.19.0-rc3-gcc-7.3.1-00022-g1936f094e164 #138
  Call Trace:
    dump_stack+0xb4/0x104 (unreliable)
    check_preemption_disabled+0x148/0x150
    do_slb_fault+0x204/0x230
    data_access_slb_common+0x138/0x180

This is caused by the get_paca() in slb_allocate_kernel(), which
includes a call to debug_smp_processor_id().

slb_allocate_kernel() can only be called from do_slb_fault(), and in
that path interrupts are hard disabled and so we can't be preempted,
but we can't update the preempt flags (in thread_info) because that
could cause an SLB fault.

So just use local_paca which is safe and doesn't cause the warning.

Fixes: 48e7b7695745 ("powerpc/64s/hash: Convert SLB miss handlers to C")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/mm/slb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index b663a36f9ada1..bc3914d54e26e 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -708,7 +708,7 @@ static long slb_allocate_kernel(unsigned long ea, unsigned long id)
 			return -EFAULT;
 
 		if (ea < H_VMALLOC_END)
-			flags = get_paca()->vmalloc_sllp;
+			flags = local_paca->vmalloc_sllp;
 		else
 			flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_io_psize].sllp;
 	} else {
-- 
GitLab


From 43c6494fa1499912c8177e71450c0279041152a6 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Tue, 6 Nov 2018 23:37:58 +1100
Subject: [PATCH 0960/1890] powerpc/io: Fix the IO workarounds code to work
 with Radix

Back in 2006 Ben added some workarounds for a misbehaviour in the
Spider IO bridge used on early Cell machines, see commit
014da7ff47b5 ("[POWERPC] Cell "Spider" MMIO workarounds"). Later these
were made to be generic, ie. not tied specifically to Spider.

The code stashes a token in the high bits (59-48) of virtual addresses
used for IO (eg. returned from ioremap()). This works fine when using
the Hash MMU, but when we're using the Radix MMU the bits used for the
token overlap with some of the bits of the virtual address.

This is because the maximum virtual address is larger with Radix, up
to c00fffffffffffff, and in fact we use that high part of the address
range for ioremap(), see RADIX_KERN_IO_START.

As it happens the bits that are used overlap with the bits that
differentiate an IO address vs a linear map address. If the resulting
address lies outside the linear mapping we will crash (see below), if
not we just corrupt memory.

  virtio-pci 0000:00:00.0: Using 64-bit direct DMA at offset 800000000000000
  Unable to handle kernel paging request for data at address 0xc000000080000014
  ...
  CFAR: c000000000626b98 DAR: c000000080000014 DSISR: 42000000 IRQMASK: 0
  GPR00: c0000000006c54fc c00000003e523378 c0000000016de600 0000000000000000
  GPR04: c00c000080000014 0000000000000007 0fffffff000affff 0000000000000030
         ^^^^
  ...
  NIP [c000000000626c5c] .iowrite8+0xec/0x100
  LR [c0000000006c992c] .vp_reset+0x2c/0x90
  Call Trace:
    .pci_bus_read_config_dword+0xc4/0x120 (unreliable)
    .register_virtio_device+0x13c/0x1c0
    .virtio_pci_probe+0x148/0x1f0
    .local_pci_probe+0x68/0x140
    .pci_device_probe+0x164/0x220
    .really_probe+0x274/0x3b0
    .driver_probe_device+0x80/0x170
    .__driver_attach+0x14c/0x150
    .bus_for_each_dev+0xb8/0x130
    .driver_attach+0x34/0x50
    .bus_add_driver+0x178/0x2f0
    .driver_register+0x90/0x1a0
    .__pci_register_driver+0x6c/0x90
    .virtio_pci_driver_init+0x2c/0x40
    .do_one_initcall+0x64/0x280
    .kernel_init_freeable+0x36c/0x474
    .kernel_init+0x24/0x160
    .ret_from_kernel_thread+0x58/0x7c

This hasn't been a problem because CONFIG_PPC_IO_WORKAROUNDS which
enables this code is usually not enabled. It is only enabled when it's
selected by PPC_CELL_NATIVE which is only selected by
PPC_IBM_CELL_BLADE and that in turn depends on BIG_ENDIAN. So in order
to hit the bug you need to build a big endian kernel, with IBM Cell
Blade support enabled, as well as Radix MMU support, and then boot
that on Power9 using Radix MMU.

Still we can fix the bug, so let's do that. We simply use fewer bits
for the token, taking the union of the restrictions on the address
from both Hash and Radix, we end up with 8 bits we can use for the
token. The only user of the token is iowa_mem_find_bus() which only
supports 8 token values, so 8 bits is plenty for that.

Fixes: 566ca99af026 ("powerpc/mm/radix: Add dummy radix_enabled()")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/io.h | 20 +++++++-------------
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index 3ef40b703c4ab..e746becd9d6ff 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -268,19 +268,13 @@ extern void _memcpy_toio(volatile void __iomem *dest, const void *src,
  * their hooks, a bitfield is reserved for use by the platform near the
  * top of MMIO addresses (not PIO, those have to cope the hard way).
  *
- * This bit field is 12 bits and is at the top of the IO virtual
- * addresses PCI_IO_INDIRECT_TOKEN_MASK.
+ * The highest address in the kernel virtual space are:
  *
- * The kernel virtual space is thus:
+ *  d0003fffffffffff	# with Hash MMU
+ *  c00fffffffffffff	# with Radix MMU
  *
- *  0xD000000000000000		: vmalloc
- *  0xD000080000000000		: PCI PHB IO space
- *  0xD000080080000000		: ioremap
- *  0xD0000fffffffffff		: end of ioremap region
- *
- * Since the top 4 bits are reserved as the region ID, we use thus
- * the next 12 bits and keep 4 bits available for the future if the
- * virtual address space is ever to be extended.
+ * The top 4 bits are reserved as the region ID on hash, leaving us 8 bits
+ * that can be used for the field.
  *
  * The direct IO mapping operations will then mask off those bits
  * before doing the actual access, though that only happen when
@@ -292,8 +286,8 @@ extern void _memcpy_toio(volatile void __iomem *dest, const void *src,
  */
 
 #ifdef CONFIG_PPC_INDIRECT_MMIO
-#define PCI_IO_IND_TOKEN_MASK	0x0fff000000000000ul
-#define PCI_IO_IND_TOKEN_SHIFT	48
+#define PCI_IO_IND_TOKEN_SHIFT	52
+#define PCI_IO_IND_TOKEN_MASK	(0xfful << PCI_IO_IND_TOKEN_SHIFT)
 #define PCI_FIX_ADDR(addr)						\
 	((PCI_IO_ADDR)(((unsigned long)(addr)) & ~PCI_IO_IND_TOKEN_MASK))
 #define PCI_GET_ADDR_TOKEN(addr)					\
-- 
GitLab


From 2c7645b0f7d1014f2636393de7906c6bfd25939f Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Mon, 12 Nov 2018 13:46:06 +1100
Subject: [PATCH 0961/1890] selftests/powerpc: Fix wild_bctr test to work on
 ppc64

The selftest I recently added to test branching to an out-of-bounds
NIP doesn't work on 64-bit big endian. It does fail but not in the
right way. That is it SEGVs trying to load from the opd at BAD_NIP,
but it never gets as far as branching to BAD_NIP.

To fix it we need to create an opd which is reachable but which holds
the bad address.

Fixes: b7683fc66eba ("selftests/powerpc: Add a test of wild bctr")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 tools/testing/selftests/powerpc/mm/wild_bctr.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/powerpc/mm/wild_bctr.c b/tools/testing/selftests/powerpc/mm/wild_bctr.c
index 1b0e9e9a2ddce..90469a9e49d40 100644
--- a/tools/testing/selftests/powerpc/mm/wild_bctr.c
+++ b/tools/testing/selftests/powerpc/mm/wild_bctr.c
@@ -105,6 +105,20 @@ static void dump_regs(void)
 	}
 }
 
+#ifdef _CALL_AIXDESC
+struct opd {
+	unsigned long ip;
+	unsigned long toc;
+	unsigned long env;
+};
+static struct opd bad_opd = {
+	.ip = BAD_NIP,
+};
+#define BAD_FUNC (&bad_opd)
+#else
+#define BAD_FUNC BAD_NIP
+#endif
+
 int test_wild_bctr(void)
 {
 	int (*func_ptr)(void);
@@ -133,7 +147,7 @@ int test_wild_bctr(void)
 
 		poison_regs();
 
-		func_ptr = (int (*)(void))BAD_NIP;
+		func_ptr = (int (*)(void))BAD_FUNC;
 		func_ptr();
 
 		FAIL_IF(1); /* we didn't segv? */
-- 
GitLab


From d19f9130b814d33c03118493c17454f7d90075d1 Mon Sep 17 00:00:00 2001
From: Elvira Khabirova <lineprinter@altlinux.org>
Date: Sat, 10 Nov 2018 04:22:09 +0100
Subject: [PATCH 0962/1890] x86/ptrace: Fix documentation for
 tracehook_report_syscall_entry()

tracehook_report_syscall_entry() is called not only
if %TIF_SYSCALL_TRACE is set, but also if %TIF_SYSCALL_EMU is set,
as appears from x86's entry code.

Signed-off-by: Elvira Khabirova <lineprinter@altlinux.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: ldv@altlinux.org
Cc: oleg@redhat.com
Cc: rostedt@goodmis.org
Link: http://lkml.kernel.org/r/20181110042209.26333972@akathisia
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/tracehook.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index 40b0b4c1bf7b8..df20f8bdbfa30 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -83,8 +83,8 @@ static inline int ptrace_report_syscall(struct pt_regs *regs)
  * tracehook_report_syscall_entry - task is about to attempt a system call
  * @regs:		user register state of current task
  *
- * This will be called if %TIF_SYSCALL_TRACE has been set, when the
- * current task has just entered the kernel for a system call.
+ * This will be called if %TIF_SYSCALL_TRACE or %TIF_SYSCALL_EMU have been set,
+ * when the current task has just entered the kernel for a system call.
  * Full user register state is available here.  Changing the values
  * in @regs can affect the system call number and arguments to be tried.
  * It is safe to block here, preventing the system call from beginning.
-- 
GitLab


From c469933e772132aad040bd6a2adc8edf9ad6f825 Mon Sep 17 00:00:00 2001
From: Patrick Bellasi <patrick.bellasi@arm.com>
Date: Mon, 5 Nov 2018 14:53:58 +0000
Subject: [PATCH 0963/1890] sched/fair: Fix cpu_util_wake() for 'execl' type
 workloads

A ~10% regression has been reported for UnixBench's execl throughput
test by Aaron Lu and Ye Xiaolong:

  https://lkml.org/lkml/2018/10/30/765

That test is pretty simple, it does a "recursive" execve() syscall on the
same binary. Starting from the syscall, this sequence is possible:

   do_execve()
     do_execveat_common()
       __do_execve_file()
         sched_exec()
           select_task_rq_fair()          <==| Task already enqueued
             find_idlest_cpu()
               find_idlest_group()
                 capacity_spare_wake()    <==| Functions not called from
		   cpu_util_wake()           | the wakeup path

which means we can end up calling cpu_util_wake() not only from the
"wakeup path", as its name would suggest. Indeed, the task doing an
execve() syscall is already enqueued on the CPU we want to get the
cpu_util_wake() for.

The estimated utilization for a CPU computed in cpu_util_wake() was
written under the assumption that function can be called only from the
wakeup path. If instead the task is already enqueued, we end up with a
utilization which does not remove the current task's contribution from
the estimated utilization of the CPU.
This will wrongly assume a reduced spare capacity on the current CPU and
increase the chances to migrate the task on execve.

The regression is tracked down to:

 commit d519329f72a6 ("sched/fair: Update util_est only on util_avg updates")

because in that patch we turn on by default the UTIL_EST sched feature.
However, the real issue is introduced by:

 commit f9be3e5961c5 ("sched/fair: Use util_est in LB and WU paths")

Let's fix this by ensuring to always discount the task estimated
utilization from the CPU's estimated utilization when the task is also
the current one. The same benchmark of the bug report, executed on a
dual socket 40 CPUs Intel(R) Xeon(R) CPU E5-2690 v2 @ 3.00GHz machine,
reports these "Execl Throughput" figures (higher the better):

   mainline     : 48136.5 lps
   mainline+fix : 55376.5 lps

which correspond to a 15% speedup.

Moreover, since {cpu_util,capacity_spare}_wake() are not really only
used from the wakeup path, let's remove this ambiguity by using a better
matching name: {cpu_util,capacity_spare}_without().

Since we are at that, let's also improve the existing documentation.

Reported-by: Aaron Lu <aaron.lu@intel.com>
Reported-by: Ye Xiaolong <xiaolong.ye@intel.com>
Tested-by: Aaron Lu <aaron.lu@intel.com>
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Morten Rasmussen <morten.rasmussen@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Quentin Perret <quentin.perret@arm.com>
Cc: Steve Muckle <smuckle@google.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Todd Kjos <tkjos@google.com>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Fixes: f9be3e5961c5 (sched/fair: Use util_est in LB and WU paths)
Link: https://lore.kernel.org/lkml/20181025093100.GB13236@e110439-lin/
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/fair.c | 62 +++++++++++++++++++++++++++++++++++----------
 1 file changed, 48 insertions(+), 14 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3648d0300fdf9..ac855b2f47746 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5674,11 +5674,11 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
 	return target;
 }
 
-static unsigned long cpu_util_wake(int cpu, struct task_struct *p);
+static unsigned long cpu_util_without(int cpu, struct task_struct *p);
 
-static unsigned long capacity_spare_wake(int cpu, struct task_struct *p)
+static unsigned long capacity_spare_without(int cpu, struct task_struct *p)
 {
-	return max_t(long, capacity_of(cpu) - cpu_util_wake(cpu, p), 0);
+	return max_t(long, capacity_of(cpu) - cpu_util_without(cpu, p), 0);
 }
 
 /*
@@ -5738,7 +5738,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
 
 			avg_load += cfs_rq_load_avg(&cpu_rq(i)->cfs);
 
-			spare_cap = capacity_spare_wake(i, p);
+			spare_cap = capacity_spare_without(i, p);
 
 			if (spare_cap > max_spare_cap)
 				max_spare_cap = spare_cap;
@@ -5889,8 +5889,8 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
 		return prev_cpu;
 
 	/*
-	 * We need task's util for capacity_spare_wake, sync it up to prev_cpu's
-	 * last_update_time.
+	 * We need task's util for capacity_spare_without, sync it up to
+	 * prev_cpu's last_update_time.
 	 */
 	if (!(sd_flag & SD_BALANCE_FORK))
 		sync_entity_load_avg(&p->se);
@@ -6216,10 +6216,19 @@ static inline unsigned long cpu_util(int cpu)
 }
 
 /*
- * cpu_util_wake: Compute CPU utilization with any contributions from
- * the waking task p removed.
+ * cpu_util_without: compute cpu utilization without any contributions from *p
+ * @cpu: the CPU which utilization is requested
+ * @p: the task which utilization should be discounted
+ *
+ * The utilization of a CPU is defined by the utilization of tasks currently
+ * enqueued on that CPU as well as tasks which are currently sleeping after an
+ * execution on that CPU.
+ *
+ * This method returns the utilization of the specified CPU by discounting the
+ * utilization of the specified task, whenever the task is currently
+ * contributing to the CPU utilization.
  */
-static unsigned long cpu_util_wake(int cpu, struct task_struct *p)
+static unsigned long cpu_util_without(int cpu, struct task_struct *p)
 {
 	struct cfs_rq *cfs_rq;
 	unsigned int util;
@@ -6231,7 +6240,7 @@ static unsigned long cpu_util_wake(int cpu, struct task_struct *p)
 	cfs_rq = &cpu_rq(cpu)->cfs;
 	util = READ_ONCE(cfs_rq->avg.util_avg);
 
-	/* Discount task's blocked util from CPU's util */
+	/* Discount task's util from CPU's util */
 	util -= min_t(unsigned int, util, task_util(p));
 
 	/*
@@ -6240,14 +6249,14 @@ static unsigned long cpu_util_wake(int cpu, struct task_struct *p)
 	 * a) if *p is the only task sleeping on this CPU, then:
 	 *      cpu_util (== task_util) > util_est (== 0)
 	 *    and thus we return:
-	 *      cpu_util_wake = (cpu_util - task_util) = 0
+	 *      cpu_util_without = (cpu_util - task_util) = 0
 	 *
 	 * b) if other tasks are SLEEPING on this CPU, which is now exiting
 	 *    IDLE, then:
 	 *      cpu_util >= task_util
 	 *      cpu_util > util_est (== 0)
 	 *    and thus we discount *p's blocked utilization to return:
-	 *      cpu_util_wake = (cpu_util - task_util) >= 0
+	 *      cpu_util_without = (cpu_util - task_util) >= 0
 	 *
 	 * c) if other tasks are RUNNABLE on that CPU and
 	 *      util_est > cpu_util
@@ -6260,8 +6269,33 @@ static unsigned long cpu_util_wake(int cpu, struct task_struct *p)
 	 * covered by the following code when estimated utilization is
 	 * enabled.
 	 */
-	if (sched_feat(UTIL_EST))
-		util = max(util, READ_ONCE(cfs_rq->avg.util_est.enqueued));
+	if (sched_feat(UTIL_EST)) {
+		unsigned int estimated =
+			READ_ONCE(cfs_rq->avg.util_est.enqueued);
+
+		/*
+		 * Despite the following checks we still have a small window
+		 * for a possible race, when an execl's select_task_rq_fair()
+		 * races with LB's detach_task():
+		 *
+		 *   detach_task()
+		 *     p->on_rq = TASK_ON_RQ_MIGRATING;
+		 *     ---------------------------------- A
+		 *     deactivate_task()                   \
+		 *       dequeue_task()                     + RaceTime
+		 *         util_est_dequeue()              /
+		 *     ---------------------------------- B
+		 *
+		 * The additional check on "current == p" it's required to
+		 * properly fix the execl regression and it helps in further
+		 * reducing the chances for the above race.
+		 */
+		if (unlikely(task_on_rq_queued(p) || current == p)) {
+			estimated -= min_t(unsigned int, estimated,
+					   (_task_util_est(p) | UTIL_AVG_UNCHANGED));
+		}
+		util = max(util, estimated);
+	}
 
 	/*
 	 * Utilization (estimated) can exceed the CPU capacity, thus let's
-- 
GitLab


From c10a8de0d32e95b0b8c7c17b6dc09baea5a5a899 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@linux.intel.com>
Date: Fri, 19 Oct 2018 10:04:18 -0700
Subject: [PATCH 0964/1890] perf/x86/intel/uncore: Add more IMC PCI IDs for
 KabyLake and CoffeeLake CPUs

KabyLake and CoffeeLake CPUs have the same client uncore events as SkyLake.

Add the PCI IDs for the KabyLake Y, U, S processor lines and CoffeeLake U,
H, S processor lines.

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Link: http://lkml.kernel.org/r/20181019170419.378-1-kan.liang@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/events/intel/uncore_snb.c | 115 ++++++++++++++++++++++++++++-
 1 file changed, 114 insertions(+), 1 deletion(-)

diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index 8527c3e1038b7..bfa25814fe5f2 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -15,6 +15,25 @@
 #define PCI_DEVICE_ID_INTEL_SKL_HQ_IMC	0x1910
 #define PCI_DEVICE_ID_INTEL_SKL_SD_IMC	0x190f
 #define PCI_DEVICE_ID_INTEL_SKL_SQ_IMC	0x191f
+#define PCI_DEVICE_ID_INTEL_KBL_Y_IMC	0x590c
+#define PCI_DEVICE_ID_INTEL_KBL_U_IMC	0x5904
+#define PCI_DEVICE_ID_INTEL_KBL_UQ_IMC	0x5914
+#define PCI_DEVICE_ID_INTEL_KBL_SD_IMC	0x590f
+#define PCI_DEVICE_ID_INTEL_KBL_SQ_IMC	0x591f
+#define PCI_DEVICE_ID_INTEL_CFL_2U_IMC	0x3ecc
+#define PCI_DEVICE_ID_INTEL_CFL_4U_IMC	0x3ed0
+#define PCI_DEVICE_ID_INTEL_CFL_4H_IMC	0x3e10
+#define PCI_DEVICE_ID_INTEL_CFL_6H_IMC	0x3ec4
+#define PCI_DEVICE_ID_INTEL_CFL_2S_D_IMC	0x3e0f
+#define PCI_DEVICE_ID_INTEL_CFL_4S_D_IMC	0x3e1f
+#define PCI_DEVICE_ID_INTEL_CFL_6S_D_IMC	0x3ec2
+#define PCI_DEVICE_ID_INTEL_CFL_8S_D_IMC	0x3e30
+#define PCI_DEVICE_ID_INTEL_CFL_4S_W_IMC	0x3e18
+#define PCI_DEVICE_ID_INTEL_CFL_6S_W_IMC	0x3ec6
+#define PCI_DEVICE_ID_INTEL_CFL_8S_W_IMC	0x3e31
+#define PCI_DEVICE_ID_INTEL_CFL_4S_S_IMC	0x3e33
+#define PCI_DEVICE_ID_INTEL_CFL_6S_S_IMC	0x3eca
+#define PCI_DEVICE_ID_INTEL_CFL_8S_S_IMC	0x3e32
 
 /* SNB event control */
 #define SNB_UNC_CTL_EV_SEL_MASK			0x000000ff
@@ -569,7 +588,82 @@ static const struct pci_device_id skl_uncore_pci_ids[] = {
 		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_SQ_IMC),
 		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
 	},
-
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_Y_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_U_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_UQ_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_SD_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_SQ_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_2U_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4U_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4H_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6H_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_2S_D_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4S_D_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6S_D_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_D_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4S_W_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6S_W_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_W_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4S_S_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6S_S_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_S_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
 	{ /* end: all zeroes */ },
 };
 
@@ -618,6 +712,25 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
 	IMC_DEV(SKL_HQ_IMC, &skl_uncore_pci_driver),  /* 6th Gen Core H Quad Core */
 	IMC_DEV(SKL_SD_IMC, &skl_uncore_pci_driver),  /* 6th Gen Core S Dual Core */
 	IMC_DEV(SKL_SQ_IMC, &skl_uncore_pci_driver),  /* 6th Gen Core S Quad Core */
+	IMC_DEV(KBL_Y_IMC, &skl_uncore_pci_driver),  /* 7th Gen Core Y */
+	IMC_DEV(KBL_U_IMC, &skl_uncore_pci_driver),  /* 7th Gen Core U */
+	IMC_DEV(KBL_UQ_IMC, &skl_uncore_pci_driver),  /* 7th Gen Core U Quad Core */
+	IMC_DEV(KBL_SD_IMC, &skl_uncore_pci_driver),  /* 7th Gen Core S Dual Core */
+	IMC_DEV(KBL_SQ_IMC, &skl_uncore_pci_driver),  /* 7th Gen Core S Quad Core */
+	IMC_DEV(CFL_2U_IMC, &skl_uncore_pci_driver),  /* 8th Gen Core U 2 Cores */
+	IMC_DEV(CFL_4U_IMC, &skl_uncore_pci_driver),  /* 8th Gen Core U 4 Cores */
+	IMC_DEV(CFL_4H_IMC, &skl_uncore_pci_driver),  /* 8th Gen Core H 4 Cores */
+	IMC_DEV(CFL_6H_IMC, &skl_uncore_pci_driver),  /* 8th Gen Core H 6 Cores */
+	IMC_DEV(CFL_2S_D_IMC, &skl_uncore_pci_driver),  /* 8th Gen Core S 2 Cores Desktop */
+	IMC_DEV(CFL_4S_D_IMC, &skl_uncore_pci_driver),  /* 8th Gen Core S 4 Cores Desktop */
+	IMC_DEV(CFL_6S_D_IMC, &skl_uncore_pci_driver),  /* 8th Gen Core S 6 Cores Desktop */
+	IMC_DEV(CFL_8S_D_IMC, &skl_uncore_pci_driver),  /* 8th Gen Core S 8 Cores Desktop */
+	IMC_DEV(CFL_4S_W_IMC, &skl_uncore_pci_driver),  /* 8th Gen Core S 4 Cores Work Station */
+	IMC_DEV(CFL_6S_W_IMC, &skl_uncore_pci_driver),  /* 8th Gen Core S 6 Cores Work Station */
+	IMC_DEV(CFL_8S_W_IMC, &skl_uncore_pci_driver),  /* 8th Gen Core S 8 Cores Work Station */
+	IMC_DEV(CFL_4S_S_IMC, &skl_uncore_pci_driver),  /* 8th Gen Core S 4 Cores Server */
+	IMC_DEV(CFL_6S_S_IMC, &skl_uncore_pci_driver),  /* 8th Gen Core S 6 Cores Server */
+	IMC_DEV(CFL_8S_S_IMC, &skl_uncore_pci_driver),  /* 8th Gen Core S 8 Cores Server */
 	{  /* end marker */ }
 };
 
-- 
GitLab


From 4d47d6407ac7b4b442a4e717488a3bb137398b6c Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@linux.intel.com>
Date: Fri, 19 Oct 2018 10:04:19 -0700
Subject: [PATCH 0965/1890] perf/x86/intel/uncore: Support CoffeeLake 8th CBOX

Coffee Lake has 8 core products which has 8 Cboxes. The 8th CBOX is
mapped into different MSR space.

Increase the num_boxes to 8 to handle the new products. It will not
impact the previous platforms, SkyLake, KabyLake and earlier CoffeeLake.
Because the num_boxes will be recalculated in uncore_cpu_init and
doesn't exceed the x86_max_cores.

Introduce a new box flag bit to indicate the 8th CBOX.

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Link: http://lkml.kernel.org/r/20181019170419.378-2-kan.liang@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/events/intel/uncore.h     | 33 ++++++++++++++++++++++--------
 arch/x86/events/intel/uncore_snb.c |  6 +++++-
 2 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index e17ab885b1e92..cb46d602a6b8b 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -129,8 +129,15 @@ struct intel_uncore_box {
 	struct intel_uncore_extra_reg shared_regs[0];
 };
 
-#define UNCORE_BOX_FLAG_INITIATED	0
-#define UNCORE_BOX_FLAG_CTL_OFFS8	1 /* event config registers are 8-byte apart */
+/* CFL uncore 8th cbox MSRs */
+#define CFL_UNC_CBO_7_PERFEVTSEL0		0xf70
+#define CFL_UNC_CBO_7_PER_CTR0			0xf76
+
+#define UNCORE_BOX_FLAG_INITIATED		0
+/* event config registers are 8-byte apart */
+#define UNCORE_BOX_FLAG_CTL_OFFS8		1
+/* CFL 8th CBOX has different MSR space */
+#define UNCORE_BOX_FLAG_CFL8_CBOX_MSR_OFFS	2
 
 struct uncore_event_desc {
 	struct kobj_attribute attr;
@@ -297,17 +304,27 @@ unsigned int uncore_freerunning_counter(struct intel_uncore_box *box,
 static inline
 unsigned uncore_msr_event_ctl(struct intel_uncore_box *box, int idx)
 {
-	return box->pmu->type->event_ctl +
-		(box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) +
-		uncore_msr_box_offset(box);
+	if (test_bit(UNCORE_BOX_FLAG_CFL8_CBOX_MSR_OFFS, &box->flags)) {
+		return CFL_UNC_CBO_7_PERFEVTSEL0 +
+		       (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx);
+	} else {
+		return box->pmu->type->event_ctl +
+		       (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) +
+		       uncore_msr_box_offset(box);
+	}
 }
 
 static inline
 unsigned uncore_msr_perf_ctr(struct intel_uncore_box *box, int idx)
 {
-	return box->pmu->type->perf_ctr +
-		(box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) +
-		uncore_msr_box_offset(box);
+	if (test_bit(UNCORE_BOX_FLAG_CFL8_CBOX_MSR_OFFS, &box->flags)) {
+		return CFL_UNC_CBO_7_PER_CTR0 +
+		       (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx);
+	} else {
+		return box->pmu->type->perf_ctr +
+		       (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) +
+		       uncore_msr_box_offset(box);
+	}
 }
 
 static inline
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index bfa25814fe5f2..2593b0d7aeee6 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -221,6 +221,10 @@ static void skl_uncore_msr_init_box(struct intel_uncore_box *box)
 		wrmsrl(SKL_UNC_PERF_GLOBAL_CTL,
 			SNB_UNC_GLOBAL_CTL_EN | SKL_UNC_GLOBAL_CTL_CORE_ALL);
 	}
+
+	/* The 8th CBOX has different MSR space */
+	if (box->pmu->pmu_idx == 7)
+		__set_bit(UNCORE_BOX_FLAG_CFL8_CBOX_MSR_OFFS, &box->flags);
 }
 
 static void skl_uncore_msr_enable_box(struct intel_uncore_box *box)
@@ -247,7 +251,7 @@ static struct intel_uncore_ops skl_uncore_msr_ops = {
 static struct intel_uncore_type skl_uncore_cbox = {
 	.name		= "cbox",
 	.num_counters   = 4,
-	.num_boxes	= 5,
+	.num_boxes	= 8,
 	.perf_ctr_bits	= 44,
 	.fixed_ctr_bits	= 48,
 	.perf_ctr	= SNB_UNC_CBO_0_PER_CTR0,
-- 
GitLab


From 1e9c75fb9c47a75a9aec0cd17db5f6dc36b58e00 Mon Sep 17 00:00:00 2001
From: Benjamin Coddington <bcodding@redhat.com>
Date: Wed, 3 Oct 2018 10:18:33 -0400
Subject: [PATCH 0966/1890] mnt: fix __detach_mounts infinite loop

Since commit ff17fa561a04 ("d_invalidate(): unhash immediately")
immediately unhashes the dentry, we'll never return the mountpoint in
lookup_mountpoint(), which can lead to an unbreakable loop in
d_invalidate().

I have reports of NFS clients getting into this condition after the server
removes an export of an existing mount created through follow_automount(),
but I suspect there are various other ways to produce this problem if we
hunt down users of d_invalidate().  For example, it is possible to get into
this state by using XFS' d_invalidate() call in xfs_vn_unlink():

truncate -s 100m img{1,2}

mkfs.xfs -q -n version=ci img1
mkfs.xfs -q -n version=ci img2

mkdir -p /mnt/xfs
mount img1 /mnt/xfs

mkdir /mnt/xfs/sub1
mount img2 /mnt/xfs/sub1

cat > /mnt/xfs/sub1/foo &
umount -l /mnt/xfs/sub1
mount img2 /mnt/xfs/sub1

mount --make-private /mnt/xfs

mkdir /mnt/xfs/sub2
mount --move /mnt/xfs/sub1 /mnt/xfs/sub2
rmdir /mnt/xfs/sub1

Fix this by moving the check for an unlinked dentry out of the
detach_mounts() path.

Fixes: ff17fa561a04 ("d_invalidate(): unhash immediately")
Cc: stable@vger.kernel.org
Reviewed-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 fs/namespace.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index 74f64294a4108..a7f91265ea671 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -695,9 +695,6 @@ static struct mountpoint *lookup_mountpoint(struct dentry *dentry)
 
 	hlist_for_each_entry(mp, chain, m_hash) {
 		if (mp->m_dentry == dentry) {
-			/* might be worth a WARN_ON() */
-			if (d_unlinked(dentry))
-				return ERR_PTR(-ENOENT);
 			mp->m_count++;
 			return mp;
 		}
@@ -711,6 +708,9 @@ static struct mountpoint *get_mountpoint(struct dentry *dentry)
 	int ret;
 
 	if (d_mountpoint(dentry)) {
+		/* might be worth a WARN_ON() */
+		if (d_unlinked(dentry))
+			return ERR_PTR(-ENOENT);
 mountpoint:
 		read_seqlock_excl(&mount_lock);
 		mp = lookup_mountpoint(dentry);
-- 
GitLab


From 563785edfcef02b566e64fb5292c74c1600808aa Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 12 Nov 2018 09:43:12 +0100
Subject: [PATCH 0967/1890] ALSA: hda/realtek - Add quirk entry for HP Pavilion
 15

HP Pavilion 15 (103c:820d) with ALC295 codec requires the quirk for
the mute LED control over mic3 pin.  Added the corresponding quirk
entry.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=201653
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index fa61674a56050..970bc44a378b8 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -6481,6 +6481,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x103c, 0x2336, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
 	SND_PCI_QUIRK(0x103c, 0x2337, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
 	SND_PCI_QUIRK(0x103c, 0x221c, "HP EliteBook 755 G2", ALC280_FIXUP_HP_HEADSET_MIC),
+	SND_PCI_QUIRK(0x103c, 0x820d, "HP Pavilion 15", ALC269_FIXUP_HP_MUTE_LED_MIC3),
 	SND_PCI_QUIRK(0x103c, 0x8256, "HP", ALC221_FIXUP_HP_FRONT_MIC),
 	SND_PCI_QUIRK(0x103c, 0x827e, "HP x360", ALC295_FIXUP_HP_X360),
 	SND_PCI_QUIRK(0x103c, 0x82bf, "HP", ALC221_FIXUP_HP_MIC_NO_PRESENCE),
-- 
GitLab


From def40774f63ad446aaf5c12e2185045979c06c75 Mon Sep 17 00:00:00 2001
From: Xinyun Liu <xinyun.liu@intel.com>
Date: Mon, 29 Oct 2018 14:18:25 +0800
Subject: [PATCH 0968/1890] drm/i915/gvt: not to touch undefined MOCS registers

Some engines are not available for all Gens. eg, Gen11 introduced
VCS3/VCS4/VECS2, and VCS2 is not supported on some Gen9 machines. So need to
add check before access them.

Signed-off-by: Xinyun Liu <xinyun.liu@intel.com>
Signed-off-by: Yakui Zhao <Yakui.Zhao@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/mmio_context.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
index 088a62ab2bc89..cdd366d44938b 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.c
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
@@ -171,6 +171,8 @@ static void load_render_mocs(struct drm_i915_private *dev_priv)
 	int ring_id, i;
 
 	for (ring_id = 0; ring_id < ARRAY_SIZE(regs); ring_id++) {
+		if (!HAS_ENGINE(dev_priv, ring_id))
+			continue;
 		offset.reg = regs[ring_id];
 		for (i = 0; i < GEN9_MOCS_SIZE; i++) {
 			gen9_render_mocs.control_table[ring_id][i] =
-- 
GitLab


From f3be657d96b0709c832b165501170f072882df3c Mon Sep 17 00:00:00 2001
From: Hang Yuan <hang.yuan@linux.intel.com>
Date: Tue, 30 Oct 2018 13:12:23 +0800
Subject: [PATCH 0969/1890] drm/i915/gvt: ensure gpu is powered before do
 i915_gem_gtt_insert

i915_gem_gtt_insert may evict some vmas and access HW if ggtt
vm space is not enough. So add mmio_hw_access_pre before invoke
i915_gem_gtt_insert to avoid call trace like below in vgpu create/
destroy test.

WARNING: CPU: 6 PID: 8720 at drivers/gpu/drm/i915/intel_drv.h:1768
assert_rpm_wakelock_held.part.2+0x27/0x30 [i915]
RPM wakelock ref not held during HW access

Call Trace:
  [<ffffffff99af3b22>] dump_stack+0x19/0x1b
  [<ffffffff9948e338>] __warn+0xd8/0x100
  [<ffffffff9948e3bf>] warn_slowpath_fmt+0x5f/0x80
  [<ffffffffc0d5cc32>] assert_rpm_wakelock_held.part.2+0x27/0x30 [i915]
  [<ffffffffc0c7ffcf>] intel_runtime_pm_get_noresume+0x6f/0x80 [i915]
  [<ffffffffc0ca614d>] i915_gem_request_alloc+0x2dd/0x3c0 [i915]
  [<ffffffffc0c9056e>] i915_gem_switch_to_kernel_context+0xae/0x1d0 [i915]
  [<ffffffffc0c91572>] ggtt_flush+0x12/0x30 [i915]
  [<ffffffffc0c917ef>] i915_gem_evict_something+0x25f/0x470 [i915]
  [<ffffffffc0c9b62c>] i915_gem_gtt_insert+0x15c/0x1c0 [i915]
  [<ffffffffc0d35837>] alloc_gm+0xa7/0x160 [i915]
  [<ffffffffc0d35d8d>] intel_vgpu_alloc_resource+0x1ad/0x410 [i915]
  [<ffffffffc0d4819c>] intel_gvt_create_vgpu+0x16c/0x260 [i915]
  [<ffffffffc055d980>] intel_vgpu_create+0x50/0x140 [kvmgt]
  [<ffffffffc04fc6fa>] mdev_device_create+0x1aa/0x2e0 [mdev]

v2: use mmio_hw_access_pre/post <Zhenyu>

Signed-off-by: Hang Yuan <hang.yuan@linux.intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/aperture_gm.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c
index fe754022e356b..359d37d5c958c 100644
--- a/drivers/gpu/drm/i915/gvt/aperture_gm.c
+++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c
@@ -61,10 +61,12 @@ static int alloc_gm(struct intel_vgpu *vgpu, bool high_gm)
 	}
 
 	mutex_lock(&dev_priv->drm.struct_mutex);
+	mmio_hw_access_pre(dev_priv);
 	ret = i915_gem_gtt_insert(&dev_priv->ggtt.vm, node,
 				  size, I915_GTT_PAGE_SIZE,
 				  I915_COLOR_UNEVICTABLE,
 				  start, end, flags);
+	mmio_hw_access_post(dev_priv);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 	if (ret)
 		gvt_err("fail to alloc %s gm space from host\n",
-- 
GitLab


From f4156f9656feac21f4de712fac94fae964c5d402 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Tue, 30 Oct 2018 12:17:10 +0100
Subject: [PATCH 0970/1890] batman-adv: Use explicit tvlv padding for ELP
 packets
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The announcement messages of batman-adv COMPAT_VERSION 15 have the
possibility to announce additional information via a dynamic TVLV part.
This part is optional for the ELP packets and currently not parsed by the
Linux implementation. Still out-of-tree versions are using it to transport
things like neighbor hashes to optimize the rebroadcast behavior.

Since the ELP broadcast packets are smaller than the minimal ethernet
packet, it often has to be padded. This is often done (as specified in
RFC894) with octets of zero and thus work perfectly fine with the TVLV
part (making it a zero length and thus empty). But not all ethernet
compatible hardware seems to follow this advice. To avoid ambiguous
situations when parsing the TVLV header, just force the 4 bytes (TVLV
length + padding) after the required ELP header to zero.

Fixes: d6f94d91f766 ("batman-adv: ELP - adding basic infrastructure")
Reported-by: Linus LÃ¼ssing <linus.luessing@c0d3.blue>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/bat_v_elp.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index 9f481cfdf77da..e8090f099eb80 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -352,19 +352,21 @@ static void batadv_v_elp_periodic_work(struct work_struct *work)
  */
 int batadv_v_elp_iface_enable(struct batadv_hard_iface *hard_iface)
 {
+	static const size_t tvlv_padding = sizeof(__be32);
 	struct batadv_elp_packet *elp_packet;
 	unsigned char *elp_buff;
 	u32 random_seqno;
 	size_t size;
 	int res = -ENOMEM;
 
-	size = ETH_HLEN + NET_IP_ALIGN + BATADV_ELP_HLEN;
+	size = ETH_HLEN + NET_IP_ALIGN + BATADV_ELP_HLEN + tvlv_padding;
 	hard_iface->bat_v.elp_skb = dev_alloc_skb(size);
 	if (!hard_iface->bat_v.elp_skb)
 		goto out;
 
 	skb_reserve(hard_iface->bat_v.elp_skb, ETH_HLEN + NET_IP_ALIGN);
-	elp_buff = skb_put_zero(hard_iface->bat_v.elp_skb, BATADV_ELP_HLEN);
+	elp_buff = skb_put_zero(hard_iface->bat_v.elp_skb,
+				BATADV_ELP_HLEN + tvlv_padding);
 	elp_packet = (struct batadv_elp_packet *)elp_buff;
 
 	elp_packet->packet_type = BATADV_ELP;
-- 
GitLab


From d7d8bbb40a5b1f682ee6589e212934f4c6b8ad60 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Wed, 7 Nov 2018 23:09:12 +0100
Subject: [PATCH 0971/1890] batman-adv: Expand merged fragment buffer for full
 packet
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The complete size ("total_size") of the fragmented packet is stored in the
fragment header and in the size of the fragment chain. When the fragments
are ready for merge, the skbuff's tail of the first fragment is expanded to
have enough room after the data pointer for at least total_size. This means
that it gets expanded by total_size - first_skb->len.

But this is ignoring the fact that after expanding the buffer, the fragment
header is pulled by from this buffer. Assuming that the tailroom of the
buffer was already 0, the buffer after the data pointer of the skbuff is
now only total_size - len(fragment_header) large. When the merge function
is then processing the remaining fragments, the code to copy the data over
to the merged skbuff will cause an skb_over_panic when it tries to actually
put enough data to fill the total_size bytes of the packet.

The size of the skb_pull must therefore also be taken into account when the
buffer's tailroom is expanded.

Fixes: 610bfc6bc99b ("batman-adv: Receive fragmented packets and merge")
Reported-by: Martin Weinelt <martin@darmstadt.freifunk.net>
Co-authored-by: Linus LÃ¼ssing <linus.luessing@c0d3.blue>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/fragmentation.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 0fddc17106bd8..5b71a289d04fc 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -275,7 +275,7 @@ batadv_frag_merge_packets(struct hlist_head *chain)
 	kfree(entry);
 
 	packet = (struct batadv_frag_packet *)skb_out->data;
-	size = ntohs(packet->total_size);
+	size = ntohs(packet->total_size) + hdr_size;
 
 	/* Make room for the rest of the fragments. */
 	if (pskb_expand_head(skb_out, 0, size - skb_out->len, GFP_ATOMIC) < 0) {
-- 
GitLab


From e0c827aca0730b51f38081aa4e8ecf0912aab55f Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Sat, 10 Nov 2018 13:16:51 +0200
Subject: [PATCH 0972/1890] drm/omap: Populate DSS children in omapdss driver

The DSS DT node contains children that describe the DSS components
(DISPC and internal encoders). Each of those components is handled by a
platform driver, and thus needs to be backed by a platform device.

The corresponding platform devices are created in mach-omap2 code by a
call to of_platform_populate(). While this approach has worked so far,
it doesn't model the hardware architecture very well, as it creates
child devices before the parent is ready to handle them. This would be
akin to creating I2C slaves before the I2C master is available.

The task can be easily performed in the omapdss driver code instead,
simplifying mach-omap2 code. We however can't remove the mach-omap2 code
completely as the omap2fb driver still depends on it, but we can move it
to the omap2fb-specific section, where it can stay until the omap2fb
driver gets removed.

This has the added benefit of not allowing DSS components to probe
before the DSS itself, which led to runtime PM issues when the DSS probe
is deferred.

Fixes: 27d624527d99 ("drm/omap: dss: Acquire next dssdev at probe time")
Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Tony Lindgren <tony@atomide.com>
Reviewed-by: Sebastian Reichel <sebastian.reichel@collabora.com>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181110111654.4387-2-laurent.pinchart@ideasonboard.com
---
 arch/arm/mach-omap2/display.c     | 111 ++++++++++++++----------------
 drivers/gpu/drm/omapdrm/dss/dss.c |  11 ++-
 2 files changed, 63 insertions(+), 59 deletions(-)

diff --git a/arch/arm/mach-omap2/display.c b/arch/arm/mach-omap2/display.c
index 9500b6e273801..f86b72d1d59e5 100644
--- a/arch/arm/mach-omap2/display.c
+++ b/arch/arm/mach-omap2/display.c
@@ -209,11 +209,61 @@ static int __init omapdss_init_fbdev(void)
 
 	return 0;
 }
-#else
-static inline int omapdss_init_fbdev(void)
+
+static const char * const omapdss_compat_names[] __initconst = {
+	"ti,omap2-dss",
+	"ti,omap3-dss",
+	"ti,omap4-dss",
+	"ti,omap5-dss",
+	"ti,dra7-dss",
+};
+
+static struct device_node * __init omapdss_find_dss_of_node(void)
 {
-	return 0;
+	struct device_node *node;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(omapdss_compat_names); ++i) {
+		node = of_find_compatible_node(NULL, NULL,
+			omapdss_compat_names[i]);
+		if (node)
+			return node;
+	}
+
+	return NULL;
 }
+
+static int __init omapdss_init_of(void)
+{
+	int r;
+	struct device_node *node;
+	struct platform_device *pdev;
+
+	/* only create dss helper devices if dss is enabled in the .dts */
+
+	node = omapdss_find_dss_of_node();
+	if (!node)
+		return 0;
+
+	if (!of_device_is_available(node))
+		return 0;
+
+	pdev = of_find_device_by_node(node);
+
+	if (!pdev) {
+		pr_err("Unable to find DSS platform device\n");
+		return -ENODEV;
+	}
+
+	r = of_platform_populate(node, NULL, NULL, &pdev->dev);
+	if (r) {
+		pr_err("Unable to populate DSS submodule devices\n");
+		return r;
+	}
+
+	return omapdss_init_fbdev();
+}
+omap_device_initcall(omapdss_init_of);
 #endif /* CONFIG_FB_OMAP2 */
 
 static void dispc_disable_outputs(void)
@@ -361,58 +411,3 @@ int omap_dss_reset(struct omap_hwmod *oh)
 
 	return r;
 }
-
-static const char * const omapdss_compat_names[] __initconst = {
-	"ti,omap2-dss",
-	"ti,omap3-dss",
-	"ti,omap4-dss",
-	"ti,omap5-dss",
-	"ti,dra7-dss",
-};
-
-static struct device_node * __init omapdss_find_dss_of_node(void)
-{
-	struct device_node *node;
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(omapdss_compat_names); ++i) {
-		node = of_find_compatible_node(NULL, NULL,
-			omapdss_compat_names[i]);
-		if (node)
-			return node;
-	}
-
-	return NULL;
-}
-
-static int __init omapdss_init_of(void)
-{
-	int r;
-	struct device_node *node;
-	struct platform_device *pdev;
-
-	/* only create dss helper devices if dss is enabled in the .dts */
-
-	node = omapdss_find_dss_of_node();
-	if (!node)
-		return 0;
-
-	if (!of_device_is_available(node))
-		return 0;
-
-	pdev = of_find_device_by_node(node);
-
-	if (!pdev) {
-		pr_err("Unable to find DSS platform device\n");
-		return -ENODEV;
-	}
-
-	r = of_platform_populate(node, NULL, NULL, &pdev->dev);
-	if (r) {
-		pr_err("Unable to populate DSS submodule devices\n");
-		return r;
-	}
-
-	return omapdss_init_fbdev();
-}
-omap_device_initcall(omapdss_init_of);
diff --git a/drivers/gpu/drm/omapdrm/dss/dss.c b/drivers/gpu/drm/omapdrm/dss/dss.c
index 1aaf260aa9b86..7553c7fc1c457 100644
--- a/drivers/gpu/drm/omapdrm/dss/dss.c
+++ b/drivers/gpu/drm/omapdrm/dss/dss.c
@@ -1484,16 +1484,23 @@ static int dss_probe(struct platform_device *pdev)
 						   dss);
 
 	/* Add all the child devices as components. */
+	r = of_platform_populate(pdev->dev.of_node, NULL, NULL, &pdev->dev);
+	if (r)
+		goto err_uninit_debugfs;
+
 	omapdss_gather_components(&pdev->dev);
 
 	device_for_each_child(&pdev->dev, &match, dss_add_child_component);
 
 	r = component_master_add_with_match(&pdev->dev, &dss_component_ops, match);
 	if (r)
-		goto err_uninit_debugfs;
+		goto err_of_depopulate;
 
 	return 0;
 
+err_of_depopulate:
+	of_platform_depopulate(&pdev->dev);
+
 err_uninit_debugfs:
 	dss_debugfs_remove_file(dss->debugfs.clk);
 	dss_debugfs_remove_file(dss->debugfs.dss);
@@ -1522,6 +1529,8 @@ static int dss_remove(struct platform_device *pdev)
 {
 	struct dss_device *dss = platform_get_drvdata(pdev);
 
+	of_platform_depopulate(&pdev->dev);
+
 	component_master_del(&pdev->dev, &dss_component_ops);
 
 	dss_debugfs_remove_file(dss->debugfs.clk);
-- 
GitLab


From f8523b64d2d2f94bb429c15166d7601d39243c4d Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Sat, 10 Nov 2018 13:16:52 +0200
Subject: [PATCH 0973/1890] drm/omap: hdmi4: Ensure the device is active during
 bind

The bind function performs hardware access (in hdmi4_cec_init()) and
thus requires the device to be active. Ensure this by surrounding the
bind function by hdmi_runtime_get() and hdmi_runtime_put() calls.

Fixes: 27d624527d99 ("drm/omap: dss: Acquire next dssdev at probe time")
Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Reviewed-by: Sebastian Reichel <sebastian.reichel@collabora.com>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181110111654.4387-3-laurent.pinchart@ideasonboard.com
---
 drivers/gpu/drm/omapdrm/dss/hdmi4.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi4.c b/drivers/gpu/drm/omapdrm/dss/hdmi4.c
index cf6230eac31a3..073fa462930a2 100644
--- a/drivers/gpu/drm/omapdrm/dss/hdmi4.c
+++ b/drivers/gpu/drm/omapdrm/dss/hdmi4.c
@@ -635,10 +635,14 @@ static int hdmi4_bind(struct device *dev, struct device *master, void *data)
 
 	hdmi->dss = dss;
 
-	r = hdmi_pll_init(dss, hdmi->pdev, &hdmi->pll, &hdmi->wp);
+	r = hdmi_runtime_get(hdmi);
 	if (r)
 		return r;
 
+	r = hdmi_pll_init(dss, hdmi->pdev, &hdmi->pll, &hdmi->wp);
+	if (r)
+		goto err_runtime_put;
+
 	r = hdmi4_cec_init(hdmi->pdev, &hdmi->core, &hdmi->wp);
 	if (r)
 		goto err_pll_uninit;
@@ -652,12 +656,16 @@ static int hdmi4_bind(struct device *dev, struct device *master, void *data)
 	hdmi->debugfs = dss_debugfs_create_file(dss, "hdmi", hdmi_dump_regs,
 					       hdmi);
 
+	hdmi_runtime_put(hdmi);
+
 	return 0;
 
 err_cec_uninit:
 	hdmi4_cec_uninit(&hdmi->core);
 err_pll_uninit:
 	hdmi_pll_uninit(&hdmi->pll);
+err_runtime_put:
+	hdmi_runtime_put(hdmi);
 	return r;
 }
 
-- 
GitLab


From 350c03e880038bf60184500bab9025d3361c0b0e Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Sat, 10 Nov 2018 13:16:53 +0200
Subject: [PATCH 0974/1890] drm/omap: dsi: Ensure the device is active during
 probe

The probe function performs hardware access to read the number of
supported data lanes from a configuration register and thus requires the
device to be active. Ensure this by surrounding the access with
dsi_runtime_get() and dsi_runtime_put() calls.

Fixes: edb715dffdee ("drm/omap: dss: dsi: Move initialization code from bind to probe")
Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Tony Lindgren <tony@atomide.com>
Reviewed-by: Sebastian Reichel <sebastian.reichel@collabora.com>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181110111654.4387-4-laurent.pinchart@ideasonboard.com
---
 drivers/gpu/drm/omapdrm/dss/dsi.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/omapdrm/dss/dsi.c b/drivers/gpu/drm/omapdrm/dss/dsi.c
index 394c129cfb3bb..b9d5ad7e67d8a 100644
--- a/drivers/gpu/drm/omapdrm/dss/dsi.c
+++ b/drivers/gpu/drm/omapdrm/dss/dsi.c
@@ -5409,11 +5409,14 @@ static int dsi_probe(struct platform_device *pdev)
 
 	/* DSI on OMAP3 doesn't have register DSI_GNQ, set number
 	 * of data to 3 by default */
-	if (dsi->data->quirks & DSI_QUIRK_GNQ)
+	if (dsi->data->quirks & DSI_QUIRK_GNQ) {
+		dsi_runtime_get(dsi);
 		/* NB_DATA_LANES */
 		dsi->num_lanes_supported = 1 + REG_GET(dsi, DSI_GNQ, 11, 9);
-	else
+		dsi_runtime_put(dsi);
+	} else {
 		dsi->num_lanes_supported = 3;
+	}
 
 	r = dsi_init_output(dsi);
 	if (r)
-- 
GitLab


From 24ec84e854c68ceda59a26027114eb7f260f9411 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Sat, 10 Nov 2018 13:16:54 +0200
Subject: [PATCH 0975/1890] drm/omap: Move DISPC runtime PM handling to omapdrm

The internal encoders (DSI, HDMI4, HDMI5 and VENC) runtime PM handlers
attempt to manage the runtime PM state of the connected DISPC, based on
the rationale that the DISPC providing data to the encoders requires
ensuring that the display is active whenever the encoders are active.

While the DISPC provides data to the encoders, it doesn't as such
constitute a resource that encoders require in order to be taken out
of suspend, contrary to for instance a functional clock or a power
supply. Encoders registers can be accessed without the DISPC being
active, and while the encoders will not output any video stream without
being fed by the DISPC, the DISPC PM state doesn't influence the
encoders PM state.

For this reason the DISPC PM state is better managed from the omapdrm
driver, in the CRTC enable and disable operations. This allows the
encoders PM state to be handled separately from the DISPC, and in
particular at times when the DISPC may not be available (for instance at
probe due to the DSS probe being deferred, or at remove time du to the
DISPC being already removed).

Fixes: edb715dffdee ("drm/omap: dss: dsi: Move initialization code from bind to probe")
Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Reviewed-by: Sebastian Reichel <sebastian.reichel@collabora.com>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181110111654.4387-5-laurent.pinchart@ideasonboard.com
---
 drivers/gpu/drm/omapdrm/dss/dsi.c   |  7 -------
 drivers/gpu/drm/omapdrm/dss/hdmi4.c | 27 ---------------------------
 drivers/gpu/drm/omapdrm/dss/hdmi5.c | 27 ---------------------------
 drivers/gpu/drm/omapdrm/dss/venc.c  |  7 -------
 drivers/gpu/drm/omapdrm/omap_crtc.c |  6 ++++++
 5 files changed, 6 insertions(+), 68 deletions(-)

diff --git a/drivers/gpu/drm/omapdrm/dss/dsi.c b/drivers/gpu/drm/omapdrm/dss/dsi.c
index b9d5ad7e67d8a..36123c086d97b 100644
--- a/drivers/gpu/drm/omapdrm/dss/dsi.c
+++ b/drivers/gpu/drm/omapdrm/dss/dsi.c
@@ -5473,19 +5473,12 @@ static int dsi_runtime_suspend(struct device *dev)
 	/* wait for current handler to finish before turning the DSI off */
 	synchronize_irq(dsi->irq);
 
-	dispc_runtime_put(dsi->dss->dispc);
-
 	return 0;
 }
 
 static int dsi_runtime_resume(struct device *dev)
 {
 	struct dsi_data *dsi = dev_get_drvdata(dev);
-	int r;
-
-	r = dispc_runtime_get(dsi->dss->dispc);
-	if (r)
-		return r;
 
 	dsi->is_enabled = true;
 	/* ensure the irq handler sees the is_enabled value */
diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi4.c b/drivers/gpu/drm/omapdrm/dss/hdmi4.c
index 073fa462930a2..aabdda394c9c6 100644
--- a/drivers/gpu/drm/omapdrm/dss/hdmi4.c
+++ b/drivers/gpu/drm/omapdrm/dss/hdmi4.c
@@ -841,32 +841,6 @@ static int hdmi4_remove(struct platform_device *pdev)
 	return 0;
 }
 
-static int hdmi_runtime_suspend(struct device *dev)
-{
-	struct omap_hdmi *hdmi = dev_get_drvdata(dev);
-
-	dispc_runtime_put(hdmi->dss->dispc);
-
-	return 0;
-}
-
-static int hdmi_runtime_resume(struct device *dev)
-{
-	struct omap_hdmi *hdmi = dev_get_drvdata(dev);
-	int r;
-
-	r = dispc_runtime_get(hdmi->dss->dispc);
-	if (r < 0)
-		return r;
-
-	return 0;
-}
-
-static const struct dev_pm_ops hdmi_pm_ops = {
-	.runtime_suspend = hdmi_runtime_suspend,
-	.runtime_resume = hdmi_runtime_resume,
-};
-
 static const struct of_device_id hdmi_of_match[] = {
 	{ .compatible = "ti,omap4-hdmi", },
 	{},
@@ -877,7 +851,6 @@ struct platform_driver omapdss_hdmi4hw_driver = {
 	.remove		= hdmi4_remove,
 	.driver         = {
 		.name   = "omapdss_hdmi",
-		.pm	= &hdmi_pm_ops,
 		.of_match_table = hdmi_of_match,
 		.suppress_bind_attrs = true,
 	},
diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi5.c b/drivers/gpu/drm/omapdrm/dss/hdmi5.c
index b0e4a7463f8c8..9e8556f67a291 100644
--- a/drivers/gpu/drm/omapdrm/dss/hdmi5.c
+++ b/drivers/gpu/drm/omapdrm/dss/hdmi5.c
@@ -825,32 +825,6 @@ static int hdmi5_remove(struct platform_device *pdev)
 	return 0;
 }
 
-static int hdmi_runtime_suspend(struct device *dev)
-{
-	struct omap_hdmi *hdmi = dev_get_drvdata(dev);
-
-	dispc_runtime_put(hdmi->dss->dispc);
-
-	return 0;
-}
-
-static int hdmi_runtime_resume(struct device *dev)
-{
-	struct omap_hdmi *hdmi = dev_get_drvdata(dev);
-	int r;
-
-	r = dispc_runtime_get(hdmi->dss->dispc);
-	if (r < 0)
-		return r;
-
-	return 0;
-}
-
-static const struct dev_pm_ops hdmi_pm_ops = {
-	.runtime_suspend = hdmi_runtime_suspend,
-	.runtime_resume = hdmi_runtime_resume,
-};
-
 static const struct of_device_id hdmi_of_match[] = {
 	{ .compatible = "ti,omap5-hdmi", },
 	{ .compatible = "ti,dra7-hdmi", },
@@ -862,7 +836,6 @@ struct platform_driver omapdss_hdmi5hw_driver = {
 	.remove		= hdmi5_remove,
 	.driver         = {
 		.name   = "omapdss_hdmi5",
-		.pm	= &hdmi_pm_ops,
 		.of_match_table = hdmi_of_match,
 		.suppress_bind_attrs = true,
 	},
diff --git a/drivers/gpu/drm/omapdrm/dss/venc.c b/drivers/gpu/drm/omapdrm/dss/venc.c
index ff0b18c8e4ace..b5f52727f8b17 100644
--- a/drivers/gpu/drm/omapdrm/dss/venc.c
+++ b/drivers/gpu/drm/omapdrm/dss/venc.c
@@ -946,19 +946,12 @@ static int venc_runtime_suspend(struct device *dev)
 	if (venc->tv_dac_clk)
 		clk_disable_unprepare(venc->tv_dac_clk);
 
-	dispc_runtime_put(venc->dss->dispc);
-
 	return 0;
 }
 
 static int venc_runtime_resume(struct device *dev)
 {
 	struct venc_device *venc = dev_get_drvdata(dev);
-	int r;
-
-	r = dispc_runtime_get(venc->dss->dispc);
-	if (r < 0)
-		return r;
 
 	if (venc->tv_dac_clk)
 		clk_prepare_enable(venc->tv_dac_clk);
diff --git a/drivers/gpu/drm/omapdrm/omap_crtc.c b/drivers/gpu/drm/omapdrm/omap_crtc.c
index 62928ec0e7db7..caffc547ef97e 100644
--- a/drivers/gpu/drm/omapdrm/omap_crtc.c
+++ b/drivers/gpu/drm/omapdrm/omap_crtc.c
@@ -350,11 +350,14 @@ static void omap_crtc_arm_event(struct drm_crtc *crtc)
 static void omap_crtc_atomic_enable(struct drm_crtc *crtc,
 				    struct drm_crtc_state *old_state)
 {
+	struct omap_drm_private *priv = crtc->dev->dev_private;
 	struct omap_crtc *omap_crtc = to_omap_crtc(crtc);
 	int ret;
 
 	DBG("%s", omap_crtc->name);
 
+	priv->dispc_ops->runtime_get(priv->dispc);
+
 	spin_lock_irq(&crtc->dev->event_lock);
 	drm_crtc_vblank_on(crtc);
 	ret = drm_crtc_vblank_get(crtc);
@@ -367,6 +370,7 @@ static void omap_crtc_atomic_enable(struct drm_crtc *crtc,
 static void omap_crtc_atomic_disable(struct drm_crtc *crtc,
 				     struct drm_crtc_state *old_state)
 {
+	struct omap_drm_private *priv = crtc->dev->dev_private;
 	struct omap_crtc *omap_crtc = to_omap_crtc(crtc);
 
 	DBG("%s", omap_crtc->name);
@@ -379,6 +383,8 @@ static void omap_crtc_atomic_disable(struct drm_crtc *crtc,
 	spin_unlock_irq(&crtc->dev->event_lock);
 
 	drm_crtc_vblank_off(crtc);
+
+	priv->dispc_ops->runtime_put(priv->dispc);
 }
 
 static enum drm_mode_status omap_crtc_mode_valid(struct drm_crtc *crtc,
-- 
GitLab


From cbed7545db7ae5907d7dc9d4002717d46cae29e9 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Tue, 6 Nov 2018 07:28:02 -0800
Subject: [PATCH 0976/1890] drm/omap: dsi: Fix missing of_platform_depopulate()

We're missing a call to of_platform_depopulate() on errors for dsi.
Looks like dss is already doing this.

Signed-off-by: Tony Lindgren <tony@atomide.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Reviewed-by: Sebastian Reichel <sebastian.reichel@collabora.com>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181106152802.38599-1-tony@atomide.com
---
 drivers/gpu/drm/omapdrm/dss/dsi.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/omapdrm/dss/dsi.c b/drivers/gpu/drm/omapdrm/dss/dsi.c
index 36123c086d97b..0a485c5b982eb 100644
--- a/drivers/gpu/drm/omapdrm/dss/dsi.c
+++ b/drivers/gpu/drm/omapdrm/dss/dsi.c
@@ -5429,15 +5429,19 @@ static int dsi_probe(struct platform_device *pdev)
 	}
 
 	r = of_platform_populate(dev->of_node, NULL, NULL, dev);
-	if (r)
+	if (r) {
 		DSSERR("Failed to populate DSI child devices: %d\n", r);
+		goto err_uninit_output;
+	}
 
 	r = component_add(&pdev->dev, &dsi_component_ops);
 	if (r)
-		goto err_uninit_output;
+		goto err_of_depopulate;
 
 	return 0;
 
+err_of_depopulate:
+	of_platform_depopulate(dev);
 err_uninit_output:
 	dsi_uninit_output(dsi);
 err_pm_disable:
-- 
GitLab


From f5fde094a7425ac062f22a94e8a711efa5074946 Mon Sep 17 00:00:00 2001
From: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Date: Mon, 15 Oct 2018 10:31:30 +0900
Subject: [PATCH 0977/1890] phy: uniphier-pcie: Depend on HAS_IOMEM

The driver uses devm_ioremap_resource() which is only available when
CONFIG_HAS_IOMEM is set, so the driver depends on this option.

Signed-off-by: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Signed-off-by: Kishon Vijay Abraham I <kishon@ti.com>
---
 drivers/phy/socionext/Kconfig | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/phy/socionext/Kconfig b/drivers/phy/socionext/Kconfig
index 467e8147972b0..9c85231a6dbcc 100644
--- a/drivers/phy/socionext/Kconfig
+++ b/drivers/phy/socionext/Kconfig
@@ -26,7 +26,8 @@ config PHY_UNIPHIER_USB3
 
 config PHY_UNIPHIER_PCIE
 	tristate "Uniphier PHY driver for PCIe controller"
-	depends on (ARCH_UNIPHIER || COMPILE_TEST) && OF
+	depends on ARCH_UNIPHIER || COMPILE_TEST
+	depends on OF && HAS_IOMEM
 	default PCIE_UNIPHIER
 	select GENERIC_PHY
 	help
-- 
GitLab


From 7243ec72b9024b1ae98ed571db559a7cf0c5a0f4 Mon Sep 17 00:00:00 2001
From: Douglas Anderson <dianders@chromium.org>
Date: Fri, 12 Oct 2018 14:36:32 -0700
Subject: [PATCH 0978/1890] dt-bindings: phy-qcom-qmp: Fix several mistakes
 from prior commits

Digging through the "phy-qcom-qmp" showed me many inconsistencies
between the bindings and the reality of the driver.  Let's fix them
all.

* In commit 2d66eab18375 ("dt-bindings: phy: qmp: Add support for QMP
  phy in IPQ8074") we probably should have explicitly listed that
  there are no clocks for this PHY and also added the reset names in
  alphabetical order.  You can see that there are no clocks in the
  driver where "clk_list" is NULL.

* In commit 8587b220f05e ("dt-bindings: phy-qcom-qmp: Update bindings
  for QMP V3 USB PHY") we probably should have listed the resets for
  this new PHY and also removed the "(Optional)" marking for the "cfg"
  reset since PHYs that need "cfg" really do need it.  It's just that
  not all PHYs need it.

* In commit 7f0802074120 ("dt-bindings: phy-qcom-qmp: Update bindings
  for sdm845") we forgot to update one instance of the string
  "qcom,qmp-v3-usb3-phy" to be "qcom,sdm845-qmp-usb3-phy".  Let's fix
  that.  We should also have added "qcom,sdm845-qmp-usb3-uni-phy" to
  the clock-names and reset-names lists.

* In commit 99c7c7364b71 ("dt-bindings: phy-qcom-qmp: Add UFS phy
  compatible string for sdm845") we should have added the set of
  clocks and resets for "qcom,sdm845-qmp-ufs-phy".  These were taken
  from the driver.

* Cleanup the wording for what properties child nodes have to make it
  more obvious which types of PHYs need clocks and resets.  This was
  sorta implicit in the "-names" description but I found myself
  confused.

* As per the code not all "pcie qmp phys" have resets.  Specifically
  note that the "has_lane_rst" property in the driver is false for
  "ipq8074-qmp-pcie-phy".  Thus make it clear exactly which PHYs need
  child nodes with resets.

Signed-off-by: Douglas Anderson <dianders@chromium.org>
Reviewed-by: Evan Green <evgreen@chromium.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Kishon Vijay Abraham I <kishon@ti.com>
---
 .../devicetree/bindings/phy/qcom-qmp-phy.txt  | 31 ++++++++++++++-----
 1 file changed, 23 insertions(+), 8 deletions(-)

diff --git a/Documentation/devicetree/bindings/phy/qcom-qmp-phy.txt b/Documentation/devicetree/bindings/phy/qcom-qmp-phy.txt
index adf20b2bdf715..fbc198d5dd39e 100644
--- a/Documentation/devicetree/bindings/phy/qcom-qmp-phy.txt
+++ b/Documentation/devicetree/bindings/phy/qcom-qmp-phy.txt
@@ -40,24 +40,36 @@ Required properties:
 		"ref" for 19.2 MHz ref clk,
 		"com_aux" for phy common block aux clock,
 		"ref_aux" for phy reference aux clock,
+
+		For "qcom,ipq8074-qmp-pcie-phy": no clocks are listed.
 		For "qcom,msm8996-qmp-pcie-phy" must contain:
 			"aux", "cfg_ahb", "ref".
 		For "qcom,msm8996-qmp-usb3-phy" must contain:
 			"aux", "cfg_ahb", "ref".
-		For "qcom,qmp-v3-usb3-phy" must contain:
+		For "qcom,sdm845-qmp-usb3-phy" must contain:
+			"aux", "cfg_ahb", "ref", "com_aux".
+		For "qcom,sdm845-qmp-usb3-uni-phy" must contain:
 			"aux", "cfg_ahb", "ref", "com_aux".
+		For "qcom,sdm845-qmp-ufs-phy" must contain:
+			"ref", "ref_aux".
 
  - resets: a list of phandles and reset controller specifier pairs,
 	   one for each entry in reset-names.
  - reset-names: "phy" for reset of phy block,
 		"common" for phy common block reset,
-		"cfg" for phy's ahb cfg block reset (Optional).
+		"cfg" for phy's ahb cfg block reset.
+
+		For "qcom,ipq8074-qmp-pcie-phy" must contain:
+			"phy", "common".
 		For "qcom,msm8996-qmp-pcie-phy" must contain:
-		 "phy", "common", "cfg".
+			"phy", "common", "cfg".
 		For "qcom,msm8996-qmp-usb3-phy" must contain
-		 "phy", "common".
-		For "qcom,ipq8074-qmp-pcie-phy" must contain:
-		 "phy", "common".
+			"phy", "common".
+		For "qcom,sdm845-qmp-usb3-phy" must contain:
+			"phy", "common".
+		For "qcom,sdm845-qmp-usb3-uni-phy" must contain:
+			"phy", "common".
+		For "qcom,sdm845-qmp-ufs-phy": no resets are listed.
 
  - vdda-phy-supply: Phandle to a regulator supply to PHY core block.
  - vdda-pll-supply: Phandle to 1.8V regulator supply to PHY refclk pll block.
@@ -79,9 +91,10 @@ Required properties for child node:
 
  - #phy-cells: must be 0
 
+Required properties child node of pcie and usb3 qmp phys:
  - clocks: a list of phandles and clock-specifier pairs,
 	   one for each entry in clock-names.
- - clock-names: Must contain following for pcie and usb qmp phys:
+ - clock-names: Must contain following:
 		 "pipe<lane-number>" for pipe clock specific to each lane.
  - clock-output-names: Name of the PHY clock that will be the parent for
 		       the above pipe clock.
@@ -91,9 +104,11 @@ Required properties for child node:
 			(or)
 		  "pcie20_phy1_pipe_clk"
 
+Required properties for child node of PHYs with lane reset, AKA:
+	"qcom,msm8996-qmp-pcie-phy"
  - resets: a list of phandles and reset controller specifier pairs,
 	   one for each entry in reset-names.
- - reset-names: Must contain following for pcie qmp phys:
+ - reset-names: Must contain following:
 		 "lane<lane-number>" for reset specific to each lane.
 
 Example:
-- 
GitLab


From 899a42f836678a595f7d2bc36a5a0c2b03d08cbc Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Thu, 19 Jul 2018 11:42:36 +0100
Subject: [PATCH 0979/1890] ARM: make lookup_processor_type() non-__init

Move lookup_processor_type() out of the __init section so it is callable
from (eg) the secondary startup code during hotplug.

Reviewed-by: Julien Thierry <julien.thierry@arm.com>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/kernel/head-common.S | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S
index 6e0375e7db055..997b02302c314 100644
--- a/arch/arm/kernel/head-common.S
+++ b/arch/arm/kernel/head-common.S
@@ -145,6 +145,9 @@ __mmap_switched_data:
 #endif
 	.size	__mmap_switched_data, . - __mmap_switched_data
 
+	__FINIT
+	.text
+
 /*
  * This provides a C-API version of __lookup_processor_type
  */
@@ -156,9 +159,6 @@ ENTRY(lookup_processor_type)
 	ldmfd	sp!, {r4 - r6, r9, pc}
 ENDPROC(lookup_processor_type)
 
-	__FINIT
-	.text
-
 /*
  * Read processor ID register (CP#15, CR0), and look up in the linker-built
  * supported processor list.  Note that we can't use the absolute addresses
-- 
GitLab


From 65987a8553061515b5851b472081aedb9837a391 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Thu, 19 Jul 2018 11:59:56 +0100
Subject: [PATCH 0980/1890] ARM: split out processor lookup

Split out the lookup of the processor type and associated error handling
from the rest of setup_processor() - we will need to use this in the
secondary CPU bringup path for big.Little Spectre variant 2 mitigation.

Reviewed-by: Julien Thierry <julien.thierry@arm.com>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/include/asm/cputype.h |  1 +
 arch/arm/kernel/setup.c        | 31 +++++++++++++++++++------------
 2 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h
index 26021980504db..f6df4bb4e543f 100644
--- a/arch/arm/include/asm/cputype.h
+++ b/arch/arm/include/asm/cputype.h
@@ -107,6 +107,7 @@
 #define ARM_CPU_PART_SCORPION		0x510002d0
 
 extern unsigned int processor_id;
+struct proc_info_list *lookup_processor(u32 midr);
 
 #ifdef CONFIG_CPU_CP15
 #define read_cpuid(reg)							\
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index fc40a2b40595b..05a4eb6b0d01b 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -667,22 +667,29 @@ static void __init smp_build_mpidr_hash(void)
 }
 #endif
 
-static void __init setup_processor(void)
+/*
+ * locate processor in the list of supported processor types.  The linker
+ * builds this table for us from the entries in arch/arm/mm/proc-*.S
+ */
+struct proc_info_list *lookup_processor(u32 midr)
 {
-	struct proc_info_list *list;
+	struct proc_info_list *list = lookup_processor_type(midr);
 
-	/*
-	 * locate processor in the list of supported processor
-	 * types.  The linker builds this table for us from the
-	 * entries in arch/arm/mm/proc-*.S
-	 */
-	list = lookup_processor_type(read_cpuid_id());
 	if (!list) {
-		pr_err("CPU configuration botched (ID %08x), unable to continue.\n",
-		       read_cpuid_id());
-		while (1);
+		pr_err("CPU%u: configuration botched (ID %08x), CPU halted\n",
+		       smp_processor_id(), midr);
+		while (1)
+		/* can't use cpu_relax() here as it may require MMU setup */;
 	}
 
+	return list;
+}
+
+static void __init setup_processor(void)
+{
+	unsigned int midr = read_cpuid_id();
+	struct proc_info_list *list = lookup_processor(midr);
+
 	cpu_name = list->cpu_name;
 	__cpu_architecture = __get_cpu_architecture();
 
@@ -700,7 +707,7 @@ static void __init setup_processor(void)
 #endif
 
 	pr_info("CPU: %s [%08x] revision %d (ARMv%s), cr=%08lx\n",
-		cpu_name, read_cpuid_id(), read_cpuid_id() & 15,
+		list->cpu_name, midr, midr & 15,
 		proc_arch[cpu_architecture()], get_cr());
 
 	snprintf(init_utsname()->machine, __NEW_UTS_LEN + 1, "%s%c",
-- 
GitLab


From 945aceb1db8885d3a35790cf2e810f681db52756 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Thu, 19 Jul 2018 12:43:03 +0100
Subject: [PATCH 0981/1890] ARM: clean up per-processor check_bugs method call

Call the per-processor type check_bugs() method in the same way as we
do other per-processor functions - move the "processor." detail into
proc-fns.h.

Reviewed-by: Julien Thierry <julien.thierry@arm.com>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/include/asm/proc-fns.h | 1 +
 arch/arm/kernel/bugs.c          | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h
index e25f4392e1b28..30c499146320d 100644
--- a/arch/arm/include/asm/proc-fns.h
+++ b/arch/arm/include/asm/proc-fns.h
@@ -99,6 +99,7 @@ extern void cpu_do_suspend(void *);
 extern void cpu_do_resume(void *);
 #else
 #define cpu_proc_init			processor._proc_init
+#define cpu_check_bugs			processor.check_bugs
 #define cpu_proc_fin			processor._proc_fin
 #define cpu_reset			processor.reset
 #define cpu_do_idle			processor._do_idle
diff --git a/arch/arm/kernel/bugs.c b/arch/arm/kernel/bugs.c
index 7be5113101915..d41d3598e5e54 100644
--- a/arch/arm/kernel/bugs.c
+++ b/arch/arm/kernel/bugs.c
@@ -6,8 +6,8 @@
 void check_other_bugs(void)
 {
 #ifdef MULTI_CPU
-	if (processor.check_bugs)
-		processor.check_bugs();
+	if (cpu_check_bugs)
+		cpu_check_bugs();
 #endif
 }
 
-- 
GitLab


From e209950fdd065d2cc46e6338e47e52841b830cba Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Thu, 19 Jul 2018 12:17:38 +0100
Subject: [PATCH 0982/1890] ARM: add PROC_VTABLE and PROC_TABLE macros

Allow the way we access members of the processor vtable to be changed
at compile time.  We will need to move to per-CPU vtables to fix the
Spectre variant 2 issues on big.Little systems.

However, we have a couple of calls that do not need the vtable
treatment, and indeed cause a kernel warning due to the (later) use
of smp_processor_id(), so also introduce the PROC_TABLE macro for
these which always use CPU 0's function pointers.

Reviewed-by: Julien Thierry <julien.thierry@arm.com>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/include/asm/proc-fns.h | 39 ++++++++++++++++++++++-----------
 arch/arm/kernel/setup.c         |  4 +---
 2 files changed, 27 insertions(+), 16 deletions(-)

diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h
index 30c499146320d..c259cc49c6415 100644
--- a/arch/arm/include/asm/proc-fns.h
+++ b/arch/arm/include/asm/proc-fns.h
@@ -23,7 +23,7 @@ struct mm_struct;
 /*
  * Don't change this structure - ASM code relies on it.
  */
-extern struct processor {
+struct processor {
 	/* MISC
 	 * get data abort address/flags
 	 */
@@ -79,9 +79,13 @@ extern struct processor {
 	unsigned int suspend_size;
 	void (*do_suspend)(void *);
 	void (*do_resume)(void *);
-} processor;
+};
 
 #ifndef MULTI_CPU
+static inline void init_proc_vtable(const struct processor *p)
+{
+}
+
 extern void cpu_proc_init(void);
 extern void cpu_proc_fin(void);
 extern int cpu_do_idle(void);
@@ -98,18 +102,27 @@ extern void cpu_reset(unsigned long addr, bool hvc) __attribute__((noreturn));
 extern void cpu_do_suspend(void *);
 extern void cpu_do_resume(void *);
 #else
-#define cpu_proc_init			processor._proc_init
-#define cpu_check_bugs			processor.check_bugs
-#define cpu_proc_fin			processor._proc_fin
-#define cpu_reset			processor.reset
-#define cpu_do_idle			processor._do_idle
-#define cpu_dcache_clean_area		processor.dcache_clean_area
-#define cpu_set_pte_ext			processor.set_pte_ext
-#define cpu_do_switch_mm		processor.switch_mm
 
-/* These three are private to arch/arm/kernel/suspend.c */
-#define cpu_do_suspend			processor.do_suspend
-#define cpu_do_resume			processor.do_resume
+extern struct processor processor;
+#define PROC_VTABLE(f)			processor.f
+#define PROC_TABLE(f)			processor.f
+static inline void init_proc_vtable(const struct processor *p)
+{
+	processor = *p;
+}
+
+#define cpu_proc_init			PROC_VTABLE(_proc_init)
+#define cpu_check_bugs			PROC_VTABLE(check_bugs)
+#define cpu_proc_fin			PROC_VTABLE(_proc_fin)
+#define cpu_reset			PROC_VTABLE(reset)
+#define cpu_do_idle			PROC_VTABLE(_do_idle)
+#define cpu_dcache_clean_area		PROC_TABLE(dcache_clean_area)
+#define cpu_set_pte_ext			PROC_TABLE(set_pte_ext)
+#define cpu_do_switch_mm		PROC_VTABLE(switch_mm)
+
+/* These two are private to arch/arm/kernel/suspend.c */
+#define cpu_do_suspend			PROC_VTABLE(do_suspend)
+#define cpu_do_resume			PROC_VTABLE(do_resume)
 #endif
 
 extern void cpu_resume(void);
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 05a4eb6b0d01b..c214bd14a1fe3 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -693,9 +693,7 @@ static void __init setup_processor(void)
 	cpu_name = list->cpu_name;
 	__cpu_architecture = __get_cpu_architecture();
 
-#ifdef MULTI_CPU
-	processor = *list->proc;
-#endif
+	init_proc_vtable(list->proc);
 #ifdef MULTI_TLB
 	cpu_tlb = *list->tlb;
 #endif
-- 
GitLab


From 5df7a99bdd0de4a0480320264c44c04543c29d5a Mon Sep 17 00:00:00 2001
From: Julien Thierry <julien.thierry@arm.com>
Date: Thu, 8 Nov 2018 17:25:28 +0100
Subject: [PATCH 0983/1890] ARM: 8810/1: vfp: Fix wrong assignement to ufp_exc

In vfp_preserve_user_clear_hwstate, ufp_exc->fpinst2 gets assigned to
itself. It should actually be hwstate->fpinst2 that gets assigned to the
ufp_exc field.

Fixes commit 3aa2df6ec2ca6bc143a65351cca4266d03a8bc41 ("ARM: 8791/1:
vfp: use __copy_to_user() when saving VFP state").

Reported-by: David Binderman <dcb314@hotmail.com>
Signed-off-by: Julien Thierry <julien.thierry@arm.com>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/vfp/vfpmodule.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
index 3b75f1d8a491a..15bc3cf2a7fdc 100644
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -579,7 +579,7 @@ int vfp_preserve_user_clear_hwstate(struct user_vfp *ufp,
 	 */
 	ufp_exc->fpexc = hwstate->fpexc;
 	ufp_exc->fpinst = hwstate->fpinst;
-	ufp_exc->fpinst2 = ufp_exc->fpinst2;
+	ufp_exc->fpinst2 = hwstate->fpinst2;
 
 	/* Ensure that VFP is disabled. */
 	vfp_flush_hwstate(thread);
-- 
GitLab


From 383fb3ee8024d596f488d2dbaf45e572897acbdb Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Thu, 19 Jul 2018 12:21:31 +0100
Subject: [PATCH 0984/1890] ARM: spectre-v2: per-CPU vtables to work around
 big.Little systems

In big.Little systems, some CPUs require the Spectre workarounds in
paths such as the context switch, but other CPUs do not.  In order
to handle these differences, we need per-CPU vtables.

We are unable to use the kernel's per-CPU variables to support this
as per-CPU is not initialised at times when we need access to the
vtables, so we have to use an array indexed by logical CPU number.

We use an array-of-pointers to avoid having function pointers in
the kernel's read/write .data section.

Reviewed-by: Julien Thierry <julien.thierry@arm.com>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/include/asm/proc-fns.h | 23 +++++++++++++++++++++++
 arch/arm/kernel/setup.c         |  5 +++++
 arch/arm/kernel/smp.c           | 31 +++++++++++++++++++++++++++++++
 arch/arm/mm/proc-v7-bugs.c      | 17 ++---------------
 4 files changed, 61 insertions(+), 15 deletions(-)

diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h
index c259cc49c6415..e1b6f280ab088 100644
--- a/arch/arm/include/asm/proc-fns.h
+++ b/arch/arm/include/asm/proc-fns.h
@@ -104,12 +104,35 @@ extern void cpu_do_resume(void *);
 #else
 
 extern struct processor processor;
+#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR)
+#include <linux/smp.h>
+/*
+ * This can't be a per-cpu variable because we need to access it before
+ * per-cpu has been initialised.  We have a couple of functions that are
+ * called in a pre-emptible context, and so can't use smp_processor_id()
+ * there, hence PROC_TABLE().  We insist in init_proc_vtable() that the
+ * function pointers for these are identical across all CPUs.
+ */
+extern struct processor *cpu_vtable[];
+#define PROC_VTABLE(f)			cpu_vtable[smp_processor_id()]->f
+#define PROC_TABLE(f)			cpu_vtable[0]->f
+static inline void init_proc_vtable(const struct processor *p)
+{
+	unsigned int cpu = smp_processor_id();
+	*cpu_vtable[cpu] = *p;
+	WARN_ON_ONCE(cpu_vtable[cpu]->dcache_clean_area !=
+		     cpu_vtable[0]->dcache_clean_area);
+	WARN_ON_ONCE(cpu_vtable[cpu]->set_pte_ext !=
+		     cpu_vtable[0]->set_pte_ext);
+}
+#else
 #define PROC_VTABLE(f)			processor.f
 #define PROC_TABLE(f)			processor.f
 static inline void init_proc_vtable(const struct processor *p)
 {
 	processor = *p;
 }
+#endif
 
 #define cpu_proc_init			PROC_VTABLE(_proc_init)
 #define cpu_check_bugs			PROC_VTABLE(check_bugs)
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index c214bd14a1fe3..cd46a595422cf 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -115,6 +115,11 @@ EXPORT_SYMBOL(elf_hwcap2);
 
 #ifdef MULTI_CPU
 struct processor processor __ro_after_init;
+#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR)
+struct processor *cpu_vtable[NR_CPUS] = {
+	[0] = &processor,
+};
+#endif
 #endif
 #ifdef MULTI_TLB
 struct cpu_tlb_fns cpu_tlb __ro_after_init;
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 5ad0b67b9e334..82b879db32ee4 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -42,6 +42,7 @@
 #include <asm/mmu_context.h>
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
+#include <asm/procinfo.h>
 #include <asm/processor.h>
 #include <asm/sections.h>
 #include <asm/tlbflush.h>
@@ -102,6 +103,30 @@ static unsigned long get_arch_pgd(pgd_t *pgd)
 #endif
 }
 
+#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR)
+static int secondary_biglittle_prepare(unsigned int cpu)
+{
+	if (!cpu_vtable[cpu])
+		cpu_vtable[cpu] = kzalloc(sizeof(*cpu_vtable[cpu]), GFP_KERNEL);
+
+	return cpu_vtable[cpu] ? 0 : -ENOMEM;
+}
+
+static void secondary_biglittle_init(void)
+{
+	init_proc_vtable(lookup_processor(read_cpuid_id())->proc);
+}
+#else
+static int secondary_biglittle_prepare(unsigned int cpu)
+{
+	return 0;
+}
+
+static void secondary_biglittle_init(void)
+{
+}
+#endif
+
 int __cpu_up(unsigned int cpu, struct task_struct *idle)
 {
 	int ret;
@@ -109,6 +134,10 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
 	if (!smp_ops.smp_boot_secondary)
 		return -ENOSYS;
 
+	ret = secondary_biglittle_prepare(cpu);
+	if (ret)
+		return ret;
+
 	/*
 	 * We need to tell the secondary core where to find
 	 * its stack and the page tables.
@@ -360,6 +389,8 @@ asmlinkage void secondary_start_kernel(void)
 	struct mm_struct *mm = &init_mm;
 	unsigned int cpu;
 
+	secondary_biglittle_init();
+
 	/*
 	 * The identity mapping is uncached (strongly ordered), so
 	 * switch away from it before attempting any exclusive accesses.
diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c
index 5544b82a2e7a5..9a07916af8dd2 100644
--- a/arch/arm/mm/proc-v7-bugs.c
+++ b/arch/arm/mm/proc-v7-bugs.c
@@ -52,8 +52,6 @@ static void cpu_v7_spectre_init(void)
 	case ARM_CPU_PART_CORTEX_A17:
 	case ARM_CPU_PART_CORTEX_A73:
 	case ARM_CPU_PART_CORTEX_A75:
-		if (processor.switch_mm != cpu_v7_bpiall_switch_mm)
-			goto bl_error;
 		per_cpu(harden_branch_predictor_fn, cpu) =
 			harden_branch_predictor_bpiall;
 		spectre_v2_method = "BPIALL";
@@ -61,8 +59,6 @@ static void cpu_v7_spectre_init(void)
 
 	case ARM_CPU_PART_CORTEX_A15:
 	case ARM_CPU_PART_BRAHMA_B15:
-		if (processor.switch_mm != cpu_v7_iciallu_switch_mm)
-			goto bl_error;
 		per_cpu(harden_branch_predictor_fn, cpu) =
 			harden_branch_predictor_iciallu;
 		spectre_v2_method = "ICIALLU";
@@ -88,11 +84,9 @@ static void cpu_v7_spectre_init(void)
 					  ARM_SMCCC_ARCH_WORKAROUND_1, &res);
 			if ((int)res.a0 != 0)
 				break;
-			if (processor.switch_mm != cpu_v7_hvc_switch_mm && cpu)
-				goto bl_error;
 			per_cpu(harden_branch_predictor_fn, cpu) =
 				call_hvc_arch_workaround_1;
-			processor.switch_mm = cpu_v7_hvc_switch_mm;
+			cpu_do_switch_mm = cpu_v7_hvc_switch_mm;
 			spectre_v2_method = "hypervisor";
 			break;
 
@@ -101,11 +95,9 @@ static void cpu_v7_spectre_init(void)
 					  ARM_SMCCC_ARCH_WORKAROUND_1, &res);
 			if ((int)res.a0 != 0)
 				break;
-			if (processor.switch_mm != cpu_v7_smc_switch_mm && cpu)
-				goto bl_error;
 			per_cpu(harden_branch_predictor_fn, cpu) =
 				call_smc_arch_workaround_1;
-			processor.switch_mm = cpu_v7_smc_switch_mm;
+			cpu_do_switch_mm = cpu_v7_smc_switch_mm;
 			spectre_v2_method = "firmware";
 			break;
 
@@ -119,11 +111,6 @@ static void cpu_v7_spectre_init(void)
 	if (spectre_v2_method)
 		pr_info("CPU%u: Spectre v2: using %s workaround\n",
 			smp_processor_id(), spectre_v2_method);
-	return;
-
-bl_error:
-	pr_err("CPU%u: Spectre v2: incorrect context switching function, system vulnerable\n",
-		cpu);
 }
 #else
 static void cpu_v7_spectre_init(void)
-- 
GitLab


From d99501b8575dc1248bacf1b58d2241cb4b265d49 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 12 Nov 2018 12:26:57 +0100
Subject: [PATCH 0985/1890] ALSA: hda/ca0132 - Call pci_iounmap() instead of
 iounmap()

We need to call pci_iounmap() instead of iounmap() for the regions
obtained via pci_iomap() call for some archs that need special
treatment.

Fixes: aa31704fd81c ("ALSA: hda/ca0132: Add PCI region2 iomap for SBZ")
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_ca0132.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c
index 0a24037184c33..bdbbc51e8d187 100644
--- a/sound/pci/hda/patch_ca0132.c
+++ b/sound/pci/hda/patch_ca0132.c
@@ -8413,7 +8413,7 @@ static void ca0132_free(struct hda_codec *codec)
 
 	snd_hda_power_down(codec);
 	if (spec->mem_base)
-		iounmap(spec->mem_base);
+		pci_iounmap(codec->bus->pci, spec->mem_base);
 	kfree(spec->spec_init_verbs);
 	kfree(codec->spec);
 }
-- 
GitLab


From cc7ed49a7f39deafb33fc58d162a3365344c3458 Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Thu, 8 Nov 2018 22:38:42 +0100
Subject: [PATCH 0986/1890] HID: hidraw: enforce minors_lock locking via
 lockdep

lockdep is much more powerful enforcing the locking rules than code comments,
so let's switch to it.

Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hidraw.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c
index 4a44e48e08b22..9fc51eff10790 100644
--- a/drivers/hid/hidraw.c
+++ b/drivers/hid/hidraw.c
@@ -107,8 +107,6 @@ static ssize_t hidraw_read(struct file *file, char __user *buffer, size_t count,
 
 /*
  * The first byte of the report buffer is expected to be a report number.
- *
- * This function is to be called with the minors_lock mutex held.
  */
 static ssize_t hidraw_send_report(struct file *file, const char __user *buffer, size_t count, unsigned char report_type)
 {
@@ -117,6 +115,8 @@ static ssize_t hidraw_send_report(struct file *file, const char __user *buffer,
 	__u8 *buf;
 	int ret = 0;
 
+	lockdep_assert_held(&minors_lock);
+
 	if (!hidraw_table[minor] || !hidraw_table[minor]->exist) {
 		ret = -ENODEV;
 		goto out;
@@ -181,8 +181,6 @@ static ssize_t hidraw_write(struct file *file, const char __user *buffer, size_t
  * of buffer is the report number to request, or 0x0 if the defice does not
  * use numbered reports. The report_type parameter can be HID_FEATURE_REPORT
  * or HID_INPUT_REPORT.
- *
- * This function is to be called with the minors_lock mutex held.
  */
 static ssize_t hidraw_get_report(struct file *file, char __user *buffer, size_t count, unsigned char report_type)
 {
@@ -192,6 +190,8 @@ static ssize_t hidraw_get_report(struct file *file, char __user *buffer, size_t
 	int ret = 0, len;
 	unsigned char report_number;
 
+	lockdep_assert_held(&minors_lock);
+
 	if (!hidraw_table[minor] || !hidraw_table[minor]->exist) {
 		ret = -ENODEV;
 		goto out;
-- 
GitLab


From ab99be4683d9db33b100497d463274ebd23bd67e Mon Sep 17 00:00:00 2001
From: Filippo Sironi <sironi@amazon.de>
Date: Mon, 12 Nov 2018 12:26:30 +0000
Subject: [PATCH 0987/1890] amd/iommu: Fix Guest Virtual APIC Log Tail Address
 Register

This register should have been programmed with the physical address
of the memory location containing the shadow tail pointer for
the guest virtual APIC log instead of the base address.

Fixes: 8bda0cfbdc1a  ('iommu/amd: Detect and initialize guest vAPIC log')
Signed-off-by: Filippo Sironi <sironi@amazon.de>
Signed-off-by: Wei Wang <wawei@amazon.de>
Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/amd_iommu_init.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index bb2cd29e16588..d8f7000a466aa 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -797,7 +797,8 @@ static int iommu_init_ga_log(struct amd_iommu *iommu)
 	entry = iommu_virt_to_phys(iommu->ga_log) | GA_LOG_SIZE_512;
 	memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_BASE_OFFSET,
 		    &entry, sizeof(entry));
-	entry = (iommu_virt_to_phys(iommu->ga_log) & 0xFFFFFFFFFFFFFULL) & ~7ULL;
+	entry = (iommu_virt_to_phys(iommu->ga_log_tail) &
+		 (BIT_ULL(52)-1)) & ~7ULL;
 	memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_TAIL_OFFSET,
 		    &entry, sizeof(entry));
 	writel(0x00, iommu->mmio_base + MMIO_GA_HEAD_OFFSET);
-- 
GitLab


From fd3e71a9f71e232181a225301a75936373636ccc Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Mon, 5 Nov 2018 03:43:19 +0900
Subject: [PATCH 0988/1890] netfilter: nf_conncount: use spin_lock_bh instead
 of spin_lock

conn_free() holds lock with spin_lock() and it is called by both
nf_conncount_lookup() and nf_conncount_gc_list(). nf_conncount_lookup()
is called from bottom-half context and nf_conncount_gc_list() from
process context. So that spin_lock() call is not safe. Hence
conn_free() should use spin_lock_bh() instead of spin_lock().

test commands:
   %nft add table ip filter
   %nft add chain ip filter input { type filter hook input priority 0\; }
   %nft add rule filter input meter test { ip saddr ct count over 2 } \
	   counter

splat looks like:
[  461.996507] ================================
[  461.998999] WARNING: inconsistent lock state
[  461.998999] 4.19.0-rc6+ #22 Not tainted
[  461.998999] --------------------------------
[  461.998999] inconsistent {IN-SOFTIRQ-W} -> {SOFTIRQ-ON-W} usage.
[  461.998999] kworker/0:2/134 [HC0[0]:SC0[0]:HE1:SE1] takes:
[  461.998999] 00000000a71a559a (&(&list->list_lock)->rlock){+.?.}, at: conn_free+0x69/0x2b0 [nf_conncount]
[  461.998999] {IN-SOFTIRQ-W} state was registered at:
[  461.998999]   _raw_spin_lock+0x30/0x70
[  461.998999]   nf_conncount_add+0x28a/0x520 [nf_conncount]
[  461.998999]   nft_connlimit_eval+0x401/0x580 [nft_connlimit]
[  461.998999]   nft_dynset_eval+0x32b/0x590 [nf_tables]
[  461.998999]   nft_do_chain+0x497/0x1430 [nf_tables]
[  461.998999]   nft_do_chain_ipv4+0x255/0x330 [nf_tables]
[  461.998999]   nf_hook_slow+0xb1/0x160
[ ... ]
[  461.998999] other info that might help us debug this:
[  461.998999]  Possible unsafe locking scenario:
[  461.998999]
[  461.998999]        CPU0
[  461.998999]        ----
[  461.998999]   lock(&(&list->list_lock)->rlock);
[  461.998999]   <Interrupt>
[  461.998999]     lock(&(&list->list_lock)->rlock);
[  461.998999]
[  461.998999]  *** DEADLOCK ***
[  461.998999]
[ ... ]

Fixes: 5c789e131cbb ("netfilter: nf_conncount: Add list lock and gc worker, and RCU for init tree search")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conncount.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index 02ca7df793f5c..71b1f4f99580a 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -106,15 +106,15 @@ nf_conncount_add(struct nf_conncount_list *list,
 	conn->zone = *zone;
 	conn->cpu = raw_smp_processor_id();
 	conn->jiffies32 = (u32)jiffies;
-	spin_lock(&list->list_lock);
+	spin_lock_bh(&list->list_lock);
 	if (list->dead == true) {
 		kmem_cache_free(conncount_conn_cachep, conn);
-		spin_unlock(&list->list_lock);
+		spin_unlock_bh(&list->list_lock);
 		return NF_CONNCOUNT_SKIP;
 	}
 	list_add_tail(&conn->node, &list->head);
 	list->count++;
-	spin_unlock(&list->list_lock);
+	spin_unlock_bh(&list->list_lock);
 	return NF_CONNCOUNT_ADDED;
 }
 EXPORT_SYMBOL_GPL(nf_conncount_add);
@@ -132,10 +132,10 @@ static bool conn_free(struct nf_conncount_list *list,
 {
 	bool free_entry = false;
 
-	spin_lock(&list->list_lock);
+	spin_lock_bh(&list->list_lock);
 
 	if (list->count == 0) {
-		spin_unlock(&list->list_lock);
+		spin_unlock_bh(&list->list_lock);
                 return free_entry;
 	}
 
@@ -144,7 +144,7 @@ static bool conn_free(struct nf_conncount_list *list,
 	if (list->count == 0)
 		free_entry = true;
 
-	spin_unlock(&list->list_lock);
+	spin_unlock_bh(&list->list_lock);
 	call_rcu(&conn->rcu_head, __conn_free);
 	return free_entry;
 }
-- 
GitLab


From 31568ec09ea02a050249921698c9729419539cce Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Mon, 5 Nov 2018 03:44:11 +0900
Subject: [PATCH 0989/1890] netfilter: nf_conncount: fix list_del corruption in
 conn_free

nf_conncount_tuple is an element of nft_connlimit and that is deleted by
conn_free(). Elements can be deleted by both GC routine and data path
functions (nf_conncount_lookup, nf_conncount_add) and they call
conn_free() to free elements. But conn_free() only protects lists, not
each element. So that list_del corruption could occurred.

The conn_free() doesn't check whether element is already deleted. In
order to protect elements, dead flag is added. If an element is deleted,
dead flag is set. The only conn_free() can delete elements so that both
list lock and dead flag are enough to protect it.

test commands:
   %nft add table ip filter
   %nft add chain ip filter input { type filter hook input priority 0\; }
   %nft add rule filter input meter test { ip id ct count over 2 } counter

splat looks like:
[ 1779.495778] list_del corruption, ffff8800b6e12008->prev is LIST_POISON2 (dead000000000200)
[ 1779.505453] ------------[ cut here ]------------
[ 1779.506260] kernel BUG at lib/list_debug.c:50!
[ 1779.515831] invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI
[ 1779.516772] CPU: 0 PID: 33 Comm: kworker/0:2 Not tainted 4.19.0-rc6+ #22
[ 1779.516772] Workqueue: events_power_efficient nft_rhash_gc [nf_tables_set]
[ 1779.516772] RIP: 0010:__list_del_entry_valid+0xd8/0x150
[ 1779.516772] Code: 39 48 83 c4 08 b8 01 00 00 00 5b 5d c3 48 89 ea 48 c7 c7 00 c3 5b 98 e8 0f dc 40 ff 0f 0b 48 c7 c7 60 c3 5b 98 e8 01 dc 40 ff <0f> 0b 48 c7 c7 c0 c3 5b 98 e8 f3 db 40 ff 0f 0b 48 c7 c7 20 c4 5b
[ 1779.516772] RSP: 0018:ffff880119127420 EFLAGS: 00010286
[ 1779.516772] RAX: 000000000000004e RBX: dead000000000200 RCX: 0000000000000000
[ 1779.516772] RDX: 000000000000004e RSI: 0000000000000008 RDI: ffffed0023224e7a
[ 1779.516772] RBP: ffff88011934bc10 R08: ffffed002367cea9 R09: ffffed002367cea9
[ 1779.516772] R10: 0000000000000001 R11: ffffed002367cea8 R12: ffff8800b6e12008
[ 1779.516772] R13: ffff8800b6e12010 R14: ffff88011934bc20 R15: ffff8800b6e12008
[ 1779.516772] FS:  0000000000000000(0000) GS:ffff88011b200000(0000) knlGS:0000000000000000
[ 1779.516772] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 1779.516772] CR2: 00007fc876534010 CR3: 000000010da16000 CR4: 00000000001006f0
[ 1779.516772] Call Trace:
[ 1779.516772]  conn_free+0x9f/0x2b0 [nf_conncount]
[ 1779.516772]  ? nf_ct_tmpl_alloc+0x2a0/0x2a0 [nf_conntrack]
[ 1779.516772]  ? nf_conncount_add+0x520/0x520 [nf_conncount]
[ 1779.516772]  ? do_raw_spin_trylock+0x1a0/0x1a0
[ 1779.516772]  ? do_raw_spin_trylock+0x10/0x1a0
[ 1779.516772]  find_or_evict+0xe5/0x150 [nf_conncount]
[ 1779.516772]  nf_conncount_gc_list+0x162/0x360 [nf_conncount]
[ 1779.516772]  ? nf_conncount_lookup+0xee0/0xee0 [nf_conncount]
[ 1779.516772]  ? _raw_spin_unlock_irqrestore+0x45/0x50
[ 1779.516772]  ? trace_hardirqs_off+0x6b/0x220
[ 1779.516772]  ? trace_hardirqs_on_caller+0x220/0x220
[ 1779.516772]  nft_rhash_gc+0x16b/0x540 [nf_tables_set]
[ ... ]

Fixes: 5c789e131cbb ("netfilter: nf_conncount: Add list lock and gc worker, and RCU for init tree search")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conncount.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index 71b1f4f99580a..cb33709138df2 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -49,6 +49,7 @@ struct nf_conncount_tuple {
 	struct nf_conntrack_zone	zone;
 	int				cpu;
 	u32				jiffies32;
+	bool				dead;
 	struct rcu_head			rcu_head;
 };
 
@@ -106,6 +107,7 @@ nf_conncount_add(struct nf_conncount_list *list,
 	conn->zone = *zone;
 	conn->cpu = raw_smp_processor_id();
 	conn->jiffies32 = (u32)jiffies;
+	conn->dead = false;
 	spin_lock_bh(&list->list_lock);
 	if (list->dead == true) {
 		kmem_cache_free(conncount_conn_cachep, conn);
@@ -134,12 +136,13 @@ static bool conn_free(struct nf_conncount_list *list,
 
 	spin_lock_bh(&list->list_lock);
 
-	if (list->count == 0) {
+	if (conn->dead) {
 		spin_unlock_bh(&list->list_lock);
-                return free_entry;
+		return free_entry;
 	}
 
 	list->count--;
+	conn->dead = true;
 	list_del_rcu(&conn->node);
 	if (list->count == 0)
 		free_entry = true;
-- 
GitLab


From 3c5cdb17c3be76714dfd0d03e384f70579545614 Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Mon, 5 Nov 2018 03:44:39 +0900
Subject: [PATCH 0990/1890] netfilter: nf_conncount: fix unexpected permanent
 node of list.

When list->count is 0, the list is deleted by GC. But list->count is
never reached 0 because initial count value is 1 and it is increased
when node is inserted. So that initial value of list->count should be 0.

Originally GC always finds zero count list through deleting node and
decreasing count. However, list may be left empty since node insertion
may fail eg.  allocaton problem. In order to solve this problem, GC
routine also finds zero count list without deleting node.

Fixes: cb2b36f5a97d ("netfilter: nf_conncount: Switch to plain list")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conncount.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index cb33709138df2..8acae4a3e4c00 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -144,8 +144,10 @@ static bool conn_free(struct nf_conncount_list *list,
 	list->count--;
 	conn->dead = true;
 	list_del_rcu(&conn->node);
-	if (list->count == 0)
+	if (list->count == 0) {
+		list->dead = true;
 		free_entry = true;
+	}
 
 	spin_unlock_bh(&list->list_lock);
 	call_rcu(&conn->rcu_head, __conn_free);
@@ -248,7 +250,7 @@ void nf_conncount_list_init(struct nf_conncount_list *list)
 {
 	spin_lock_init(&list->list_lock);
 	INIT_LIST_HEAD(&list->head);
-	list->count = 1;
+	list->count = 0;
 	list->dead = false;
 }
 EXPORT_SYMBOL_GPL(nf_conncount_list_init);
@@ -262,6 +264,7 @@ bool nf_conncount_gc_list(struct net *net,
 	struct nf_conn *found_ct;
 	unsigned int collected = 0;
 	bool free_entry = false;
+	bool ret = false;
 
 	list_for_each_entry_safe(conn, conn_n, &list->head, node) {
 		found = find_or_evict(net, list, conn, &free_entry);
@@ -291,7 +294,15 @@ bool nf_conncount_gc_list(struct net *net,
 		if (collected > CONNCOUNT_GC_MAX_NODES)
 			return false;
 	}
-	return false;
+
+	spin_lock_bh(&list->list_lock);
+	if (!list->count) {
+		list->dead = true;
+		ret = true;
+	}
+	spin_unlock_bh(&list->list_lock);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(nf_conncount_gc_list);
 
@@ -417,6 +428,7 @@ insert_tree(struct net *net,
 	nf_conncount_list_init(&rbconn->list);
 	list_add(&conn->node, &rbconn->list.head);
 	count = 1;
+	rbconn->list.count = count;
 
 	rb_link_node(&rbconn->node, parent, rbnode);
 	rb_insert_color(&rbconn->node, root);
-- 
GitLab


From fb5bbae9b1333d44023713946fdd28db0cd85751 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Mon, 5 Nov 2018 09:43:05 +0000
Subject: [PATCH 0991/1890] drm/i915/ringbuffer: Delay after EMIT_INVALIDATE
 for gen4/gen5
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Exercising the gpu reloc path strenuously revealed an issue where the
updated relocations (from MI_STORE_DWORD_IMM) were not being observed
upon execution. After some experiments with adding pipecontrols (a lot
of pipecontrols (32) as gen4/5 do not have a bit to wait on earlier pipe
controls or even the current on), it was discovered that we merely
needed to delay the EMIT_INVALIDATE by several flushes. It is important
to note that it is the EMIT_INVALIDATE as opposed to the EMIT_FLUSH that
needs the delay as opposed to what one might first expect -- that the
delay is required for the TLB invalidation to take effect (one presumes
to purge any CS buffers) as opposed to a delay after flushing to ensure
the writes have landed before triggering invalidation.

Testcase: igt/gem_tiled_fence_blits
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: stable@vger.kernel.org
Reviewed-by: Ville SyrjÃ¤lÃ¤ <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181105094305.5767-1-chris@chris-wilson.co.uk
(cherry picked from commit 55f99bf2a9c331838c981694bc872cd1ec4070b2)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 38 +++++++++++++++++++++++--
 1 file changed, 36 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index d0ef50bf930ad..187bb0ceb4ac4 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -91,6 +91,7 @@ static int
 gen4_render_ring_flush(struct i915_request *rq, u32 mode)
 {
 	u32 cmd, *cs;
+	int i;
 
 	/*
 	 * read/write caches:
@@ -127,12 +128,45 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode)
 			cmd |= MI_INVALIDATE_ISP;
 	}
 
-	cs = intel_ring_begin(rq, 2);
+	i = 2;
+	if (mode & EMIT_INVALIDATE)
+		i += 20;
+
+	cs = intel_ring_begin(rq, i);
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);
 
 	*cs++ = cmd;
-	*cs++ = MI_NOOP;
+
+	/*
+	 * A random delay to let the CS invalidate take effect? Without this
+	 * delay, the GPU relocation path fails as the CS does not see
+	 * the updated contents. Just as important, if we apply the flushes
+	 * to the EMIT_FLUSH branch (i.e. immediately after the relocation
+	 * write and before the invalidate on the next batch), the relocations
+	 * still fail. This implies that is a delay following invalidation
+	 * that is required to reset the caches as opposed to a delay to
+	 * ensure the memory is written.
+	 */
+	if (mode & EMIT_INVALIDATE) {
+		*cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
+		*cs++ = i915_ggtt_offset(rq->engine->scratch) |
+			PIPE_CONTROL_GLOBAL_GTT;
+		*cs++ = 0;
+		*cs++ = 0;
+
+		for (i = 0; i < 12; i++)
+			*cs++ = MI_FLUSH;
+
+		*cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
+		*cs++ = i915_ggtt_offset(rq->engine->scratch) |
+			PIPE_CONTROL_GLOBAL_GTT;
+		*cs++ = 0;
+		*cs++ = 0;
+	}
+
+	*cs++ = cmd;
+
 	intel_ring_advance(rq, cs);
 
 	return 0;
-- 
GitLab


From 7c4512300cfa5a4dcc8c1c52ae61e3fa4bd11a39 Mon Sep 17 00:00:00 2001
From: Lyude Paul <lyude@redhat.com>
Date: Tue, 6 Nov 2018 16:30:12 -0500
Subject: [PATCH 0992/1890] drm/i915: Fix possible race in
 intel_dp_add_mst_connector()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This hasn't caused any issues yet that I'm aware of, but as Ville
SyrjÃ¤lÃ¤ pointed out - we need to make sure that
intel_connector->mst_port is set before initializing MST connectors,
since in theory we could potentially check intel_connector->mst_port in
i915_hpd_poll_init_work() after registering the connector but before
having written it's value.

Signed-off-by: Lyude Paul <lyude@redhat.com>
Reviewed-by: Ville SyrjÃ¤lÃ¤ <ville.syrjala@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: stable@vger.kernel.org
Link: https://patchwork.freedesktop.org/patch/msgid/20181106213017.14563-2-lyude@redhat.com
(cherry picked from commit 66a5ab1034be801630816d1fa6cfc30db1a2f0b0)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_dp_mst.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c
index 1b00f8ea145ba..a911691dbd0fd 100644
--- a/drivers/gpu/drm/i915/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/intel_dp_mst.c
@@ -452,6 +452,10 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo
 	if (!intel_connector)
 		return NULL;
 
+	intel_connector->get_hw_state = intel_dp_mst_get_hw_state;
+	intel_connector->mst_port = intel_dp;
+	intel_connector->port = port;
+
 	connector = &intel_connector->base;
 	ret = drm_connector_init(dev, connector, &intel_dp_mst_connector_funcs,
 				 DRM_MODE_CONNECTOR_DisplayPort);
@@ -462,10 +466,6 @@ static struct drm_connector *intel_dp_add_mst_connector(struct drm_dp_mst_topolo
 
 	drm_connector_helper_add(connector, &intel_dp_mst_connector_helper_funcs);
 
-	intel_connector->get_hw_state = intel_dp_mst_get_hw_state;
-	intel_connector->mst_port = intel_dp;
-	intel_connector->port = port;
-
 	for_each_pipe(dev_priv, pipe) {
 		struct drm_encoder *enc =
 			&intel_dp->mst_encoders[pipe]->base.base;
-- 
GitLab


From 541ff7e96c13cd5d67f6021d233f8e1c3df49278 Mon Sep 17 00:00:00 2001
From: Lyude Paul <lyude@redhat.com>
Date: Tue, 6 Nov 2018 16:30:13 -0500
Subject: [PATCH 0993/1890] drm/i915: Fix NULL deref when re-enabling HPD IRQs
 on systems with MST
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Turns out that if you trigger an HPD storm on a system that has an MST
topology connected to it, you'll end up causing the kernel to eventually
hit a NULL deref:

[  332.339041] BUG: unable to handle kernel NULL pointer dereference at 00000000000000ec
[  332.340906] PGD 0 P4D 0
[  332.342750] Oops: 0000 [#1] SMP PTI
[  332.344579] CPU: 2 PID: 25 Comm: kworker/2:0 Kdump: loaded Tainted: G           O      4.18.0-rc3short-hpd-storm+ #2
[  332.346453] Hardware name: LENOVO 20BWS1KY00/20BWS1KY00, BIOS JBET71WW (1.35 ) 09/14/2018
[  332.348361] Workqueue: events intel_hpd_irq_storm_reenable_work [i915]
[  332.350301] RIP: 0010:intel_hpd_irq_storm_reenable_work.cold.3+0x2f/0x86 [i915]
[  332.352213] Code: 00 00 ba e8 00 00 00 48 c7 c6 c0 aa 5f a0 48 c7 c7 d0 73 62 a0 4c 89 c1 4c 89 04 24 e8 7f f5 af e0 4c 8b 04 24 44 89 f8 29 e8 <41> 39 80 ec 00 00 00 0f 85 43 13 fc ff 41 0f b6 86 b8 04 00 00 41
[  332.354286] RSP: 0018:ffffc90000147e48 EFLAGS: 00010006
[  332.356344] RAX: 0000000000000005 RBX: ffff8802c226c9d4 RCX: 0000000000000006
[  332.358404] RDX: 0000000000000000 RSI: 0000000000000082 RDI: ffff88032dc95570
[  332.360466] RBP: 0000000000000005 R08: 0000000000000000 R09: ffff88031b3dc840
[  332.362528] R10: 0000000000000000 R11: 000000031a069602 R12: ffff8802c226ca20
[  332.364575] R13: ffff8802c2268000 R14: ffff880310661000 R15: 000000000000000a
[  332.366615] FS:  0000000000000000(0000) GS:ffff88032dc80000(0000) knlGS:0000000000000000
[  332.368658] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  332.370690] CR2: 00000000000000ec CR3: 000000000200a003 CR4: 00000000003606e0
[  332.372724] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  332.374773] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[  332.376798] Call Trace:
[  332.378809]  process_one_work+0x1a1/0x350
[  332.380806]  worker_thread+0x30/0x380
[  332.382777]  ? wq_update_unbound_numa+0x10/0x10
[  332.384772]  kthread+0x112/0x130
[  332.386740]  ? kthread_create_worker_on_cpu+0x70/0x70
[  332.388706]  ret_from_fork+0x35/0x40
[  332.390651] Modules linked in: i915(O) vfat fat joydev btusb btrtl btbcm btintel bluetooth ecdh_generic iTCO_wdt wmi_bmof i2c_algo_bit drm_kms_helper intel_rapl syscopyarea sysfillrect x86_pkg_temp_thermal sysimgblt coretemp fb_sys_fops crc32_pclmul drm psmouse pcspkr mei_me mei i2c_i801 lpc_ich mfd_core i2c_core tpm_tis tpm_tis_core thinkpad_acpi wmi tpm rfkill video crc32c_intel serio_raw ehci_pci xhci_pci ehci_hcd xhci_hcd [last unloaded: i915]
[  332.394963] CR2: 00000000000000ec

This appears to be due to the fact that with an MST topology, not all
intel_connector structs will have ->encoder set. So, fix this by
skipping connectors without encoders in
intel_hpd_irq_storm_reenable_work().

For those wondering, this bug was found on accident while simulating HPD
storms using a Chamelium connected to a ThinkPad T450s (Broadwell).

Changes since v1:
- Check intel_connector->mst_port instead of intel_connector->encoder

Signed-off-by: Lyude Paul <lyude@redhat.com>
Reviewed-by: Ville SyrjÃ¤lÃ¤ <ville.syrjala@linux.intel.com>
Cc: stable@vger.kernel.org
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181106213017.14563-3-lyude@redhat.com
(cherry picked from commit fee61deecb1d850bf34f682a6a452e5ee51b7572)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_hotplug.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c
index 648a13c6043c0..8326900a311e4 100644
--- a/drivers/gpu/drm/i915/intel_hotplug.c
+++ b/drivers/gpu/drm/i915/intel_hotplug.c
@@ -228,7 +228,9 @@ static void intel_hpd_irq_storm_reenable_work(struct work_struct *work)
 		drm_for_each_connector_iter(connector, &conn_iter) {
 			struct intel_connector *intel_connector = to_intel_connector(connector);
 
-			if (intel_connector->encoder->hpd_pin == pin) {
+			/* Don't check MST ports, they don't have pins */
+			if (!intel_connector->mst_port &&
+			    intel_connector->encoder->hpd_pin == pin) {
 				if (connector->polled != intel_connector->polled)
 					DRM_DEBUG_DRIVER("Reenabling HPD on connector %s\n",
 							 connector->name);
-- 
GitLab


From c4f224076d00ccf30c7bd3561cceaed82628c8ce Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Fri, 2 Nov 2018 20:22:00 +0200
Subject: [PATCH 0994/1890] drm/i915/icl: Fix power well 2 wrt. DC-off toggling
 order
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To enable DC5/6 power well 2 has to be disabled as for previous
platforms, so fix things up.

Bspec: 4234
Fixes: 67ca07e7ac10 ("drm/i915/icl: Add power well support")
Cc: Animesh Manna <animesh.manna@intel.com>
Cc: Paulo Zanoni <paulo.r.zanoni@intel.com>
Cc: Ville SyrjÃ¤lÃ¤ <ville.syrjala@linux.intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Reviewed-by: Ville SyrjÃ¤lÃ¤ <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181102182200.17219-1-imre.deak@intel.com
(cherry picked from commit a33e1ece777996ddddb1f23a30f8c66422ed0b68)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_runtime_pm.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 0fdabce647ab6..0a4990d8843c4 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -2748,6 +2748,12 @@ static const struct i915_power_well_desc icl_power_wells[] = {
 			.hsw.has_fuses = true,
 		},
 	},
+	{
+		.name = "DC off",
+		.domains = ICL_DISPLAY_DC_OFF_POWER_DOMAINS,
+		.ops = &gen9_dc_off_power_well_ops,
+		.id = DISP_PW_ID_NONE,
+	},
 	{
 		.name = "power well 2",
 		.domains = ICL_PW_2_POWER_DOMAINS,
@@ -2759,12 +2765,6 @@ static const struct i915_power_well_desc icl_power_wells[] = {
 			.hsw.has_fuses = true,
 		},
 	},
-	{
-		.name = "DC off",
-		.domains = ICL_DISPLAY_DC_OFF_POWER_DOMAINS,
-		.ops = &gen9_dc_off_power_well_ops,
-		.id = DISP_PW_ID_NONE,
-	},
 	{
 		.name = "power well 3",
 		.domains = ICL_PW_3_POWER_DOMAINS,
-- 
GitLab


From 0a823e8fd4fd67726697854578f3584ee3a49b1d Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu, 8 Nov 2018 08:17:38 +0000
Subject: [PATCH 0995/1890] drm/i915/execlists: Force write serialisation into
 context image vs execution

Ensure that the writes into the context image are completed prior to the
register mmio to trigger execution. Although previously we were assured
by the SDM that all writes are flushed before an uncached memory
transaction (our mmio write to submit the context to HW for execution),
we have empirical evidence to believe that this is not actually the
case.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108656
References: https://bugs.freedesktop.org/show_bug.cgi?id=108315
References: https://bugs.freedesktop.org/show_bug.cgi?id=106887
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Acked-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181108081740.25615-1-chris@chris-wilson.co.uk
Cc: stable@vger.kernel.org
(cherry picked from commit 987abd5c62f92ee4970b45aa077f47949974e615)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 43957bb37a422..37c94a54efcbb 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -424,7 +424,8 @@ static u64 execlists_update_context(struct i915_request *rq)
 
 	reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail);
 
-	/* True 32b PPGTT with dynamic page allocation: update PDP
+	/*
+	 * True 32b PPGTT with dynamic page allocation: update PDP
 	 * registers and point the unallocated PDPs to scratch page.
 	 * PML4 is allocated during ppgtt init, so this is not needed
 	 * in 48-bit mode.
@@ -432,6 +433,17 @@ static u64 execlists_update_context(struct i915_request *rq)
 	if (ppgtt && !i915_vm_is_48bit(&ppgtt->vm))
 		execlists_update_context_pdps(ppgtt, reg_state);
 
+	/*
+	 * Make sure the context image is complete before we submit it to HW.
+	 *
+	 * Ostensibly, writes (including the WCB) should be flushed prior to
+	 * an uncached write such as our mmio register access, the empirical
+	 * evidence (esp. on Braswell) suggests that the WC write into memory
+	 * may not be visible to the HW prior to the completion of the UC
+	 * register write and that we may begin execution from the context
+	 * before its image is complete leading to invalid PD chasing.
+	 */
+	wmb();
 	return ce->lrc_desc;
 }
 
-- 
GitLab


From 44a7276b30c3c15f2b7790a5729640597fb6a1df Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Mon, 12 Nov 2018 15:36:16 +0200
Subject: [PATCH 0996/1890] drm/i915: Fix hpd handling for pins with two
 encoders
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In my haste to remove irq_port[] I accidentally changed the
way we deal with hpd pins that are shared by multiple encoders
(DP and HDMI for pre-DDI platforms). Previously we would only
handle such pins via ->hpd_pulse(), but now we queue up the
hotplug work for the HDMI encoder directly. Worse yet, we now
count each hpd twice and this increment the hpd storm count
twice as fast. This can lead to spurious storms being detected.

Go back to the old way of doing things, ie. delegate to
->hpd_pulse() for any pin which has an encoder with that hook
implemented. I don't really like the idea of adding irq_port[]
back so let's loop through the encoders first to check if we
have an encoder with ->hpd_pulse() for the pin, and then go
through all the pins and decided on the correct course of action
based on the earlier findings.

I have occasionally toyed with the idea of unifying the pre-DDI
HDMI and DP encoders into a single encoder as well. Besides the
hotplug processing it would have the other benefit of preventing
userspace from trying to enable both encoders at the same time.
That is simply illegal as they share the same clock/data pins.
We have some testcases that will attempt that and thus fail on
many older machines. But for now let's stick to fixing just the
hotplug code.

Cc: stable@vger.kernel.org # 4.19+
Cc: Lyude Paul <lyude@redhat.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Fixes: b6ca3eee18ba ("drm/i915: Nuke dev_priv->irq_port[]")
Signed-off-by: Ville SyrjÃ¤lÃ¤ <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181108200424.28371-1-ville.syrjala@linux.intel.com
Reviewed-by: Lyude Paul <lyude@redhat.com>
(cherry picked from commit 5a3aeca97af1b6b3498d59a7fd4e8bb95814c108)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_hotplug.c | 66 ++++++++++++++++++++--------
 1 file changed, 47 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c
index 8326900a311e4..9a80181302372 100644
--- a/drivers/gpu/drm/i915/intel_hotplug.c
+++ b/drivers/gpu/drm/i915/intel_hotplug.c
@@ -397,37 +397,54 @@ void intel_hpd_irq_handler(struct drm_i915_private *dev_priv,
 	struct intel_encoder *encoder;
 	bool storm_detected = false;
 	bool queue_dig = false, queue_hp = false;
+	u32 long_hpd_pulse_mask = 0;
+	u32 short_hpd_pulse_mask = 0;
+	enum hpd_pin pin;
 
 	if (!pin_mask)
 		return;
 
 	spin_lock(&dev_priv->irq_lock);
+
+	/*
+	 * Determine whether ->hpd_pulse() exists for each pin, and
+	 * whether we have a short or a long pulse. This is needed
+	 * as each pin may have up to two encoders (HDMI and DP) and
+	 * only the one of them (DP) will have ->hpd_pulse().
+	 */
 	for_each_intel_encoder(&dev_priv->drm, encoder) {
-		enum hpd_pin pin = encoder->hpd_pin;
 		bool has_hpd_pulse = intel_encoder_has_hpd_pulse(encoder);
+		enum port port = encoder->port;
+		bool long_hpd;
 
+		pin = encoder->hpd_pin;
 		if (!(BIT(pin) & pin_mask))
 			continue;
 
-		if (has_hpd_pulse) {
-			bool long_hpd = long_mask & BIT(pin);
-			enum port port = encoder->port;
+		if (!has_hpd_pulse)
+			continue;
 
-			DRM_DEBUG_DRIVER("digital hpd port %c - %s\n", port_name(port),
-					 long_hpd ? "long" : "short");
-			/*
-			 * For long HPD pulses we want to have the digital queue happen,
-			 * but we still want HPD storm detection to function.
-			 */
-			queue_dig = true;
-			if (long_hpd) {
-				dev_priv->hotplug.long_port_mask |= (1 << port);
-			} else {
-				/* for short HPD just trigger the digital queue */
-				dev_priv->hotplug.short_port_mask |= (1 << port);
-				continue;
-			}
+		long_hpd = long_mask & BIT(pin);
+
+		DRM_DEBUG_DRIVER("digital hpd port %c - %s\n", port_name(port),
+				 long_hpd ? "long" : "short");
+		queue_dig = true;
+
+		if (long_hpd) {
+			long_hpd_pulse_mask |= BIT(pin);
+			dev_priv->hotplug.long_port_mask |= BIT(port);
+		} else {
+			short_hpd_pulse_mask |= BIT(pin);
+			dev_priv->hotplug.short_port_mask |= BIT(port);
 		}
+	}
+
+	/* Now process each pin just once */
+	for_each_hpd_pin(pin) {
+		bool long_hpd;
+
+		if (!(BIT(pin) & pin_mask))
+			continue;
 
 		if (dev_priv->hotplug.stats[pin].state == HPD_DISABLED) {
 			/*
@@ -444,11 +461,22 @@ void intel_hpd_irq_handler(struct drm_i915_private *dev_priv,
 		if (dev_priv->hotplug.stats[pin].state != HPD_ENABLED)
 			continue;
 
-		if (!has_hpd_pulse) {
+		/*
+		 * Delegate to ->hpd_pulse() if one of the encoders for this
+		 * pin has it, otherwise let the hotplug_work deal with this
+		 * pin directly.
+		 */
+		if (((short_hpd_pulse_mask | long_hpd_pulse_mask) & BIT(pin))) {
+			long_hpd = long_hpd_pulse_mask & BIT(pin);
+		} else {
 			dev_priv->hotplug.event_bits |= BIT(pin);
+			long_hpd = true;
 			queue_hp = true;
 		}
 
+		if (!long_hpd)
+			continue;
+
 		if (intel_hpd_irq_storm_detect(dev_priv, pin)) {
 			dev_priv->hotplug.event_bits &= ~BIT(pin);
 			storm_detected = true;
-- 
GitLab


From 0fb39bbe43d4481fcf300d2b5822de60942fd189 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 31 Oct 2018 18:26:20 +0100
Subject: [PATCH 0997/1890] netfilter: nf_tables: don't skip inactive chains
 during update

There is no synchronization between packet path and the configuration plane.

The packet path uses two arrays with rules, one contains the current (active)
generation.  The other either contains the last (obsolete) generation or
the future one.

Consider:
cpu1               cpu2
                   nft_do_chain(c);
delete c
net->gen++;
                   genbit = !!net->gen;
                   rules = c->rg[genbit];

cpu1 ignores c when updating if c is not active anymore in the new
generation.

On cpu2, we now use rules from wrong generation, as c->rg[old]
contains the rules matching 'c' whereas c->rg[new] was not updated and
can even point to rules that have been free'd already, causing a crash.

To fix this, make sure that 'current' to the 'next' generation are
identical for chains that are going away so that c->rg[new] will just
use the matching rules even if genbit was incremented already.

Fixes: 0cbc06b3faba7 ("netfilter: nf_tables: remove synchronize_rcu in commit phase")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 42487d01a3eda..dd577e7d100cb 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -6324,7 +6324,7 @@ static void nf_tables_commit_chain_free_rules_old(struct nft_rule **rules)
 	call_rcu(&old->h, __nf_tables_commit_chain_free_rules_old);
 }
 
-static void nf_tables_commit_chain_active(struct net *net, struct nft_chain *chain)
+static void nf_tables_commit_chain(struct net *net, struct nft_chain *chain)
 {
 	struct nft_rule **g0, **g1;
 	bool next_genbit;
@@ -6441,11 +6441,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 
 	/* step 2.  Make rules_gen_X visible to packet path */
 	list_for_each_entry(table, &net->nft.tables, list) {
-		list_for_each_entry(chain, &table->chains, list) {
-			if (!nft_is_active_next(net, chain))
-				continue;
-			nf_tables_commit_chain_active(net, chain);
-		}
+		list_for_each_entry(chain, &table->chains, list)
+			nf_tables_commit_chain(net, chain);
 	}
 
 	/*
-- 
GitLab


From 25d8bcedbf4329895dbaf9dd67baa6f18dad918c Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 31 Oct 2018 18:26:21 +0100
Subject: [PATCH 0998/1890] selftests: add script to stress-test nft packet
 path vs. control plane

Start flood ping for each cpu while loading/flushing rulesets to make
sure we do not access already-free'd rules from nf_tables evaluation loop.

Also add this to TARGETS so 'make run_tests' in selftest dir runs it
automatically.

This would have caught the bug fixed in previous change
("netfilter: nf_tables: do not skip inactive chains during generation update")
sooner.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 tools/testing/selftests/Makefile              |  1 +
 tools/testing/selftests/netfilter/Makefile    |  6 ++
 tools/testing/selftests/netfilter/config      |  2 +
 .../selftests/netfilter/nft_trans_stress.sh   | 78 +++++++++++++++++++
 4 files changed, 87 insertions(+)
 create mode 100644 tools/testing/selftests/netfilter/Makefile
 create mode 100644 tools/testing/selftests/netfilter/config
 create mode 100755 tools/testing/selftests/netfilter/nft_trans_stress.sh

diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index f1fe492c8e17d..f0017c831e57b 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -24,6 +24,7 @@ TARGETS += memory-hotplug
 TARGETS += mount
 TARGETS += mqueue
 TARGETS += net
+TARGETS += netfilter
 TARGETS += nsfs
 TARGETS += powerpc
 TARGETS += proc
diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile
new file mode 100644
index 0000000000000..47ed6cef93fb8
--- /dev/null
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for netfilter selftests
+
+TEST_PROGS := nft_trans_stress.sh
+
+include ../lib.mk
diff --git a/tools/testing/selftests/netfilter/config b/tools/testing/selftests/netfilter/config
new file mode 100644
index 0000000000000..1017313e41a85
--- /dev/null
+++ b/tools/testing/selftests/netfilter/config
@@ -0,0 +1,2 @@
+CONFIG_NET_NS=y
+NF_TABLES_INET=y
diff --git a/tools/testing/selftests/netfilter/nft_trans_stress.sh b/tools/testing/selftests/netfilter/nft_trans_stress.sh
new file mode 100755
index 0000000000000..f1affd12c4b17
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_trans_stress.sh
@@ -0,0 +1,78 @@
+#!/bin/bash
+#
+# This test is for stress-testing the nf_tables config plane path vs.
+# packet path processing: Make sure we never release rules that are
+# still visible to other cpus.
+#
+# set -e
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+testns=testns1
+tables="foo bar baz quux"
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without nft tool"
+	exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+tmp=$(mktemp)
+
+for table in $tables; do
+	echo add table inet "$table" >> "$tmp"
+	echo flush table inet "$table" >> "$tmp"
+
+	echo "add chain inet $table INPUT { type filter hook input priority 0; }" >> "$tmp"
+	echo "add chain inet $table OUTPUT { type filter hook output priority 0; }" >> "$tmp"
+	for c in $(seq 1 400); do
+		chain=$(printf "chain%03u" "$c")
+		echo "add chain inet $table $chain" >> "$tmp"
+	done
+
+	for c in $(seq 1 400); do
+		chain=$(printf "chain%03u" "$c")
+		for BASE in INPUT OUTPUT; do
+			echo "add rule inet $table $BASE counter jump $chain" >> "$tmp"
+		done
+		echo "add rule inet $table $chain counter return" >> "$tmp"
+	done
+done
+
+ip netns add "$testns"
+ip -netns "$testns" link set lo up
+
+lscpu | grep ^CPU\(s\): | ( read cpu cpunum ;
+cpunum=$((cpunum-1))
+for i in $(seq 0 $cpunum);do
+	mask=$(printf 0x%x $((1<<$i)))
+        ip netns exec "$testns" taskset $mask ping -4 127.0.0.1 -fq > /dev/null &
+        ip netns exec "$testns" taskset $mask ping -6 ::1 -fq > /dev/null &
+done)
+
+sleep 1
+
+for i in $(seq 1 10) ; do ip netns exec "$testns" nft -f "$tmp" & done
+
+for table in $tables;do
+	randsleep=$((RANDOM%10))
+	sleep $randsleep
+	ip netns exec "$testns" nft delete table inet $table 2>/dev/null
+done
+
+randsleep=$((RANDOM%10))
+sleep $randsleep
+
+pkill -9 ping
+
+wait
+
+rm -f "$tmp"
+ip netns del "$testns"
-- 
GitLab


From 447750f281abef547be44fdcfe3bc4447b3115a8 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sun, 4 Nov 2018 12:07:14 +0100
Subject: [PATCH 0999/1890] netfilter: nf_tables: don't use position attribute
 on rule replacement

Its possible to set both HANDLE and POSITION when replacing a rule.
In this case, the rule at POSITION gets replaced using the
userspace-provided handle.  Rule handles are supposed to be generated
by the kernel only.

Duplicate handles should be harmless, however better disable this "feature"
by only checking for the POSITION attribute on insert operations.

Fixes: 5e94846686d0 ("netfilter: nf_tables: add insert operation")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index dd577e7d100cb..e496030fdc3b8 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2589,17 +2589,14 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
 
 		if (chain->use == UINT_MAX)
 			return -EOVERFLOW;
-	}
-
-	if (nla[NFTA_RULE_POSITION]) {
-		if (!(nlh->nlmsg_flags & NLM_F_CREATE))
-			return -EOPNOTSUPP;
 
-		pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION]));
-		old_rule = __nft_rule_lookup(chain, pos_handle);
-		if (IS_ERR(old_rule)) {
-			NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION]);
-			return PTR_ERR(old_rule);
+		if (nla[NFTA_RULE_POSITION]) {
+			pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION]));
+			old_rule = __nft_rule_lookup(chain, pos_handle);
+			if (IS_ERR(old_rule)) {
+				NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION]);
+				return PTR_ERR(old_rule);
+			}
 		}
 	}
 
-- 
GitLab


From 18e962ac0781bcb70d433de3b2a325ff792b4288 Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Mon, 12 Nov 2018 00:08:46 -0800
Subject: [PATCH 1000/1890] kyber: fix wrong strlcpy() size in
 trace_kyber_latency()

When copying to the latency type, we should be passing LATENCY_TYPE_LEN,
not DOMAIN_LEN (this isn't a problem in practice because we only pass
"total" or "I/O"). Fix it by changing all of the strlcpy() calls to use
sizeof().

Fixes: 6c3b7af1c975 ("kyber: add tracepoints")
Reported-by: Jordan Glover <Golden_Miller83@protonmail.ch>
Tested-by: Jordan Glover <Golden_Miller83@protonmail.ch>
Signed-off-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/trace/events/kyber.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/trace/events/kyber.h b/include/trace/events/kyber.h
index a9834c37ac400..c0e7d24ca2568 100644
--- a/include/trace/events/kyber.h
+++ b/include/trace/events/kyber.h
@@ -31,8 +31,8 @@ TRACE_EVENT(kyber_latency,
 
 	TP_fast_assign(
 		__entry->dev		= disk_devt(dev_to_disk(kobj_to_dev(q->kobj.parent)));
-		strlcpy(__entry->domain, domain, DOMAIN_LEN);
-		strlcpy(__entry->type, type, DOMAIN_LEN);
+		strlcpy(__entry->domain, domain, sizeof(__entry->domain));
+		strlcpy(__entry->type, type, sizeof(__entry->type));
 		__entry->percentile	= percentile;
 		__entry->numerator	= numerator;
 		__entry->denominator	= denominator;
@@ -60,7 +60,7 @@ TRACE_EVENT(kyber_adjust,
 
 	TP_fast_assign(
 		__entry->dev		= disk_devt(dev_to_disk(kobj_to_dev(q->kobj.parent)));
-		strlcpy(__entry->domain, domain, DOMAIN_LEN);
+		strlcpy(__entry->domain, domain, sizeof(__entry->domain));
 		__entry->depth		= depth;
 	),
 
@@ -82,7 +82,7 @@ TRACE_EVENT(kyber_throttled,
 
 	TP_fast_assign(
 		__entry->dev		= disk_devt(dev_to_disk(kobj_to_dev(q->kobj.parent)));
-		strlcpy(__entry->domain, domain, DOMAIN_LEN);
+		strlcpy(__entry->domain, domain, sizeof(__entry->domain));
 	),
 
 	TP_printk("%d,%d %s", MAJOR(__entry->dev), MINOR(__entry->dev),
-- 
GitLab


From fb50c09e923870a358d68b0d58891bd145b8d7c7 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Mon, 12 Nov 2018 14:00:12 +0100
Subject: [PATCH 1001/1890] perf tools: Fix crash on synthesizing the unit

Adam reported a record command crash for simple session like:

  $ perf record -e cpu-clock ls

with following backtrace:

  Program received signal SIGSEGV, Segmentation fault.
  3543            ev = event_update_event__new(size + 1, PERF_EVENT_UPDATE__UNIT, evsel->id[0]);
  (gdb) bt
  #0  perf_event__synthesize_event_update_unit
  #1  0x000000000051e469 in perf_event__synthesize_extra_attr
  #2  0x00000000004445cb in record__synthesize
  #3  0x0000000000444bc5 in __cmd_record
  ...

We synthesize an update event that needs to touch the evsel id array,
which is not defined at that time. Fix this by forcing the id allocation
for events with their unit defined.

Reflecting possible read_format ID bit in the attr tests.

Reported-by: Yongxin Liu <yongxin.liu@outlook.com>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Adam Lee <leeadamrobert@gmail.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=201477
Fixes: bfd8f72c2778 ("perf record: Synthesize unit/scale/... in event update")
Link: http://lkml.kernel.org/r/20181112130012.5424-1-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/attr/base-record | 2 +-
 tools/perf/util/evsel.c           | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/attr/base-record
index 37940665f736c..efd0157b9d223 100644
--- a/tools/perf/tests/attr/base-record
+++ b/tools/perf/tests/attr/base-record
@@ -9,7 +9,7 @@ size=112
 config=0
 sample_period=*
 sample_type=263
-read_format=0
+read_format=0|4
 disabled=1
 inherit=1
 pinned=0
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index d37bb1566cd9d..dbc0466db3680 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1092,7 +1092,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
 		attr->exclude_user   = 1;
 	}
 
-	if (evsel->own_cpus)
+	if (evsel->own_cpus || evsel->unit)
 		evsel->attr.read_format |= PERF_FORMAT_ID;
 
 	/*
-- 
GitLab


From 77e461d14ed141253573eeeb4d34eccc51e38328 Mon Sep 17 00:00:00 2001
From: Sudarsana Reddy Kalluru <sudarsana.kalluru@cavium.com>
Date: Sun, 11 Nov 2018 18:27:34 -0800
Subject: [PATCH 1002/1890] bnx2x: Assign unique DMAE channel number for FW
 DMAE transactions.

Driver assigns DMAE channel 0 for FW as part of START_RAMROD command. FW
uses this channel for DMAE operations (e.g., TIME_SYNC implementation).
Driver also uses the same channel 0 for DMAE operations for some of the PFs
(e.g., PF0 on Port0). This could lead to concurrent access to the DMAE
channel by FW and driver which is not legal. Hence need to assign unique
DMAE id for FW.
Currently following DMAE channels are used by the clients,
  MFW - OCBB/OCSD functionality uses DMAE channel 14/15
  Driver 0-3 and 8-11 (for PF dmae operations)
         4 and 12 (for stats requests)
Assigning unique dmae_id '13' to the FW.

Changes from previous version:
------------------------------
v2: Incorporated the review comments.

Signed-off-by: Sudarsana Reddy Kalluru <Sudarsana.Kalluru@cavium.com>
Signed-off-by: Michal Kalderon <Michal.Kalderon@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x.h    | 7 +++++++
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c | 1 +
 2 files changed, 8 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index be1506169076f..0de487a8f0eb2 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -2191,6 +2191,13 @@ void bnx2x_igu_clear_sb_gen(struct bnx2x *bp, u8 func, u8 idu_sb_id,
 #define PMF_DMAE_C(bp)			(BP_PORT(bp) * MAX_DMAE_C_PER_PORT + \
 					 E1HVN_MAX)
 
+/* Following is the DMAE channel number allocation for the clients.
+ *   MFW: OCBB/OCSD implementations use DMAE channels 14/15 respectively.
+ *   Driver: 0-3 and 8-11 (for PF dmae operations)
+ *           4 and 12 (for stats requests)
+ */
+#define BNX2X_FW_DMAE_C                 13 /* Channel for FW DMAE operations */
+
 /* PCIE link and speed */
 #define PCICFG_LINK_WIDTH		0x1f00000
 #define PCICFG_LINK_WIDTH_SHIFT		20
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
index 3f4d2c8da21a3..a9eaaf3e73a4c 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
@@ -6149,6 +6149,7 @@ static inline int bnx2x_func_send_start(struct bnx2x *bp,
 	rdata->sd_vlan_tag	= cpu_to_le16(start_params->sd_vlan_tag);
 	rdata->path_id		= BP_PATH(bp);
 	rdata->network_cos_mode	= start_params->network_cos_mode;
+	rdata->dmae_cmd_id	= BNX2X_FW_DMAE_C;
 
 	rdata->vxlan_dst_port	= cpu_to_le16(start_params->vxlan_dst_port);
 	rdata->geneve_dst_port	= cpu_to_le16(start_params->geneve_dst_port);
-- 
GitLab


From ca474b73896bf6e0c1eb8787eb217b0f80221610 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.com>
Date: Mon, 12 Nov 2018 10:35:25 -0700
Subject: [PATCH 1003/1890] block: copy ioprio in __bio_clone_fast() and bounce

We need to copy the io priority, too; otherwise the clone will run
with a different priority than the original one.

Fixes: 43b62ce3ff0a ("block: move bio io prio to a new field")
Signed-off-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Jean Delvare <jdelvare@suse.de>

Fixed up subject, and ordered stores.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c    | 1 +
 block/bounce.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/block/bio.c b/block/bio.c
index a50d59236b197..4f4d9884443b6 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -605,6 +605,7 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
 	if (bio_flagged(bio_src, BIO_THROTTLED))
 		bio_set_flag(bio, BIO_THROTTLED);
 	bio->bi_opf = bio_src->bi_opf;
+	bio->bi_ioprio = bio_src->bi_ioprio;
 	bio->bi_write_hint = bio_src->bi_write_hint;
 	bio->bi_iter = bio_src->bi_iter;
 	bio->bi_io_vec = bio_src->bi_io_vec;
diff --git a/block/bounce.c b/block/bounce.c
index 36869afc258cc..559c55bda040e 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -248,6 +248,7 @@ static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask,
 		return NULL;
 	bio->bi_disk		= bio_src->bi_disk;
 	bio->bi_opf		= bio_src->bi_opf;
+	bio->bi_ioprio		= bio_src->bi_ioprio;
 	bio->bi_write_hint	= bio_src->bi_write_hint;
 	bio->bi_iter.bi_sector	= bio_src->bi_iter.bi_sector;
 	bio->bi_iter.bi_size	= bio_src->bi_iter.bi_size;
-- 
GitLab


From 2c519f583e84eb578d4db48e38160f58aafad2ac Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Date: Wed, 10 Oct 2018 16:46:17 +0200
Subject: [PATCH 1004/1890] ARC: remove redundant 'default n' from Kconfig

'default n' is the default value for any bool or tristate Kconfig
setting so there is no need to write it explicitly.

Also since commit f467c5640c29 ("kconfig: only write '# CONFIG_FOO
is not set' for visible symbols") the Kconfig behavior is the same
regardless of 'default n' being present or not:

    ...
    One side effect of (and the main motivation for) this change is making
    the following two definitions behave exactly the same:

        config FOO
                bool

        config FOO
                bool
                default n

    With this change, neither of these will generate a
    '# CONFIG_FOO is not set' line (assuming FOO isn't selected/implied).
    That might make it clearer to people that a bare 'default n' is
    redundant.
    ...

Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/Kconfig | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index c9e2a13235363..5fcbda6b37ccf 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -176,13 +176,11 @@ endchoice
 
 config CPU_BIG_ENDIAN
 	bool "Enable Big Endian Mode"
-	default n
 	help
 	  Build kernel for Big Endian Mode of ARC CPU
 
 config SMP
 	bool "Symmetric Multi-Processing"
-	default n
 	select ARC_MCIP if ISA_ARCV2
 	help
 	  This enables support for systems with more than one CPU.
@@ -254,7 +252,6 @@ config ARC_CACHE_PAGES
 config ARC_CACHE_VIPT_ALIASING
 	bool "Support VIPT Aliasing D$"
 	depends on ARC_HAS_DCACHE && ISA_ARCOMPACT
-	default n
 
 endif	#ARC_CACHE
 
@@ -262,7 +259,6 @@ config ARC_HAS_ICCM
 	bool "Use ICCM"
 	help
 	  Single Cycle RAMS to store Fast Path Code
-	default n
 
 config ARC_ICCM_SZ
 	int "ICCM Size in KB"
@@ -273,7 +269,6 @@ config ARC_HAS_DCCM
 	bool "Use DCCM"
 	help
 	  Single Cycle RAMS to store Fast Path Data
-	default n
 
 config ARC_DCCM_SZ
 	int "DCCM Size in KB"
@@ -366,13 +361,11 @@ if ISA_ARCOMPACT
 
 config ARC_COMPACT_IRQ_LEVELS
 	bool "Setup Timer IRQ as high Priority"
-	default n
 	# if SMP, LV2 enabled ONLY if ARC implementation has LV2 re-entrancy
 	depends on !SMP
 
 config ARC_FPU_SAVE_RESTORE
 	bool "Enable FPU state persistence across context switch"
-	default n
 	help
 	  Double Precision Floating Point unit had dedicated regs which
 	  need to be saved/restored across context-switch.
@@ -453,7 +446,6 @@ config HIGHMEM
 
 config ARC_HAS_PAE40
 	bool "Support for the 40-bit Physical Address Extension"
-	default n
 	depends on ISA_ARCV2
 	select HIGHMEM
 	select PHYS_ADDR_T_64BIT
@@ -496,7 +488,6 @@ config HZ
 
 config ARC_METAWARE_HLINK
 	bool "Support for Metaware debugger assisted Host access"
-	default n
 	help
 	  This options allows a Linux userland apps to directly access
 	  host file system (open/creat/read/write etc) with help from
@@ -524,13 +515,11 @@ config ARC_DW2_UNWIND
 
 config ARC_DBG_TLB_PARANOIA
 	bool "Paranoia Checks in Low Level TLB Handlers"
-	default n
 
 endif
 
 config ARC_UBOOT_SUPPORT
 	bool "Support uboot arg Handling"
-	default n
 	help
 	  ARC Linux by default checks for uboot provided args as pointers to
 	  external cmdline or DTB. This however breaks in absence of uboot,
-- 
GitLab


From 3624379d90ad2b65f9dbb30d7f7ce5498d2fe322 Mon Sep 17 00:00:00 2001
From: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
Date: Thu, 4 Oct 2018 16:12:12 +0300
Subject: [PATCH 1005/1890] ARC: IOC: panic if kernel was started with
 previously enabled IOC

If IOC was already enabled (due to bootloader) it technically needs to
be reconfigured with aperture base,size corresponding to Linux memory map
which will certainly be different than uboot's. But disabling and
reenabling IOC when DMA might be potentially active is tricky business.
To avoid random memory issues later, just panic here and ask user to
upgrade bootloader to one which doesn't enable IOC

This was actually seen as issue on some of the HSDK board with a version
of uboot which enabled IOC. There were random issues later with starting
of X or peripherals etc.

Also while I'm at it, replace hardcoded bits in ARC_REG_IO_COH_PARTIAL
and ARC_REG_IO_COH_ENABLE registers by definitions.

Inspired by: https://lkml.org/lkml/2018/1/19/557
Signed-off-by: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/include/asm/cache.h |  2 ++
 arch/arc/mm/cache.c          | 20 +++++++++++++++++---
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h
index ff7d3232764a2..f393b663413e4 100644
--- a/arch/arc/include/asm/cache.h
+++ b/arch/arc/include/asm/cache.h
@@ -113,7 +113,9 @@ extern unsigned long perip_base, perip_end;
 
 /* IO coherency related Auxiliary registers */
 #define ARC_REG_IO_COH_ENABLE	0x500
+#define ARC_IO_COH_ENABLE_BIT	BIT(0)
 #define ARC_REG_IO_COH_PARTIAL	0x501
+#define ARC_IO_COH_PARTIAL_BIT	BIT(0)
 #define ARC_REG_IO_COH_AP0_BASE	0x508
 #define ARC_REG_IO_COH_AP0_SIZE	0x509
 
diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
index f2701c13a66b2..cf9619d4efb4f 100644
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c
@@ -1144,6 +1144,20 @@ noinline void __init arc_ioc_setup(void)
 {
 	unsigned int ioc_base, mem_sz;
 
+	/*
+	 * If IOC was already enabled (due to bootloader) it technically needs to
+	 * be reconfigured with aperture base,size corresponding to Linux memory map
+	 * which will certainly be different than uboot's. But disabling and
+	 * reenabling IOC when DMA might be potentially active is tricky business.
+	 * To avoid random memory issues later, just panic here and ask user to
+	 * upgrade bootloader to one which doesn't enable IOC
+	 */
+	if (read_aux_reg(ARC_REG_IO_COH_ENABLE) & ARC_IO_COH_ENABLE_BIT)
+		panic("IOC already enabled, please upgrade bootloader!\n");
+
+	if (!ioc_enable)
+		return;
+
 	/*
 	 * As for today we don't support both IOC and ZONE_HIGHMEM enabled
 	 * simultaneously. This happens because as of today IOC aperture covers
@@ -1187,8 +1201,8 @@ noinline void __init arc_ioc_setup(void)
 		panic("IOC Aperture start must be aligned to the size of the aperture");
 
 	write_aux_reg(ARC_REG_IO_COH_AP0_BASE, ioc_base >> 12);
-	write_aux_reg(ARC_REG_IO_COH_PARTIAL, 1);
-	write_aux_reg(ARC_REG_IO_COH_ENABLE, 1);
+	write_aux_reg(ARC_REG_IO_COH_PARTIAL, ARC_IO_COH_PARTIAL_BIT);
+	write_aux_reg(ARC_REG_IO_COH_ENABLE, ARC_IO_COH_ENABLE_BIT);
 
 	/* Re-enable L1 dcache */
 	__dc_enable();
@@ -1265,7 +1279,7 @@ void __init arc_cache_init_master(void)
 	if (is_isa_arcv2() && l2_line_sz && !slc_enable)
 		arc_slc_disable();
 
-	if (is_isa_arcv2() && ioc_enable)
+	if (is_isa_arcv2() && ioc_exists)
 		arc_ioc_setup();
 
 	if (is_isa_arcv2() && l2_line_sz && slc_enable) {
-- 
GitLab


From afba5d157fe1f2e64bc3df46fe83841657ec8fdd Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Mon, 29 Oct 2018 09:44:18 -0700
Subject: [PATCH 1006/1890] ARCv2: boot log unaligned access in use

ARC gcc 8.x generates unaligned accesses by default, so call that out

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/kernel/setup.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index b2cae79a25d71..eea8c5ce63350 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -243,7 +243,7 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len)
 {
 	struct cpuinfo_arc *cpu = &cpuinfo_arc700[cpu_id];
 	struct bcr_identity *core = &cpu->core;
-	int i, n = 0;
+	int i, n = 0, ua = 0;
 
 	FIX_PTR(cpu);
 
@@ -263,10 +263,13 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len)
 		       IS_AVAIL2(cpu->extn.rtc, "RTC [UP 64-bit] ", CONFIG_ARC_TIMERS_64BIT),
 		       IS_AVAIL2(cpu->extn.gfrc, "GFRC [SMP 64-bit] ", CONFIG_ARC_TIMERS_64BIT));
 
-	n += i = scnprintf(buf + n, len - n, "%s%s%s%s%s",
+#ifdef __ARC_UNALIGNED__
+	ua = 1;
+#endif
+	n += i = scnprintf(buf + n, len - n, "%s%s%s%s%s%s",
 			   IS_AVAIL2(cpu->isa.atomic, "atomic ", CONFIG_ARC_HAS_LLSC),
 			   IS_AVAIL2(cpu->isa.ldd, "ll64 ", CONFIG_ARC_HAS_LL64),
-			   IS_AVAIL1(cpu->isa.unalign, "unalign (not used)"));
+			   IS_AVAIL1(cpu->isa.unalign, "unalign "), IS_USED_RUN(ua));
 
 	if (i)
 		n += scnprintf(buf + n, len - n, "\n\t\t: ");
-- 
GitLab


From 4592f11e47a2b28562d6cfe165d5ea7495ff4dca Mon Sep 17 00:00:00 2001
From: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
Date: Tue, 23 Oct 2018 15:09:19 +0300
Subject: [PATCH 1007/1890] ARC: [plat-hsdk] Enable DW APB GPIO support

Enable GPIO support on HSDK. HSDK SoC includes Synopsys
DesignWare DW_apb_gpio IP with 24 GPIOs mapped onto port A.

Signed-off-by: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
Acked-by: Alexey Brodkin <abrodkin@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/boot/dts/hsdk.dts      | 15 +++++++++++++++
 arch/arc/configs/hsdk_defconfig |  3 +++
 2 files changed, 18 insertions(+)

diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts
index ef149f59929ae..43f17b51ee89c 100644
--- a/arch/arc/boot/dts/hsdk.dts
+++ b/arch/arc/boot/dts/hsdk.dts
@@ -222,6 +222,21 @@ mmc@a000 {
 			bus-width = <4>;
 			dma-coherent;
 		};
+
+		gpio: gpio@3000 {
+			compatible = "snps,dw-apb-gpio";
+			reg = <0x3000 0x20>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			gpio_port_a: gpio-controller@0 {
+				compatible = "snps,dw-apb-gpio-port";
+				gpio-controller;
+				#gpio-cells = <2>;
+				snps,nr-gpios = <24>;
+				reg = <0>;
+			};
+		};
 	};
 
 	memory@80000000 {
diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig
index 1dec2b4bc5e6e..eca10b8baea5a 100644
--- a/arch/arc/configs/hsdk_defconfig
+++ b/arch/arc/configs/hsdk_defconfig
@@ -45,6 +45,9 @@ CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_8250_DW=y
 CONFIG_SERIAL_OF_PLATFORM=y
 # CONFIG_HW_RANDOM is not set
+CONFIG_GPIOLIB=y
+CONFIG_GPIO_SYSFS=y
+CONFIG_GPIO_DWAPB=y
 # CONFIG_HWMON is not set
 CONFIG_DRM=y
 # CONFIG_DRM_FBDEV_EMULATION is not set
-- 
GitLab


From 121e38e5acdc8e1e4cdb750fcdcc72f94e420968 Mon Sep 17 00:00:00 2001
From: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
Date: Wed, 7 Nov 2018 15:12:49 +0300
Subject: [PATCH 1008/1890] ARC: mm: fix uninitialised signal code in
 do_page_fault

Commit 15773ae938d8 ("signal/arc: Use force_sig_fault where
appropriate") introduced undefined behaviour by leaving si_code
unitiailized and leaking random kernel values to user space.

Fixes: 15773ae938d8 ("signal/arc: Use force_sig_fault where appropriate")
Signed-off-by: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/mm/fault.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c
index c9da6102eb4fb..e2d9fc3fea01e 100644
--- a/arch/arc/mm/fault.c
+++ b/arch/arc/mm/fault.c
@@ -66,7 +66,7 @@ void do_page_fault(unsigned long address, struct pt_regs *regs)
 	struct vm_area_struct *vma = NULL;
 	struct task_struct *tsk = current;
 	struct mm_struct *mm = tsk->mm;
-	int si_code;
+	int si_code = 0;
 	int ret;
 	vm_fault_t fault;
 	int write = regs->ecr_cause & ECR_C_PROTV_STORE;  /* ST/EX */
-- 
GitLab


From 82fba2df7f7c019627f24c5036dc99f41731d770 Mon Sep 17 00:00:00 2001
From: Aaro Koskinen <aaro.koskinen@iki.fi>
Date: Sun, 11 Nov 2018 00:06:12 +0200
Subject: [PATCH 1009/1890] MIPS: OCTEON: cavium_octeon_defconfig: re-enable
 OCTEON USB driver

Re-enable OCTEON USB driver which is needed on older hardware
(e.g. EdgeRouter Lite) for mass storage etc. This got accidentally
deleted when config options were changed for OCTEON2/3 USB.

Signed-off-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Signed-off-by: Paul Burton <paul.burton@mips.com>
Fixes: f922bc0ad08b ("MIPS: Octeon: cavium_octeon_defconfig: Enable more drivers")
Patchwork: https://patchwork.linux-mips.org/patch/21077/
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: James Hogan <jhogan@kernel.org>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Cc: stable@vger.kernel.org # 4.14+
---
 arch/mips/configs/cavium_octeon_defconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/mips/configs/cavium_octeon_defconfig b/arch/mips/configs/cavium_octeon_defconfig
index 490b12af103c1..c52d0efacd146 100644
--- a/arch/mips/configs/cavium_octeon_defconfig
+++ b/arch/mips/configs/cavium_octeon_defconfig
@@ -140,6 +140,7 @@ CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_DS1307=y
 CONFIG_STAGING=y
 CONFIG_OCTEON_ETHERNET=y
+CONFIG_OCTEON_USB=y
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_RAS=y
 CONFIG_EXT4_FS=y
-- 
GitLab


From 25517ed4e99b3be4244dfd61d1e5c753b09faf2c Mon Sep 17 00:00:00 2001
From: Huacai Chen <chenhc@lemote.com>
Date: Sat, 10 Nov 2018 11:50:14 +0800
Subject: [PATCH 1010/1890] MIPS: Let early memblock_alloc*() allocate memories
 bottom-up

After switched to NO_BOOTMEM, there are several boot failures. Some of
them have been fixed and some of them haven't. I find that many of them
are because of memory allocations are top-down, while the old behavior
is bottom-up. This patch let early memblock_alloc*() allocate memories
bottom-up to avoid some potential problems.

Signed-off-by: Huacai Chen <chenhc@lemote.com>
Signed-off-by: Paul Burton <paul.burton@mips.com>
Fixes: bcec54bf3118 ("mips: switch to NO_BOOTMEM")
Patchwork: https://patchwork.linux-mips.org/patch/21069/
References: https://patchwork.linux-mips.org/patch/21031/
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: James Hogan <james.hogan@mips.com>
Cc: Steven J . Hill <Steven.Hill@cavium.com>
Cc: linux-mips@linux-mips.org
Cc: Fuxin Zhang <zhangfx@lemote.com>
Cc: Zhangjin Wu <wuzhangjin@gmail.com>
---
 arch/mips/kernel/setup.c | 1 +
 arch/mips/kernel/traps.c | 3 +--
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index ea09ed6a80a9f..8c6c48ed786a1 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -794,6 +794,7 @@ static void __init arch_mem_init(char **cmdline_p)
 
 	/* call board setup routine */
 	plat_mem_setup();
+	memblock_set_bottom_up(true);
 
 	/*
 	 * Make sure all kernel memory is in the maps.  The "UP" and
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 0f852e1b58919..15e103c6d799e 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -2260,10 +2260,8 @@ void __init trap_init(void)
 		unsigned long size = 0x200 + VECTORSPACING*64;
 		phys_addr_t ebase_pa;
 
-		memblock_set_bottom_up(true);
 		ebase = (unsigned long)
 			memblock_alloc_from(size, 1 << fls(size), 0);
-		memblock_set_bottom_up(false);
 
 		/*
 		 * Try to ensure ebase resides in KSeg0 if possible.
@@ -2307,6 +2305,7 @@ void __init trap_init(void)
 	if (board_ebase_setup)
 		board_ebase_setup();
 	per_cpu_trap_init(true);
+	memblock_set_bottom_up(false);
 
 	/*
 	 * Copy the generic exception handlers to their final destination.
-- 
GitLab


From 410b5c7b48368317af95f0113692561d01d8144e Mon Sep 17 00:00:00 2001
From: Diego Viola <diego.viola@gmail.com>
Date: Mon, 12 Nov 2018 17:22:52 -0200
Subject: [PATCH 1011/1890] libata: blacklist SAMSUNG MZ7TD256HAFV-000L9 SSD

med_power_with_dipm still causes freezes after updating the firmware to
the latest version (DXT04L5Q).

Set model_rev to NULL and blacklist the device.

Cc: stable@vger.kernel.org
Signed-off-by: Diego Viola <diego.viola@gmail.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/ata/libata-core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 6e594644cb1d3..a7f5202a48152 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4553,7 +4553,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
 	/* These specific Samsung models/firmware-revs do not handle LPM well */
 	{ "SAMSUNG MZMPC128HBFU-000MV", "CXM14M1Q", ATA_HORKAGE_NOLPM, },
 	{ "SAMSUNG SSD PM830 mSATA *",  "CXM13D1Q", ATA_HORKAGE_NOLPM, },
-	{ "SAMSUNG MZ7TD256HAFV-000L9", "DXT02L5Q", ATA_HORKAGE_NOLPM, },
+	{ "SAMSUNG MZ7TD256HAFV-000L9", NULL,       ATA_HORKAGE_NOLPM, },
 
 	/* devices that don't properly handle queued TRIM commands */
 	{ "Micron_M500IT_*",		"MU01",	ATA_HORKAGE_NO_NCQ_TRIM |
-- 
GitLab


From d55bda1b3e7c5a87f10da54fdda866a9a9cef30b Mon Sep 17 00:00:00 2001
From: Christian Hoff <christian_hoff@gmx.net>
Date: Mon, 12 Nov 2018 11:11:29 -0800
Subject: [PATCH 1012/1890] Input: matrix_keypad - check for errors from
 of_get_named_gpio()

"of_get_named_gpio()" returns a negative error value if it fails
and drivers should check for this. This missing check was now
added to the matrix_keypad driver.

In my case "of_get_named_gpio()" returned -EPROBE_DEFER because
the referenced GPIOs belong to an I/O expander, which was not yet
probed at the point in time when the matrix_keypad driver was
loading. Because the driver did not check for errors from the
"of_get_named_gpio()" routine, it was assuming that "-EPROBE_DEFER"
is actually a GPIO number and continued as usual, which led to further
errors like this later on:

WARNING: CPU: 3 PID: 167 at drivers/gpio/gpiolib.c:114
gpio_to_desc+0xc8/0xd0
invalid GPIO -517

Note that the "GPIO number" -517 in the error message above is
actually "-EPROBE_DEFER".

As part of the patch a misleading error message "no platform data defined"
was also removed. This does not lead to information loss because the other
error paths in matrix_keypad_parse_dt() already print an error.

Signed-off-by: Christian Hoff <christian_hoff@gmx.net>
Suggested-by: Sebastian Reichel <sre@kernel.org>
Reviewed-by: Sebastian Reichel <sre@kernel.org>
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/keyboard/matrix_keypad.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/drivers/input/keyboard/matrix_keypad.c b/drivers/input/keyboard/matrix_keypad.c
index 41614c1859182..782dda68d93ae 100644
--- a/drivers/input/keyboard/matrix_keypad.c
+++ b/drivers/input/keyboard/matrix_keypad.c
@@ -407,7 +407,7 @@ matrix_keypad_parse_dt(struct device *dev)
 	struct matrix_keypad_platform_data *pdata;
 	struct device_node *np = dev->of_node;
 	unsigned int *gpios;
-	int i, nrow, ncol;
+	int ret, i, nrow, ncol;
 
 	if (!np) {
 		dev_err(dev, "device lacks DT data\n");
@@ -452,12 +452,19 @@ matrix_keypad_parse_dt(struct device *dev)
 		return ERR_PTR(-ENOMEM);
 	}
 
-	for (i = 0; i < pdata->num_row_gpios; i++)
-		gpios[i] = of_get_named_gpio(np, "row-gpios", i);
+	for (i = 0; i < nrow; i++) {
+		ret = of_get_named_gpio(np, "row-gpios", i);
+		if (ret < 0)
+			return ERR_PTR(ret);
+		gpios[i] = ret;
+	}
 
-	for (i = 0; i < pdata->num_col_gpios; i++)
-		gpios[pdata->num_row_gpios + i] =
-			of_get_named_gpio(np, "col-gpios", i);
+	for (i = 0; i < ncol; i++) {
+		ret = of_get_named_gpio(np, "col-gpios", i);
+		if (ret < 0)
+			return ERR_PTR(ret);
+		gpios[nrow + i] = ret;
+	}
 
 	pdata->row_gpios = gpios;
 	pdata->col_gpios = &gpios[pdata->num_row_gpios];
@@ -484,10 +491,8 @@ static int matrix_keypad_probe(struct platform_device *pdev)
 	pdata = dev_get_platdata(&pdev->dev);
 	if (!pdata) {
 		pdata = matrix_keypad_parse_dt(&pdev->dev);
-		if (IS_ERR(pdata)) {
-			dev_err(&pdev->dev, "no platform data defined\n");
+		if (IS_ERR(pdata))
 			return PTR_ERR(pdata);
-		}
 	} else if (!pdata->keymap_data) {
 		dev_err(&pdev->dev, "no keymap data defined\n");
 		return -EINVAL;
-- 
GitLab


From 3ed64da3b790be7c63601e8ca6341b7dff74a660 Mon Sep 17 00:00:00 2001
From: Patrick Gaskin <patrick@pgaskin.net>
Date: Mon, 12 Nov 2018 11:12:24 -0800
Subject: [PATCH 1013/1890] Input: elan_i2c - add ELAN0620 to the ACPI table

Add ELAN0620 to the ACPI table to support the elan touchpad in
the Lenovo IdeaPad 130-15IKB.

Signed-off-by: Patrick Gaskin <patrick@pgaskin.net>
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/mouse/elan_i2c_core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c
index b0f9d19b3410a..99227807a5840 100644
--- a/drivers/input/mouse/elan_i2c_core.c
+++ b/drivers/input/mouse/elan_i2c_core.c
@@ -1348,6 +1348,7 @@ static const struct acpi_device_id elan_acpi_id[] = {
 	{ "ELAN0618", 0 },
 	{ "ELAN061C", 0 },
 	{ "ELAN061D", 0 },
+	{ "ELAN0620", 0 },
 	{ "ELAN0622", 0 },
 	{ "ELAN1000", 0 },
 	{ }
-- 
GitLab


From ac5722c1643a2fb75224c79b578214956d34f989 Mon Sep 17 00:00:00 2001
From: Brian Norris <briannorris@chromium.org>
Date: Mon, 12 Nov 2018 11:23:39 -0800
Subject: [PATCH 1014/1890] Input: cros_ec_keyb - fix button/switch capability
 reports

The cros_ec_keyb_bs array lists buttons and switches together, expecting
that its users will match the appropriate type and bit fields. But
cros_ec_keyb_register_bs() only checks the 'bit' field, which causes
misreported input capabilities in some cases. For example, tablets
(e.g., Scarlet -- a.k.a. Acer Chromebook Tab 10) were reporting a SW_LID
capability, because EC_MKBP_POWER_BUTTON and EC_MKBP_LID_OPEN happen to
share the same bit.

(This has comedic effect on a tablet, in which a power-management daemon
then thinks this "lid" is closed, and so puts the system to sleep as
soon as it boots!)

To fix this, check both the 'ev_type' and 'bit' fields before reporting
the capability.

Tested with a lid (Kevin / Samsung Chromebook Plus) and without a lid
(Scarlet / Acer Chromebook Tab 10).

This error got introduced when porting the feature from the downstream
Chromium OS kernel to be upstreamed.

Fixes: cdd7950e7aa4 ("input: cros_ec_keyb: Add non-matrix buttons and switches")
Cc: <stable@vger.kernel.org>
Signed-off-by: Brian Norris <briannorris@chromium.org>
Reviewed-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/keyboard/cros_ec_keyb.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/input/keyboard/cros_ec_keyb.c b/drivers/input/keyboard/cros_ec_keyb.c
index 81be6f781f0b6..d560011815983 100644
--- a/drivers/input/keyboard/cros_ec_keyb.c
+++ b/drivers/input/keyboard/cros_ec_keyb.c
@@ -493,7 +493,8 @@ static int cros_ec_keyb_register_bs(struct cros_ec_keyb *ckdev)
 	for (i = 0; i < ARRAY_SIZE(cros_ec_keyb_bs); i++) {
 		const struct cros_ec_bs_map *map = &cros_ec_keyb_bs[i];
 
-		if (buttons & BIT(map->bit))
+		if ((map->ev_type == EV_KEY && (buttons & BIT(map->bit))) ||
+		    (map->ev_type == EV_SW && (switches & BIT(map->bit))))
 			input_set_capability(idev, map->ev_type, map->code);
 	}
 
-- 
GitLab


From 6c4b88288abf908d6fe9fc71fdfeb69cb4135193 Mon Sep 17 00:00:00 2001
From: Ding Tao <miyatsu@qq.com>
Date: Mon, 12 Nov 2018 11:27:11 -0800
Subject: [PATCH 1015/1890] Input: dt-bindings - fix a typo in file
 input-reset.txt

Replace sysrq-reset-seq with keyset.

Signed-off-by: Ding Tao <miyatsu@qq.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 Documentation/devicetree/bindings/input/input-reset.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/input/input-reset.txt b/Documentation/devicetree/bindings/input/input-reset.txt
index 2bb2626fdb78b..1ca6cc5ebf8ed 100644
--- a/Documentation/devicetree/bindings/input/input-reset.txt
+++ b/Documentation/devicetree/bindings/input/input-reset.txt
@@ -12,7 +12,7 @@ The /chosen node should contain a 'linux,sysrq-reset-seq' child node to define
 a set of keys.
 
 Required property:
-sysrq-reset-seq: array of Linux keycodes, one keycode per cell.
+keyset: array of Linux keycodes, one keycode per cell.
 
 Optional property:
 timeout-ms: duration keys must be pressed together in milliseconds before
-- 
GitLab


From dda2af7418db9dd51913cb71d9ec9266d4f7924d Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Mon, 12 Nov 2018 11:29:27 -0800
Subject: [PATCH 1016/1890] Input: migor_ts - convert to SPDX identifiers

This patch updates license to use SPDX-License-Identifier
instead of verbose license text.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/touchscreen/migor_ts.c | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

diff --git a/drivers/input/touchscreen/migor_ts.c b/drivers/input/touchscreen/migor_ts.c
index 02fb119858197..42d3fd7e04d7c 100644
--- a/drivers/input/touchscreen/migor_ts.c
+++ b/drivers/input/touchscreen/migor_ts.c
@@ -1,23 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * Touch Screen driver for Renesas MIGO-R Platform
  *
  * Copyright (c) 2008 Magnus Damm
  * Copyright (c) 2007 Ujjwal Pande <ujjwal@kenati.com>,
  *  Kenati Technologies Pvt Ltd.
- *
- * This file is free software; you can redistribute it and/or
- * modify it under the terms of the GNU  General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This file is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- *  General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 #include <linux/module.h>
 #include <linux/kernel.h>
-- 
GitLab


From 4aeba6418a160d8e58501cd57800252ba349ad1a Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Mon, 12 Nov 2018 11:30:06 -0800
Subject: [PATCH 1017/1890] Input: st1232 - convert to SPDX identifiers

This patch updates license to use SPDX-License-Identifier
instead of verbose license text.

As original license mentioned, it is GPL-2.0 in SPDX.
Then, MODULE_LICENSE() should be "GPL v2" instead of "GPL".
See ${LINUX}/include/linux/module.h

	"GPL"		[GNU Public License v2 or later]
	"GPL v2"	[GNU Public License v2]

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/touchscreen/st1232.c | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/drivers/input/touchscreen/st1232.c b/drivers/input/touchscreen/st1232.c
index b71673911aac3..11ff32c680250 100644
--- a/drivers/input/touchscreen/st1232.c
+++ b/drivers/input/touchscreen/st1232.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * ST1232 Touchscreen Controller Driver
  *
@@ -7,15 +8,6 @@
  * Using code from:
  *  - android.git.kernel.org: projects/kernel/common.git: synaptics_i2c_rmi.c
  *	Copyright (C) 2007 Google, Inc.
- *
- * This software is licensed under the terms of the GNU General Public
- * License version 2, as published by the Free Software Foundation, and
- * may be copied, distributed, and modified under those terms.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/delay.h>
@@ -295,4 +287,4 @@ module_i2c_driver(st1232_ts_driver);
 
 MODULE_AUTHOR("Tony SIM <chinyeow.sim.xt@renesas.com>");
 MODULE_DESCRIPTION("SITRONIX ST1232 Touchscreen Controller Driver");
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("GPL v2");
-- 
GitLab


From e94b9f12fa2ceb4d5067aa2a7580b02850fb272e Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Mon, 12 Nov 2018 11:41:04 -0800
Subject: [PATCH 1018/1890] Input: atkbd - clean up indentation issue

Trivial fix to clean up indentation issues, add missing tab

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/keyboard/atkbd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c
index 7e75835e220f2..850bb259c20eb 100644
--- a/drivers/input/keyboard/atkbd.c
+++ b/drivers/input/keyboard/atkbd.c
@@ -841,7 +841,7 @@ static int atkbd_select_set(struct atkbd *atkbd, int target_set, int allow_extra
 	if (param[0] != 3) {
 		param[0] = 2;
 		if (ps2_command(ps2dev, param, ATKBD_CMD_SSCANSET))
-		return 2;
+			return 2;
 	}
 
 	ps2_command(ps2dev, param, ATKBD_CMD_SETALL_MBR);
-- 
GitLab


From 5581c670fb7ec267fc79215c6d5176b07e5f6dad Mon Sep 17 00:00:00 2001
From: shaoyunl <shaoyun.liu@amd.com>
Date: Mon, 12 Nov 2018 11:19:24 -0500
Subject: [PATCH 1019/1890] drm/amdgpu: set system aperture to cover whole FB
 region
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In XGMI configuration, the FB region covers vram region from peer
device, adjust system aperture to cover all of them

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Christian KÃ¶nig <christian.koenig@amd.com>
Signed-off-by: shaoyunl <shaoyun.liu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 6 +++---
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c  | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
index ceb7847b504f7..bfa317ad20a95 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -72,7 +72,7 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
 
 	/* Program the system aperture low logical page number. */
 	WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
-		     min(adev->gmc.vram_start, adev->gmc.agp_start) >> 18);
+		     min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
 
 	if (adev->asic_type == CHIP_RAVEN && adev->rev_id >= 0x8)
 		/*
@@ -82,11 +82,11 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
 		 * to get rid of the VM fault and hardware hang.
 		 */
 		WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
-			     max((adev->gmc.vram_end >> 18) + 0x1,
+			     max((adev->gmc.fb_end >> 18) + 0x1,
 				 adev->gmc.agp_end >> 18));
 	else
 		WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
-			     max(adev->gmc.vram_end, adev->gmc.agp_end) >> 18);
+			     max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
 
 	/* Set default page address. */
 	value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index fd23ba1226a57..a0db67adc34ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -90,7 +90,7 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
 
 	/* Program the system aperture low logical page number. */
 	WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
-		     min(adev->gmc.vram_start, adev->gmc.agp_start) >> 18);
+		     min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
 
 	if (adev->asic_type == CHIP_RAVEN && adev->rev_id >= 0x8)
 		/*
@@ -100,11 +100,11 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
 		 * to get rid of the VM fault and hardware hang.
 		 */
 		WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
-			     max((adev->gmc.vram_end >> 18) + 0x1,
+			     max((adev->gmc.fb_end >> 18) + 0x1,
 				 adev->gmc.agp_end >> 18));
 	else
 		WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
-			     max(adev->gmc.vram_end, adev->gmc.agp_end) >> 18);
+			     max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
 
 	/* Set default page address. */
 	value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start +
-- 
GitLab


From 21a446cf186570168b7281b154b1993968598aca Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Mon, 5 Nov 2018 11:10:50 -0500
Subject: [PATCH 1020/1890] NFSv4: Don't exit the state manager without
 clearing NFS4CLNT_MANAGER_RUNNING

If we exit the NFSv4 state manager due to a umount, then we can end up
leaving the NFS4CLNT_MANAGER_RUNNING flag set. If another mount causes
the nfs4_client to be rereferenced before it is destroyed, then we end
up never being able to recover state.

Fixes: 47c2199b6eb5 ("NFSv4.1: Ensure state manager thread dies on last ...")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Cc: stable@vger.kernel.org # v4.15+
---
 fs/nfs/nfs4state.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 62ae0fd345ad6..98d1b6a6646a0 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -2601,11 +2601,12 @@ static void nfs4_state_manager(struct nfs_client *clp)
 		nfs4_clear_state_manager_bit(clp);
 		/* Did we race with an attempt to give us more work? */
 		if (clp->cl_state == 0)
-			break;
+			return;
 		if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
-			break;
+			return;
 	} while (refcount_read(&clp->cl_count) > 1);
-	return;
+	goto out_drain;
+
 out_error:
 	if (strlen(section))
 		section_sep = ": ";
@@ -2613,6 +2614,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
 			" with error %d\n", section_sep, section,
 			clp->cl_hostname, -status);
 	ssleep(1);
+out_drain:
 	nfs4_end_drain_session(clp);
 	nfs4_clear_state_manager_bit(clp);
 }
-- 
GitLab


From a1aa09be21fa344d1f5585aab8164bfae55f57e3 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Mon, 5 Nov 2018 12:17:01 -0500
Subject: [PATCH 1021/1890] NFSv4: Ensure that the state manager exits the loop
 on SIGKILL

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/nfs4state.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 98d1b6a6646a0..ffea578853949 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -2604,7 +2604,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
 			return;
 		if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
 			return;
-	} while (refcount_read(&clp->cl_count) > 1);
+	} while (refcount_read(&clp->cl_count) > 1 && !signalled());
 	goto out_drain;
 
 out_error:
-- 
GitLab


From a652a4bc21695a57c3b8d13d222a6f8b41f100aa Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Mon, 12 Nov 2018 15:30:52 -0500
Subject: [PATCH 1022/1890] SUNRPC: Fix a Oops when destroying the RPCSEC_GSS
 credential cache

Commit 07d02a67b7fa causes a use-after free in the RPCSEC_GSS credential
destroy code, because the call to get_rpccred() in gss_destroying_context()
will now always fail to increment the refcount.

While we could just replace the get_rpccred() with a refcount_set(), that
would have the unfortunate consequence of resurrecting a credential in
the credential cache for which we are in the process of destroying the
RPCSEC_GSS context. Rather than do this, we choose to make a copy that
is never added to the cache and use that to destroy the context.

Fixes: 07d02a67b7fa ("SUNRPC: Simplify lookup code")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 net/sunrpc/auth_gss/auth_gss.c | 61 +++++++++++++++++++++++-----------
 1 file changed, 42 insertions(+), 19 deletions(-)

diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 30f970cdc7f66..5d3f252659f19 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1239,36 +1239,59 @@ gss_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
 	return &gss_auth->rpc_auth;
 }
 
+static struct gss_cred *
+gss_dup_cred(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
+{
+	struct gss_cred *new;
+
+	/* Make a copy of the cred so that we can reference count it */
+	new = kzalloc(sizeof(*gss_cred), GFP_NOIO);
+	if (new) {
+		struct auth_cred acred = {
+			.uid = gss_cred->gc_base.cr_uid,
+		};
+		struct gss_cl_ctx *ctx =
+			rcu_dereference_protected(gss_cred->gc_ctx, 1);
+
+		rpcauth_init_cred(&new->gc_base, &acred,
+				&gss_auth->rpc_auth,
+				&gss_nullops);
+		new->gc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE;
+		new->gc_service = gss_cred->gc_service;
+		new->gc_principal = gss_cred->gc_principal;
+		kref_get(&gss_auth->kref);
+		rcu_assign_pointer(new->gc_ctx, ctx);
+		gss_get_ctx(ctx);
+	}
+	return new;
+}
+
 /*
- * gss_destroying_context will cause the RPCSEC_GSS to send a NULL RPC call
+ * gss_send_destroy_context will cause the RPCSEC_GSS to send a NULL RPC call
  * to the server with the GSS control procedure field set to
  * RPC_GSS_PROC_DESTROY. This should normally cause the server to release
  * all RPCSEC_GSS state associated with that context.
  */
-static int
-gss_destroying_context(struct rpc_cred *cred)
+static void
+gss_send_destroy_context(struct rpc_cred *cred)
 {
 	struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base);
 	struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth);
 	struct gss_cl_ctx *ctx = rcu_dereference_protected(gss_cred->gc_ctx, 1);
+	struct gss_cred *new;
 	struct rpc_task *task;
 
-	if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) == 0)
-		return 0;
-
-	ctx->gc_proc = RPC_GSS_PROC_DESTROY;
-	cred->cr_ops = &gss_nullops;
-
-	/* Take a reference to ensure the cred will be destroyed either
-	 * by the RPC call or by the put_rpccred() below */
-	get_rpccred(cred);
+	new = gss_dup_cred(gss_auth, gss_cred);
+	if (new) {
+		ctx->gc_proc = RPC_GSS_PROC_DESTROY;
 
-	task = rpc_call_null(gss_auth->client, cred, RPC_TASK_ASYNC|RPC_TASK_SOFT);
-	if (!IS_ERR(task))
-		rpc_put_task(task);
+		task = rpc_call_null(gss_auth->client, &new->gc_base,
+				RPC_TASK_ASYNC|RPC_TASK_SOFT);
+		if (!IS_ERR(task))
+			rpc_put_task(task);
 
-	put_rpccred(cred);
-	return 1;
+		put_rpccred(&new->gc_base);
+	}
 }
 
 /* gss_destroy_cred (and gss_free_ctx) are used to clean up after failure
@@ -1330,8 +1353,8 @@ static void
 gss_destroy_cred(struct rpc_cred *cred)
 {
 
-	if (gss_destroying_context(cred))
-		return;
+	if (test_and_clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0)
+		gss_send_destroy_context(cred);
 	gss_destroy_nullcred(cred);
 }
 
-- 
GitLab


From e3d5e573a54dabdc0f9f3cb039d799323372b251 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Mon, 12 Nov 2018 16:06:51 -0500
Subject: [PATCH 1023/1890] SUNRPC: Fix a bogus get/put in
 generic_key_to_expire()

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 net/sunrpc/auth_generic.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
index d8831b988b1e7..ab4a3be1542a0 100644
--- a/net/sunrpc/auth_generic.c
+++ b/net/sunrpc/auth_generic.c
@@ -281,13 +281,7 @@ static bool generic_key_to_expire(struct rpc_cred *cred)
 {
 	struct auth_cred *acred = &container_of(cred, struct generic_cred,
 						gc_base)->acred;
-	bool ret;
-
-	get_rpccred(cred);
-	ret = test_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags);
-	put_rpccred(cred);
-
-	return ret;
+	return test_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags);
 }
 
 static const struct rpc_credops generic_credops = {
-- 
GitLab


From f8397d69daef06d358430d3054662fb597e37c00 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Tue, 6 Nov 2018 16:40:20 +0200
Subject: [PATCH 1024/1890] btrfs: Always try all copies when reading extent
 buffers

When a metadata read is served the endio routine btree_readpage_end_io_hook
is called which eventually runs the tree-checker. If tree-checker fails
to validate the read eb then it sets EXTENT_BUFFER_CORRUPT flag. This
leads to btree_read_extent_buffer_pages wrongly assuming that all
available copies of this extent buffer are wrong and failing prematurely.
Fix this modify btree_read_extent_buffer_pages to read all copies of
the data.

This failure was exhibitted in xfstests btrfs/124 which would
spuriously fail its balance operations. The reason was that when balance
was run following re-introduction of the missing raid1 disk
__btrfs_map_block would map the read request to stripe 0, which
corresponded to devid 2 (the disk which is being removed in the test):

    item 2 key (FIRST_CHUNK_TREE CHUNK_ITEM 3553624064) itemoff 15975 itemsize 112
	length 1073741824 owner 2 stripe_len 65536 type DATA|RAID1
	io_align 65536 io_width 65536 sector_size 4096
	num_stripes 2 sub_stripes 1
		stripe 0 devid 2 offset 2156920832
		dev_uuid 8466c350-ed0c-4c3b-b17d-6379b445d5c8
		stripe 1 devid 1 offset 3553624064
		dev_uuid 1265d8db-5596-477e-af03-df08eb38d2ca

This caused read requests for a checksum item that to be routed to the
stale disk which triggered the aforementioned logic involving
EXTENT_BUFFER_CORRUPT flag. This then triggered cascading failures of
the balance operation.

Fixes: a826d6dcb32d ("Btrfs: check items for correctness as we search")
CC: stable@vger.kernel.org # 4.4+
Suggested-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/disk-io.c | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 3f0b6d1936e8e..6d776717d8b39 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -477,9 +477,9 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info,
 	int mirror_num = 0;
 	int failed_mirror = 0;
 
-	clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
 	io_tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
 	while (1) {
+		clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
 		ret = read_extent_buffer_pages(io_tree, eb, WAIT_COMPLETE,
 					       mirror_num);
 		if (!ret) {
@@ -493,15 +493,6 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info,
 				break;
 		}
 
-		/*
-		 * This buffer's crc is fine, but its contents are corrupted, so
-		 * there is no reason to read the other copies, they won't be
-		 * any less wrong.
-		 */
-		if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags) ||
-		    ret == -EUCLEAN)
-			break;
-
 		num_copies = btrfs_num_copies(fs_info,
 					      eb->start, eb->len);
 		if (num_copies == 1)
-- 
GitLab


From 4ab49461d9d9b8274cc72d8656fe685ed394b8f7 Mon Sep 17 00:00:00 2001
From: Anup Patel <anup@brainfault.org>
Date: Thu, 1 Nov 2018 10:40:33 +0530
Subject: [PATCH 1025/1890] RISC-V: defconfig: Enable printk timestamps

The printk timestamps are very useful information to visually see
where kernel is spending time during boot. It also helps us see
the timing of hotplug events at runtime.

This patch enables printk timestamps in RISC-V defconfig so that
we have it enabled by default (similar to other architectures
such as x86_64, arm64, etc).

Signed-off-by: Anup Patel <anup@brainfault.org>
Acked-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
---
 arch/riscv/configs/defconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index 07fa9ea75fea1..ef4f15df9adf0 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -76,4 +76,5 @@ CONFIG_NFS_V4_1=y
 CONFIG_NFS_V4_2=y
 CONFIG_ROOT_NFS=y
 CONFIG_CRYPTO_USER_API_HASH=y
+CONFIG_PRINTK_TIME=y
 # CONFIG_RCU_TRACE is not set
-- 
GitLab


From 10febb3ecace4b557eaa0d52c9d2c3531c1a715a Mon Sep 17 00:00:00 2001
From: David Abdurachmanov <david.abdurachmanov@gmail.com>
Date: Mon, 5 Nov 2018 15:40:04 +0100
Subject: [PATCH 1026/1890] riscv: fix spacing in struct pt_regs

Replace 8 spaces with tab to match styling.

Signed-off-by: David Abdurachmanov <david.abdurachmanov@gmail.com>
Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
---
 arch/riscv/include/asm/ptrace.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/riscv/include/asm/ptrace.h b/arch/riscv/include/asm/ptrace.h
index 2c5df945d43c9..bbe1862e8f80c 100644
--- a/arch/riscv/include/asm/ptrace.h
+++ b/arch/riscv/include/asm/ptrace.h
@@ -56,8 +56,8 @@ struct pt_regs {
 	unsigned long sstatus;
 	unsigned long sbadaddr;
 	unsigned long scause;
-        /* a0 value before the syscall */
-        unsigned long orig_a0;
+	/* a0 value before the syscall */
+	unsigned long orig_a0;
 };
 
 #ifdef CONFIG_64BIT
-- 
GitLab


From f157d411a9eb170d2ee6b766da7a381962017cc9 Mon Sep 17 00:00:00 2001
From: David Abdurachmanov <david.abdurachmanov@gmail.com>
Date: Mon, 5 Nov 2018 15:35:37 +0100
Subject: [PATCH 1027/1890] riscv: add missing vdso_install target

Building kernel 4.20 for Fedora as RPM fails, because riscv is missing
vdso_install target in arch/riscv/Makefile.

Signed-off-by: David Abdurachmanov <david.abdurachmanov@gmail.com>
Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
---
 arch/riscv/Makefile | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index d10146197533a..4af153a182b07 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -77,4 +77,8 @@ core-y += arch/riscv/kernel/ arch/riscv/mm/
 
 libs-y += arch/riscv/lib/
 
+PHONY += vdso_install
+vdso_install:
+	$(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso $@
+
 all: vmlinux
-- 
GitLab


From 85d90b91807bb0c4a0fcff6a144e73f11cda782a Mon Sep 17 00:00:00 2001
From: Olof Johansson <olof@lixom.net>
Date: Tue, 30 Oct 2018 23:47:07 -0700
Subject: [PATCH 1028/1890] RISC-V: lib: Fix build error for 64-bit

Fixes the following build error from tinyconfig:

riscv64-unknown-linux-gnu-ld: kernel/sched/fair.o: in function `.L8':
fair.c:(.text+0x70): undefined reference to `__lshrti3'
riscv64-unknown-linux-gnu-ld: kernel/time/clocksource.o: in function `.L0 ':
clocksource.c:(.text+0x334): undefined reference to `__lshrti3'

Fixes: 7f47c73b355f ("RISC-V: Build tishift only on 64-bit")
Signed-off-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
---
 arch/riscv/lib/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index 5739bd05d289e..4e2e600f7d538 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -3,6 +3,6 @@ lib-y	+= memcpy.o
 lib-y	+= memset.o
 lib-y	+= uaccess.o
 
-lib-(CONFIG_64BIT) += tishift.o
+lib-$(CONFIG_64BIT) += tishift.o
 
 lib-$(CONFIG_32BIT) += udivdi3.o
-- 
GitLab


From ef3a61406618291c46da168ff91acaa28d85944c Mon Sep 17 00:00:00 2001
From: Olof Johansson <olof@lixom.net>
Date: Tue, 30 Oct 2018 23:47:09 -0700
Subject: [PATCH 1029/1890] RISC-V: Silence some module warnings on 32-bit

Fixes:

arch/riscv/kernel/module.c: In function 'apply_r_riscv_32_rela':
./include/linux/kern_levels.h:5:18: warning: format '%llx' expects argument of type 'long long unsigned int', but argument 3 has type 'Elf32_Addr' {aka 'unsigned int'} [-Wformat=]
arch/riscv/kernel/module.c:23:27: note: format string is defined here
arch/riscv/kernel/module.c: In function 'apply_r_riscv_pcrel_hi20_rela':
./include/linux/kern_levels.h:5:18: warning: format '%llx' expects argument of type 'long long unsigned int', but argument 3 has type 'Elf32_Addr' {aka 'unsigned int'} [-Wformat=]
arch/riscv/kernel/module.c:104:23: note: format string is defined here
arch/riscv/kernel/module.c: In function 'apply_r_riscv_hi20_rela':
./include/linux/kern_levels.h:5:18: warning: format '%llx' expects argument of type 'long long unsigned int', but argument 3 has type 'Elf32_Addr' {aka 'unsigned int'} [-Wformat=]
arch/riscv/kernel/module.c:146:23: note: format string is defined here
arch/riscv/kernel/module.c: In function 'apply_r_riscv_got_hi20_rela':
./include/linux/kern_levels.h:5:18: warning: format '%llx' expects argument of type 'long long unsigned int', but argument 3 has type 'Elf32_Addr' {aka 'unsigned int'} [-Wformat=]
arch/riscv/kernel/module.c:190:60: note: format string is defined here
arch/riscv/kernel/module.c: In function 'apply_r_riscv_call_plt_rela':
./include/linux/kern_levels.h:5:18: warning: format '%llx' expects argument of type 'long long unsigned int', but argument 3 has type 'Elf32_Addr' {aka 'unsigned int'} [-Wformat=]
arch/riscv/kernel/module.c:214:24: note: format string is defined here
arch/riscv/kernel/module.c: In function 'apply_r_riscv_call_rela':
./include/linux/kern_levels.h:5:18: warning: format '%llx' expects argument of type 'long long unsigned int', but argument 3 has type 'Elf32_Addr' {aka 'unsigned int'} [-Wformat=]
arch/riscv/kernel/module.c:236:23: note: format string is defined here

Signed-off-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
---
 arch/riscv/kernel/module.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c
index 3303ed2cd4193..7dd308129b40f 100644
--- a/arch/riscv/kernel/module.c
+++ b/arch/riscv/kernel/module.c
@@ -21,7 +21,7 @@ static int apply_r_riscv_32_rela(struct module *me, u32 *location, Elf_Addr v)
 {
 	if (v != (u32)v) {
 		pr_err("%s: value %016llx out of range for 32-bit field\n",
-		       me->name, v);
+		       me->name, (long long)v);
 		return -EINVAL;
 	}
 	*location = v;
@@ -102,7 +102,7 @@ static int apply_r_riscv_pcrel_hi20_rela(struct module *me, u32 *location,
 	if (offset != (s32)offset) {
 		pr_err(
 		  "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n",
-		  me->name, v, location);
+		  me->name, (long long)v, location);
 		return -EINVAL;
 	}
 
@@ -144,7 +144,7 @@ static int apply_r_riscv_hi20_rela(struct module *me, u32 *location,
 	if (IS_ENABLED(CMODEL_MEDLOW)) {
 		pr_err(
 		  "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n",
-		  me->name, v, location);
+		  me->name, (long long)v, location);
 		return -EINVAL;
 	}
 
@@ -188,7 +188,7 @@ static int apply_r_riscv_got_hi20_rela(struct module *me, u32 *location,
 	} else {
 		pr_err(
 		  "%s: can not generate the GOT entry for symbol = %016llx from PC = %p\n",
-		  me->name, v, location);
+		  me->name, (long long)v, location);
 		return -EINVAL;
 	}
 
@@ -212,7 +212,7 @@ static int apply_r_riscv_call_plt_rela(struct module *me, u32 *location,
 		} else {
 			pr_err(
 			  "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n",
-			  me->name, v, location);
+			  me->name, (long long)v, location);
 			return -EINVAL;
 		}
 	}
@@ -234,7 +234,7 @@ static int apply_r_riscv_call_rela(struct module *me, u32 *location,
 	if (offset != fill_v) {
 		pr_err(
 		  "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n",
-		  me->name, v, location);
+		  me->name, (long long)v, location);
 		return -EINVAL;
 	}
 
-- 
GitLab


From ba038546ff9e15d54d1134b5c5d2355648c00dec Mon Sep 17 00:00:00 2001
From: Keerthy <j-keerthy@ti.com>
Date: Wed, 7 Nov 2018 10:04:22 +0530
Subject: [PATCH 1030/1890] opp: ti-opp-supply: Dynamically update u_volt_min

The voltage range (min, max) provided in the device tree is from
the data manual and is pretty big, catering to a wide range of devices.
On a i2c read/write failure the regulator_set_voltage_triplet function
falls back to set voltage between min and max. The min value from Device
Tree can be lesser than the optimal value and in that case that can lead
to a hang or crash. Hence set the u_volt_min dynamically to the optimal
voltage value.

Cc: 4.16+ <stable@vger.kernel.org> # v4.16+
Fixes: 9a835fa6e47 ("PM / OPP: Add ti-opp-supply driver")
Signed-off-by: Keerthy <j-keerthy@ti.com>
Acked-by: Dave Gerlach <d-gerlach@ti.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/opp/ti-opp-supply.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/opp/ti-opp-supply.c b/drivers/opp/ti-opp-supply.c
index 9e5a9a3112c9c..29e08a49d13bc 100644
--- a/drivers/opp/ti-opp-supply.c
+++ b/drivers/opp/ti-opp-supply.c
@@ -290,6 +290,9 @@ static int ti_opp_supply_set_opp(struct dev_pm_set_opp_data *data)
 	vdd_uv = _get_optimal_vdd_voltage(dev, &opp_data,
 					  new_supply_vbb->u_volt);
 
+	if (new_supply_vdd->u_volt_min < vdd_uv)
+		new_supply_vdd->u_volt_min = vdd_uv;
+
 	/* Scaling up? Scale voltage before frequency */
 	if (freq > old_freq) {
 		ret = _opp_set_voltage(dev, new_supply_vdd, vdd_uv, vdd_reg,
-- 
GitLab


From 622fecbccfe86a8052dea6b512d3821dcce29994 Mon Sep 17 00:00:00 2001
From: Keerthy <j-keerthy@ti.com>
Date: Wed, 7 Nov 2018 10:04:23 +0530
Subject: [PATCH 1031/1890] opp: ti-opp-supply: Correct the supply in
 _get_optimal_vdd_voltage call

_get_optimal_vdd_voltage call provides new_supply_vbb->u_volt
as the reference voltage while it should be really new_supply_vdd->u_volt.

Cc: 4.16+ <stable@vger.kernel.org> # v4.16+
Fixes: 9a835fa6e47 ("PM / OPP: Add ti-opp-supply driver")
Signed-off-by: Keerthy <j-keerthy@ti.com>
Acked-by: Dave Gerlach <d-gerlach@ti.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/opp/ti-opp-supply.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/opp/ti-opp-supply.c b/drivers/opp/ti-opp-supply.c
index 29e08a49d13bc..3f4fb4dbbe33b 100644
--- a/drivers/opp/ti-opp-supply.c
+++ b/drivers/opp/ti-opp-supply.c
@@ -288,7 +288,7 @@ static int ti_opp_supply_set_opp(struct dev_pm_set_opp_data *data)
 	int ret;
 
 	vdd_uv = _get_optimal_vdd_voltage(dev, &opp_data,
-					  new_supply_vbb->u_volt);
+					  new_supply_vdd->u_volt);
 
 	if (new_supply_vdd->u_volt_min < vdd_uv)
 		new_supply_vdd->u_volt_min = vdd_uv;
-- 
GitLab


From 0fbcc5b568edab7d848b7c7fa66d44ffbd4133c0 Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Fri, 19 Oct 2018 00:27:57 +0900
Subject: [PATCH 1032/1890] netfilter: xt_RATEEST: remove netns exit routine

xt_rateest_net_exit() was added to check whether rules are flushed
successfully. but ->net_exit() callback is called earlier than
->destroy() callback.
So that ->net_exit() callback can't check that.

test commands:
   %ip netns add vm1
   %ip netns exec vm1 iptables -t mangle -I PREROUTING -p udp \
	   --dport 1111 -j RATEEST --rateest-name ap \
	   --rateest-interval 250ms --rateest-ewma 0.5s
   %ip netns del vm1

splat looks like:
[  668.813518] WARNING: CPU: 0 PID: 87 at net/netfilter/xt_RATEEST.c:210 xt_rateest_net_exit+0x210/0x340 [xt_RATEEST]
[  668.813518] Modules linked in: xt_RATEEST xt_tcpudp iptable_mangle bpfilter ip_tables x_tables
[  668.813518] CPU: 0 PID: 87 Comm: kworker/u4:2 Not tainted 4.19.0-rc7+ #21
[  668.813518] Workqueue: netns cleanup_net
[  668.813518] RIP: 0010:xt_rateest_net_exit+0x210/0x340 [xt_RATEEST]
[  668.813518] Code: 00 48 8b 85 30 ff ff ff 4c 8b 23 80 38 00 0f 85 24 01 00 00 48 8b 85 30 ff ff ff 4d 85 e4 4c 89 a5 58 ff ff ff c6 00 f8 74 b2 <0f> 0b 48 83 c3 08 4c 39 f3 75 b0 48 b8 00 00 00 00 00 fc ff df 49
[  668.813518] RSP: 0018:ffff8801156c73f8 EFLAGS: 00010282
[  668.813518] RAX: ffffed0022ad8e85 RBX: ffff880118928e98 RCX: 5db8012a00000000
[  668.813518] RDX: ffff8801156c7428 RSI: 00000000cb1d185f RDI: ffff880115663b74
[  668.813518] RBP: ffff8801156c74d0 R08: ffff8801156633c0 R09: 1ffff100236440be
[  668.813518] R10: 0000000000000001 R11: ffffed002367d852 R12: ffff880115142b08
[  668.813518] R13: 1ffff10022ad8e81 R14: ffff880118928ea8 R15: dffffc0000000000
[  668.813518] FS:  0000000000000000(0000) GS:ffff88011b200000(0000) knlGS:0000000000000000
[  668.813518] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  668.813518] CR2: 0000563aa69f4f28 CR3: 0000000105a16000 CR4: 00000000001006f0
[  668.813518] Call Trace:
[  668.813518]  ? unregister_netdevice_many+0xe0/0xe0
[  668.813518]  ? xt_rateest_net_init+0x2c0/0x2c0 [xt_RATEEST]
[  668.813518]  ? default_device_exit+0x1ca/0x270
[  668.813518]  ? remove_proc_entry+0x1cd/0x390
[  668.813518]  ? dev_change_net_namespace+0xd00/0xd00
[  668.813518]  ? __init_waitqueue_head+0x130/0x130
[  668.813518]  ops_exit_list.isra.10+0x94/0x140
[  668.813518]  cleanup_net+0x45b/0x900
[  668.813518]  ? net_drop_ns+0x110/0x110
[  668.813518]  ? swapgs_restore_regs_and_return_to_usermode+0x3c/0x80
[  668.813518]  ? save_trace+0x300/0x300
[  668.813518]  ? lock_acquire+0x196/0x470
[  668.813518]  ? lock_acquire+0x196/0x470
[  668.813518]  ? process_one_work+0xb60/0x1de0
[  668.813518]  ? _raw_spin_unlock_irq+0x29/0x40
[  668.813518]  ? _raw_spin_unlock_irq+0x29/0x40
[  668.813518]  ? __lock_acquire+0x4500/0x4500
[  668.813518]  ? __lock_is_held+0xb4/0x140
[  668.813518]  process_one_work+0xc13/0x1de0
[  668.813518]  ? pwq_dec_nr_in_flight+0x3c0/0x3c0
[  668.813518]  ? set_load_weight+0x270/0x270
[ ... ]

Fixes: 3427b2ab63fa ("netfilter: make xt_rateest hash table per net")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/xt_RATEEST.c | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index dec843cadf462..9e05c86ba5c45 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -201,18 +201,8 @@ static __net_init int xt_rateest_net_init(struct net *net)
 	return 0;
 }
 
-static void __net_exit xt_rateest_net_exit(struct net *net)
-{
-	struct xt_rateest_net *xn = net_generic(net, xt_rateest_id);
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(xn->hash); i++)
-		WARN_ON_ONCE(!hlist_empty(&xn->hash[i]));
-}
-
 static struct pernet_operations xt_rateest_net_ops = {
 	.init = xt_rateest_net_init,
-	.exit = xt_rateest_net_exit,
 	.id   = &xt_rateest_id,
 	.size = sizeof(struct xt_rateest_net),
 };
-- 
GitLab


From 29e3880109e357fdc607b4393f8308cef6af9413 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 12 Nov 2018 22:43:45 +0100
Subject: [PATCH 1033/1890] netfilter: nf_tables: fix use-after-free when
 deleting compat expressions

nft_compat ops do not have static storage duration, unlike all other
expressions.

When nf_tables_expr_destroy() returns, expr->ops might have been
free'd already, so we need to store next address before calling
expression destructor.

For same reason, we can't deref match pointer after nft_xt_put().

This can be easily reproduced by adding msleep() before
nft_match_destroy() returns.

Fixes: 0ca743a55991 ("netfilter: nf_tables: add compatibility layer for x_tables")
Reported-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 5 +++--
 net/netfilter/nft_compat.c    | 3 ++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index e496030fdc3b8..ddeaa1990e1e9 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2457,7 +2457,7 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
 static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
 				   struct nft_rule *rule)
 {
-	struct nft_expr *expr;
+	struct nft_expr *expr, *next;
 
 	/*
 	 * Careful: some expressions might not be initialized in case this
@@ -2465,8 +2465,9 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
 	 */
 	expr = nft_expr_first(rule);
 	while (expr != nft_expr_last(rule) && expr->ops) {
+		next = nft_expr_next(expr);
 		nf_tables_expr_destroy(ctx, expr);
-		expr = nft_expr_next(expr);
+		expr = next;
 	}
 	kfree(rule);
 }
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 9d0ede4742240..7334e0b80a5ef 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -520,6 +520,7 @@ __nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr,
 		    void *info)
 {
 	struct xt_match *match = expr->ops->data;
+	struct module *me = match->me;
 	struct xt_mtdtor_param par;
 
 	par.net = ctx->net;
@@ -530,7 +531,7 @@ __nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr,
 		par.match->destroy(&par);
 
 	if (nft_xt_put(container_of(expr->ops, struct nft_xt, ops)))
-		module_put(match->me);
+		module_put(me);
 }
 
 static void
-- 
GitLab


From adf59dd2408c4536d490bee649784f0465562444 Mon Sep 17 00:00:00 2001
From: Jorge Ramirez-Ortiz <jorge.ramirez.ortiz@gmail.com>
Date: Mon, 12 Nov 2018 19:41:09 +0100
Subject: [PATCH 1034/1890] drm/meson: venc: dmt mode must use encp

The video mode for DMT is only populated to support encp.

Signed-off-by: Jorge Ramirez-Ortiz <jorge.ramirez.ortiz@gmail.com>
Acked-by: Neil Armstrong <narmstrong@baylibre.com>
Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1542048069-22603-1-git-send-email-jramirez@baylibre.com
---
 drivers/gpu/drm/meson/meson_venc.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/meson/meson_venc.c b/drivers/gpu/drm/meson/meson_venc.c
index 514245e69b384..acbbad3e322ca 100644
--- a/drivers/gpu/drm/meson/meson_venc.c
+++ b/drivers/gpu/drm/meson/meson_venc.c
@@ -854,6 +854,13 @@ void meson_venc_hdmi_mode_set(struct meson_drm *priv, int vic,
 	unsigned int sof_lines;
 	unsigned int vsync_lines;
 
+	/* Use VENCI for 480i and 576i and double HDMI pixels */
+	if (mode->flags & DRM_MODE_FLAG_DBLCLK) {
+		hdmi_repeat = true;
+		use_enci = true;
+		venc_hdmi_latency = 1;
+	}
+
 	if (meson_venc_hdmi_supported_vic(vic)) {
 		vmode = meson_venc_hdmi_get_vic_vmode(vic);
 		if (!vmode) {
@@ -865,13 +872,7 @@ void meson_venc_hdmi_mode_set(struct meson_drm *priv, int vic,
 	} else {
 		meson_venc_hdmi_get_dmt_vmode(mode, &vmode_dmt);
 		vmode = &vmode_dmt;
-	}
-
-	/* Use VENCI for 480i and 576i and double HDMI pixels */
-	if (mode->flags & DRM_MODE_FLAG_DBLCLK) {
-		hdmi_repeat = true;
-		use_enci = true;
-		venc_hdmi_latency = 1;
+		use_enci = false;
 	}
 
 	/* Repeat VENC pixels for 480/576i/p, 720p50/60 and 1080p50/60 */
-- 
GitLab


From 52df7837560e51a47903b49243291fb8039653d2 Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.ibm.com>
Date: Mon, 15 Oct 2018 13:31:39 +0200
Subject: [PATCH 1035/1890] s390/cio: make vfio_ccw_io_region static

Fix the following sparse warning:
drivers/s390/cio/vfio_ccw_drv.c:25:19: warning: symbol 'vfio_ccw_io_region'
was not declared. Should it be static?

Signed-off-by: Sebastian Ott <sebott@linux.ibm.com>
Message-Id: <alpine.LFD.2.21.1810151328570.1636@schleppi.aag-de.ibmmobiledemo.com>
Signed-off-by: Cornelia Huck <cohuck@redhat.com>
---
 drivers/s390/cio/vfio_ccw_drv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
index f47d16b5810b9..edbf542d82afb 100644
--- a/drivers/s390/cio/vfio_ccw_drv.c
+++ b/drivers/s390/cio/vfio_ccw_drv.c
@@ -22,7 +22,7 @@
 #include "vfio_ccw_private.h"
 
 struct workqueue_struct *vfio_ccw_work_q;
-struct kmem_cache *vfio_ccw_io_region;
+static struct kmem_cache *vfio_ccw_io_region;
 
 /*
  * Helpers
-- 
GitLab


From 55e93ecdc1ef9256279e0a0b08edf72cc47fc2f6 Mon Sep 17 00:00:00 2001
From: Pierre Morel <pmorel@linux.ibm.com>
Date: Thu, 25 Oct 2018 19:15:20 +0200
Subject: [PATCH 1036/1890] vfio: ccw: Register mediated device once all
 structures are initialized

Let's register the mediated device when all the data structures
which could be used are initialized.

Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
Reviewed-by: Eric Farman <farman@linux.ibm.com>
Message-Id: <1540487720-11634-3-git-send-email-pmorel@linux.ibm.com>
Acked-by: Halil Pasic <pasic@linux.ibm.com>
Signed-off-by: Cornelia Huck <cohuck@redhat.com>
---
 drivers/s390/cio/vfio_ccw_drv.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
index edbf542d82afb..a10cec0e86eb4 100644
--- a/drivers/s390/cio/vfio_ccw_drv.c
+++ b/drivers/s390/cio/vfio_ccw_drv.c
@@ -134,14 +134,14 @@ static int vfio_ccw_sch_probe(struct subchannel *sch)
 	if (ret)
 		goto out_free;
 
-	ret = vfio_ccw_mdev_reg(sch);
-	if (ret)
-		goto out_disable;
-
 	INIT_WORK(&private->io_work, vfio_ccw_sch_io_todo);
 	atomic_set(&private->avail, 1);
 	private->state = VFIO_CCW_STATE_STANDBY;
 
+	ret = vfio_ccw_mdev_reg(sch);
+	if (ret)
+		goto out_disable;
+
 	return 0;
 
 out_disable:
-- 
GitLab


From 806212f91c874b24cf9eb4a9f180323671b6c5ed Mon Sep 17 00:00:00 2001
From: Eric Farman <farman@linux.ibm.com>
Date: Fri, 9 Nov 2018 03:39:28 +0100
Subject: [PATCH 1037/1890] s390/cio: Fix cleanup of pfn_array alloc failure

If pfn_array_alloc fails somehow, we need to release the pfn_array_table
that was malloc'd earlier.

Signed-off-by: Eric Farman <farman@linux.ibm.com>
Message-Id: <20181109023937.96105-2-farman@linux.ibm.com>
Acked-by: Halil Pasic <pasic@linux.ibm.com>
Signed-off-by: Cornelia Huck <cohuck@redhat.com>
---
 drivers/s390/cio/vfio_ccw_cp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c
index fd77e46eb3b21..ef5ab45d94b3b 100644
--- a/drivers/s390/cio/vfio_ccw_cp.c
+++ b/drivers/s390/cio/vfio_ccw_cp.c
@@ -528,7 +528,7 @@ static int ccwchain_fetch_direct(struct ccwchain *chain,
 
 	ret = pfn_array_alloc_pin(pat->pat_pa, cp->mdev, ccw->cda, ccw->count);
 	if (ret < 0)
-		goto out_init;
+		goto out_unpin;
 
 	/* Translate this direct ccw to a idal ccw. */
 	idaws = kcalloc(ret, sizeof(*idaws), GFP_DMA | GFP_KERNEL);
-- 
GitLab


From b89e242eee8d4cd8261d8d821c62c5d1efc454d0 Mon Sep 17 00:00:00 2001
From: Eric Farman <farman@linux.ibm.com>
Date: Fri, 9 Nov 2018 03:39:29 +0100
Subject: [PATCH 1038/1890] s390/cio: Fix cleanup when unsupported IDA format
 is used

Direct returns from within a loop are rude, but it doesn't mean it gets
to avoid releasing the memory acquired beforehand.

Signed-off-by: Eric Farman <farman@linux.ibm.com>
Message-Id: <20181109023937.96105-3-farman@linux.ibm.com>
Reviewed-by: Farhan Ali <alifm@linux.ibm.com>
Reviewed-by: Pierre Morel <pmorel@linux.ibm.com>
Acked-by: Halil Pasic <pasic@linux.ibm.com>
Signed-off-by: Cornelia Huck <cohuck@redhat.com>
---
 drivers/s390/cio/vfio_ccw_cp.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c
index ef5ab45d94b3b..70a006ba4d050 100644
--- a/drivers/s390/cio/vfio_ccw_cp.c
+++ b/drivers/s390/cio/vfio_ccw_cp.c
@@ -387,8 +387,10 @@ static int ccwchain_calc_length(u64 iova, struct channel_program *cp)
 		 * orb specified one of the unsupported formats, we defer
 		 * checking for IDAWs in unsupported formats to here.
 		 */
-		if ((!cp->orb.cmd.c64 || cp->orb.cmd.i2k) && ccw_is_idal(ccw))
+		if ((!cp->orb.cmd.c64 || cp->orb.cmd.i2k) && ccw_is_idal(ccw)) {
+			kfree(p);
 			return -EOPNOTSUPP;
+		}
 
 		if ((!ccw_is_chain(ccw)) && (!ccw_is_tic(ccw)))
 			break;
-- 
GitLab


From aab15e8ec25765cf7968c72cbec7583acf99d8a4 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Mon, 12 Nov 2018 10:23:58 +0000
Subject: [PATCH 1039/1890] Btrfs: fix rare chances for data loss when doing a
 fast fsync

After the simplification of the fast fsync patch done recently by commit
b5e6c3e170b7 ("btrfs: always wait on ordered extents at fsync time") and
commit e7175a692765 ("btrfs: remove the wait ordered logic in the
log_one_extent path"), we got a very short time window where we can get
extents logged without writeback completing first or extents logged
without logging the respective data checksums. Both issues can only happen
when doing a non-full (fast) fsync.

As soon as we enter btrfs_sync_file() we trigger writeback, then lock the
inode and then wait for the writeback to complete before starting to log
the inode. However before we acquire the inode's lock and after we started
writeback, it's possible that more writes happened and dirtied more pages.
If that happened and those pages get writeback triggered while we are
logging the inode (for example, the VM subsystem triggering it due to
memory pressure, or another concurrent fsync), we end up seeing the
respective extent maps in the inode's list of modified extents and will
log matching file extent items without waiting for the respective
ordered extents to complete, meaning that either of the following will
happen:

1) We log an extent after its writeback finishes but before its checksums
   are added to the csum tree, leading to -EIO errors when attempting to
   read the extent after a log replay.

2) We log an extent before its writeback finishes.
   Therefore after the log replay we will have a file extent item pointing
   to an unwritten extent (and without the respective data checksums as
   well).

This could not happen before the fast fsync patch simplification, because
for any extent we found in the list of modified extents, we would wait for
its respective ordered extent to finish writeback or collect its checksums
for logging if it did not complete yet.

Fix this by triggering writeback again after acquiring the inode's lock
and before waiting for ordered extents to complete.

Fixes: e7175a692765 ("btrfs: remove the wait ordered logic in the log_one_extent path")
Fixes: b5e6c3e170b7 ("btrfs: always wait on ordered extents at fsync time")
CC: stable@vger.kernel.org # 4.19+
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/file.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 97c7a086f7bd6..b92b7f05c3d5f 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2088,6 +2088,30 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 
 	atomic_inc(&root->log_batch);
 
+	/*
+	 * Before we acquired the inode's lock, someone may have dirtied more
+	 * pages in the target range. We need to make sure that writeback for
+	 * any such pages does not start while we are logging the inode, because
+	 * if it does, any of the following might happen when we are not doing a
+	 * full inode sync:
+	 *
+	 * 1) We log an extent after its writeback finishes but before its
+	 *    checksums are added to the csum tree, leading to -EIO errors
+	 *    when attempting to read the extent after a log replay.
+	 *
+	 * 2) We can end up logging an extent before its writeback finishes.
+	 *    Therefore after the log replay we will have a file extent item
+	 *    pointing to an unwritten extent (and no data checksums as well).
+	 *
+	 * So trigger writeback for any eventual new dirty pages and then we
+	 * wait for all ordered extents to complete below.
+	 */
+	ret = start_ordered_ops(inode, start, end);
+	if (ret) {
+		inode_unlock(inode);
+		goto out;
+	}
+
 	/*
 	 * We have to do this here to avoid the priority inversion of waiting on
 	 * IO of a lower priority task while holding a transaciton open.
-- 
GitLab


From c1d91f86a1b4c9c05854d59c6a0abd5d0f75b849 Mon Sep 17 00:00:00 2001
From: Christoph Muellner <christoph.muellner@theobroma-systems.com>
Date: Tue, 13 Nov 2018 11:25:35 +0100
Subject: [PATCH 1040/1890] arm64: dts: rockchip: Fix PCIe reset polarity for
 rk3399-puma-haikou.

This patch fixes the wrong polarity setting for the PCIe host driver's
pre-reset pin for rk3399-puma-haikou. Without this patch link training
will most likely fail.

Fixes: 60fd9f72ce8a ("arm64: dts: rockchip: add Haikou baseboard with RK3399-Q7 SoM")
Cc: stable@vger.kernel.org
Signed-off-by: Christoph Muellner <christoph.muellner@theobroma-systems.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
---
 arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts
index 2dceeea29b835..1e6a71066c163 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts
@@ -153,7 +153,7 @@ &pcie_phy {
 };
 
 &pcie0 {
-	ep-gpios = <&gpio4 RK_PC6 GPIO_ACTIVE_LOW>;
+	ep-gpios = <&gpio4 RK_PC6 GPIO_ACTIVE_HIGH>;
 	num-lanes = <4>;
 	pinctrl-names = "default";
 	pinctrl-0 = <&pcie_clkreqn_cpm>;
-- 
GitLab


From 0d76bcc960e6057750fcf556b65da13f8bbdfd2b Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Tue, 13 Nov 2018 08:38:17 -0600
Subject: [PATCH 1041/1890] Revert "ACPI/PCI: Pay attention to device-specific
 _PXM node values"

This reverts commit bad7dcd94f3956bcfc0a69ef71fdf0fcca3de4a8.

bad7dcd94f39 ("ACPI/PCI: Pay attention to device-specific _PXM node
values") caused boot failures (no console output at all) for Martin [1]
and Ingo [2] on AMD ThreadRipper systems.

Revert the commit until we figure out how to safely use these
device-specific _PXM values.

[1] https://lore.kernel.org/linux-pci/20180912152140.3676-2-Jonathan.Cameron@huawei.com
[2] https://lore.kernel.org/linux-pci/20181113071712.GA2353@gmail.com
Fixes: bad7dcd94f39 ("ACPI/PCI: Pay attention to device-specific _PXM node values")
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/pci-acpi.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index 2a4aa64685794..921db6f803403 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -793,15 +793,10 @@ static void pci_acpi_setup(struct device *dev)
 {
 	struct pci_dev *pci_dev = to_pci_dev(dev);
 	struct acpi_device *adev = ACPI_COMPANION(dev);
-	int node;
 
 	if (!adev)
 		return;
 
-	node = acpi_get_node(adev->handle);
-	if (node != NUMA_NO_NODE)
-		set_dev_node(dev, node);
-
 	pci_acpi_optimize_delay(pci_dev, adev->handle);
 
 	pci_acpi_add_pm_notifier(adev, pci_dev);
-- 
GitLab


From c837243ff4017f493c7d6f4ab57278d812a86859 Mon Sep 17 00:00:00 2001
From: Philip Yang <Philip.Yang@amd.com>
Date: Mon, 12 Nov 2018 14:00:45 -0500
Subject: [PATCH 1042/1890] drm/amdgpu: fix bug with IH ring setup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The bug limits the IH ring wptr address to 40bit. When the system memory
is bigger than 1TB, the bus address is more than 40bit, this causes the
interrupt cannot be handled and cleared correctly.

Reviewed-by: Christian KÃ¶nig <christian.koenig@amd.com>
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index a99f71797aa35..a0fda6f9252a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -129,7 +129,7 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
 	else
 		wptr_off = adev->wb.gpu_addr + (adev->irq.ih.wptr_offs * 4);
 	WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO, lower_32_bits(wptr_off));
-	WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_HI, upper_32_bits(wptr_off) & 0xFF);
+	WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_HI, upper_32_bits(wptr_off) & 0xFFFF);
 
 	/* set rptr, wptr to 0 */
 	WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, 0);
-- 
GitLab


From 4d454e9ffdb1ef5a51ebc147b5389c96048db683 Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Tue, 13 Nov 2018 11:15:56 +0800
Subject: [PATCH 1043/1890] drm/amd/pp: Fix truncated clock value when set
 watermark

the clk value should be tranferred to MHz first and
then transfer to uint16. otherwise, the clock value
will be truncated.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reported-by: Hersen Wu <hersenxs.wu@amd.com>
Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 .../gpu/drm/amd/powerplay/hwmgr/smu_helper.c  | 32 +++++++++----------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c
index 99a33c33a32c9..101c09b212ade 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu_helper.c
@@ -713,20 +713,20 @@ int smu_set_watermarks_for_clocks_ranges(void *wt_table,
 	for (i = 0; i < wm_with_clock_ranges->num_wm_dmif_sets; i++) {
 		table->WatermarkRow[1][i].MinClock =
 			cpu_to_le16((uint16_t)
-			(wm_with_clock_ranges->wm_dmif_clocks_ranges[i].wm_min_dcfclk_clk_in_khz) /
-			1000);
+			(wm_with_clock_ranges->wm_dmif_clocks_ranges[i].wm_min_dcfclk_clk_in_khz /
+			1000));
 		table->WatermarkRow[1][i].MaxClock =
 			cpu_to_le16((uint16_t)
-			(wm_with_clock_ranges->wm_dmif_clocks_ranges[i].wm_max_dcfclk_clk_in_khz) /
-			1000);
+			(wm_with_clock_ranges->wm_dmif_clocks_ranges[i].wm_max_dcfclk_clk_in_khz /
+			1000));
 		table->WatermarkRow[1][i].MinUclk =
 			cpu_to_le16((uint16_t)
-			(wm_with_clock_ranges->wm_dmif_clocks_ranges[i].wm_min_mem_clk_in_khz) /
-			1000);
+			(wm_with_clock_ranges->wm_dmif_clocks_ranges[i].wm_min_mem_clk_in_khz /
+			1000));
 		table->WatermarkRow[1][i].MaxUclk =
 			cpu_to_le16((uint16_t)
-			(wm_with_clock_ranges->wm_dmif_clocks_ranges[i].wm_max_mem_clk_in_khz) /
-			1000);
+			(wm_with_clock_ranges->wm_dmif_clocks_ranges[i].wm_max_mem_clk_in_khz /
+			1000));
 		table->WatermarkRow[1][i].WmSetting = (uint8_t)
 				wm_with_clock_ranges->wm_dmif_clocks_ranges[i].wm_set_id;
 	}
@@ -734,20 +734,20 @@ int smu_set_watermarks_for_clocks_ranges(void *wt_table,
 	for (i = 0; i < wm_with_clock_ranges->num_wm_mcif_sets; i++) {
 		table->WatermarkRow[0][i].MinClock =
 			cpu_to_le16((uint16_t)
-			(wm_with_clock_ranges->wm_mcif_clocks_ranges[i].wm_min_socclk_clk_in_khz) /
-			1000);
+			(wm_with_clock_ranges->wm_mcif_clocks_ranges[i].wm_min_socclk_clk_in_khz /
+			1000));
 		table->WatermarkRow[0][i].MaxClock =
 			cpu_to_le16((uint16_t)
-			(wm_with_clock_ranges->wm_mcif_clocks_ranges[i].wm_max_socclk_clk_in_khz) /
-			1000);
+			(wm_with_clock_ranges->wm_mcif_clocks_ranges[i].wm_max_socclk_clk_in_khz /
+			1000));
 		table->WatermarkRow[0][i].MinUclk =
 			cpu_to_le16((uint16_t)
-			(wm_with_clock_ranges->wm_mcif_clocks_ranges[i].wm_min_mem_clk_in_khz) /
-			1000);
+			(wm_with_clock_ranges->wm_mcif_clocks_ranges[i].wm_min_mem_clk_in_khz /
+			1000));
 		table->WatermarkRow[0][i].MaxUclk =
 			cpu_to_le16((uint16_t)
-			(wm_with_clock_ranges->wm_mcif_clocks_ranges[i].wm_max_mem_clk_in_khz) /
-			1000);
+			(wm_with_clock_ranges->wm_mcif_clocks_ranges[i].wm_max_mem_clk_in_khz /
+			1000));
 		table->WatermarkRow[0][i].WmSetting = (uint8_t)
 				wm_with_clock_ranges->wm_mcif_clocks_ranges[i].wm_set_id;
 	}
-- 
GitLab


From c1a17777eb45d9f3821f35e9869c0a08cd2e664e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Mon, 12 Nov 2018 18:08:31 +0100
Subject: [PATCH 1044/1890] drm/amdgpu: fix huge page handling on Vega10
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We accidentially set the huge flag on the parent instead of the childs.
This caused some VM faults under memory pressure.

Signed-off-by: Christian KÃ¶nig <christian.koenig@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Tested-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 352b304090602..dad0e2342df9d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1632,13 +1632,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 			continue;
 		}
 
-		/* First check if the entry is already handled */
-		if (cursor.pfn < frag_start) {
-			cursor.entry->huge = true;
-			amdgpu_vm_pt_next(adev, &cursor);
-			continue;
-		}
-
 		/* If it isn't already handled it can't be a huge page */
 		if (cursor.entry->huge) {
 			/* Add the entry to the relocated list to update it. */
@@ -1701,8 +1694,17 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 			}
 		} while (frag_start < entry_end);
 
-		if (frag >= shift)
+		if (amdgpu_vm_pt_descendant(adev, &cursor)) {
+			/* Mark all child entries as huge */
+			while (cursor.pfn < frag_start) {
+				cursor.entry->huge = true;
+				amdgpu_vm_pt_next(adev, &cursor);
+			}
+
+		} else if (frag >= shift) {
+			/* or just move on to the next on the same level. */
 			amdgpu_vm_pt_next(adev, &cursor);
+		}
 	}
 
 	return 0;
-- 
GitLab


From c138325fb8713472d5a0c3c7258b9131bab40725 Mon Sep 17 00:00:00 2001
From: Ondrej Mosnacek <omosnace@redhat.com>
Date: Tue, 13 Nov 2018 16:16:08 +0100
Subject: [PATCH 1045/1890] selinux: check length properly in SCTP bind hook

selinux_sctp_bind_connect() must verify if the address buffer has
sufficient length before accessing the 'sa_family' field. See
__sctp_connect() for a similar check.

The length of the whole address ('len') is already checked in the
callees.

Reported-by: Qian Cai <cai@gmx.us>
Fixes: d452930fd3b9 ("selinux: Add SCTP support")
Cc: <stable@vger.kernel.org> # 4.17+
Cc: Richard Haines <richard_c_haines@btinternet.com>
Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
Tested-by: Qian Cai <cai@gmx.us>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 security/selinux/hooks.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 18b98b5e1e3c4..fe251c6f09f10 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -5318,6 +5318,9 @@ static int selinux_sctp_bind_connect(struct sock *sk, int optname,
 	addr_buf = address;
 
 	while (walk_size < addrlen) {
+		if (walk_size + sizeof(sa_family_t) > addrlen)
+			return -EINVAL;
+
 		addr = addr_buf;
 		switch (addr->sa_family) {
 		case AF_UNSPEC:
-- 
GitLab


From 9aaa4e8ba12972d674caeefbc5f88d83235dd697 Mon Sep 17 00:00:00 2001
From: Denis Bolotin <denis.bolotin@cavium.com>
Date: Mon, 12 Nov 2018 12:50:20 +0200
Subject: [PATCH 1046/1890] qed: Fix PTT leak in qed_drain()

Release PTT before entering error flow.

Signed-off-by: Denis Bolotin <denis.bolotin@cavium.com>
Signed-off-by: Michal Kalderon <michal.kalderon@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 35fd0db6a6777..fff7f04d4525c 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -1782,9 +1782,9 @@ static int qed_drain(struct qed_dev *cdev)
 			return -EBUSY;
 		}
 		rc = qed_mcp_drain(hwfn, ptt);
+		qed_ptt_release(hwfn, ptt);
 		if (rc)
 			return rc;
-		qed_ptt_release(hwfn, ptt);
 	}
 
 	return 0;
-- 
GitLab


From e90202ed1cf9672c48a363c84a929932ebfe6fc0 Mon Sep 17 00:00:00 2001
From: Denis Bolotin <denis.bolotin@cavium.com>
Date: Mon, 12 Nov 2018 12:50:21 +0200
Subject: [PATCH 1047/1890] qed: Fix overriding offload_tc by protocols without
 APP TLV

The TC received from APP TLV is stored in offload_tc, and should not be
set by protocols which did not receive an APP TLV. Fixed the condition
when overriding the offload_tc.

Signed-off-by: Denis Bolotin <denis.bolotin@cavium.com>
Signed-off-by: Michal Kalderon <michal.kalderon@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_dcbx.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
index 8e8fa823d6118..69966dfc6e3d1 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
@@ -191,7 +191,7 @@ qed_dcbx_dp_protocol(struct qed_hwfn *p_hwfn, struct qed_dcbx_results *p_data)
 static void
 qed_dcbx_set_params(struct qed_dcbx_results *p_data,
 		    struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
-		    bool enable, u8 prio, u8 tc,
+		    bool app_tlv, bool enable, u8 prio, u8 tc,
 		    enum dcbx_protocol_type type,
 		    enum qed_pci_personality personality)
 {
@@ -210,7 +210,7 @@ qed_dcbx_set_params(struct qed_dcbx_results *p_data,
 		p_data->arr[type].dont_add_vlan0 = true;
 
 	/* QM reconf data */
-	if (p_hwfn->hw_info.personality == personality)
+	if (app_tlv && p_hwfn->hw_info.personality == personality)
 		qed_hw_info_set_offload_tc(&p_hwfn->hw_info, tc);
 
 	/* Configure dcbx vlan priority in doorbell block for roce EDPM */
@@ -225,7 +225,7 @@ qed_dcbx_set_params(struct qed_dcbx_results *p_data,
 static void
 qed_dcbx_update_app_info(struct qed_dcbx_results *p_data,
 			 struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
-			 bool enable, u8 prio, u8 tc,
+			 bool app_tlv, bool enable, u8 prio, u8 tc,
 			 enum dcbx_protocol_type type)
 {
 	enum qed_pci_personality personality;
@@ -240,7 +240,7 @@ qed_dcbx_update_app_info(struct qed_dcbx_results *p_data,
 
 		personality = qed_dcbx_app_update[i].personality;
 
-		qed_dcbx_set_params(p_data, p_hwfn, p_ptt, enable,
+		qed_dcbx_set_params(p_data, p_hwfn, p_ptt, app_tlv, enable,
 				    prio, tc, type, personality);
 	}
 }
@@ -319,8 +319,8 @@ qed_dcbx_process_tlv(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
 				enable = true;
 			}
 
-			qed_dcbx_update_app_info(p_data, p_hwfn, p_ptt, enable,
-						 priority, tc, type);
+			qed_dcbx_update_app_info(p_data, p_hwfn, p_ptt, true,
+						 enable, priority, tc, type);
 		}
 	}
 
@@ -341,7 +341,7 @@ qed_dcbx_process_tlv(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
 			continue;
 
 		enable = (type == DCBX_PROTOCOL_ETH) ? false : !!dcbx_version;
-		qed_dcbx_update_app_info(p_data, p_hwfn, p_ptt, enable,
+		qed_dcbx_update_app_info(p_data, p_hwfn, p_ptt, false, enable,
 					 priority, tc, type);
 	}
 
-- 
GitLab


From 291d57f67d2449737d1e370ab5b9a583818eaa0c Mon Sep 17 00:00:00 2001
From: Michal Kalderon <michal.kalderon@cavium.com>
Date: Mon, 12 Nov 2018 12:50:22 +0200
Subject: [PATCH 1048/1890] qed: Fix rdma_info structure allocation

Certain flows need to access the rdma-info structure, for example dcbx
update flows. In some cases there can be a race between the allocation or
deallocation of the structure which was done in roce start / roce stop and
an asynchrounous dcbx event that tries to access the structure.
For this reason, we move the allocation of the rdma_info structure to be
similar to the iscsi/fcoe info structures which are allocated during device
setup.
We add a new field of "active" to the struct to define whether roce has
already been started or not, and this is checked instead of whether the
pointer to the info structure.

Fixes: 51ff17251c9c ("qed: Add support for RoCE hw init")
Signed-off-by: Michal Kalderon <michal.kalderon@cavium.com>
Signed-off-by: Denis Bolotin <denis.bolotin@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_dev.c  | 15 +++++--
 drivers/net/ethernet/qlogic/qed/qed_rdma.c | 50 +++++++++++++---------
 drivers/net/ethernet/qlogic/qed/qed_rdma.h |  5 +++
 3 files changed, 45 insertions(+), 25 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index 7ceb2b97538d2..cff141077558c 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -185,6 +185,10 @@ void qed_resc_free(struct qed_dev *cdev)
 			qed_iscsi_free(p_hwfn);
 			qed_ooo_free(p_hwfn);
 		}
+
+		if (QED_IS_RDMA_PERSONALITY(p_hwfn))
+			qed_rdma_info_free(p_hwfn);
+
 		qed_iov_free(p_hwfn);
 		qed_l2_free(p_hwfn);
 		qed_dmae_info_free(p_hwfn);
@@ -1081,6 +1085,12 @@ int qed_resc_alloc(struct qed_dev *cdev)
 				goto alloc_err;
 		}
 
+		if (QED_IS_RDMA_PERSONALITY(p_hwfn)) {
+			rc = qed_rdma_info_alloc(p_hwfn);
+			if (rc)
+				goto alloc_err;
+		}
+
 		/* DMA info initialization */
 		rc = qed_dmae_info_alloc(p_hwfn);
 		if (rc)
@@ -2102,11 +2112,8 @@ int qed_hw_start_fastpath(struct qed_hwfn *p_hwfn)
 	if (!p_ptt)
 		return -EAGAIN;
 
-	/* If roce info is allocated it means roce is initialized and should
-	 * be enabled in searcher.
-	 */
 	if (p_hwfn->p_rdma_info &&
-	    p_hwfn->b_rdma_enabled_in_prs)
+	    p_hwfn->p_rdma_info->active && p_hwfn->b_rdma_enabled_in_prs)
 		qed_wr(p_hwfn, p_ptt, p_hwfn->rdma_prs_search_reg, 0x1);
 
 	/* Re-open incoming traffic */
diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
index 62113438c8809..7873d6dfd91f5 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
@@ -140,22 +140,34 @@ static u32 qed_rdma_get_sb_id(void *p_hwfn, u32 rel_sb_id)
 	return FEAT_NUM((struct qed_hwfn *)p_hwfn, QED_PF_L2_QUE) + rel_sb_id;
 }
 
-static int qed_rdma_alloc(struct qed_hwfn *p_hwfn,
-			  struct qed_ptt *p_ptt,
-			  struct qed_rdma_start_in_params *params)
+int qed_rdma_info_alloc(struct qed_hwfn *p_hwfn)
 {
 	struct qed_rdma_info *p_rdma_info;
-	u32 num_cons, num_tasks;
-	int rc = -ENOMEM;
 
-	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocating RDMA\n");
-
-	/* Allocate a struct with current pf rdma info */
 	p_rdma_info = kzalloc(sizeof(*p_rdma_info), GFP_KERNEL);
 	if (!p_rdma_info)
-		return rc;
+		return -ENOMEM;
+
+	spin_lock_init(&p_rdma_info->lock);
 
 	p_hwfn->p_rdma_info = p_rdma_info;
+	return 0;
+}
+
+void qed_rdma_info_free(struct qed_hwfn *p_hwfn)
+{
+	kfree(p_hwfn->p_rdma_info);
+	p_hwfn->p_rdma_info = NULL;
+}
+
+static int qed_rdma_alloc(struct qed_hwfn *p_hwfn)
+{
+	struct qed_rdma_info *p_rdma_info = p_hwfn->p_rdma_info;
+	u32 num_cons, num_tasks;
+	int rc = -ENOMEM;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocating RDMA\n");
+
 	if (QED_IS_IWARP_PERSONALITY(p_hwfn))
 		p_rdma_info->proto = PROTOCOLID_IWARP;
 	else
@@ -183,7 +195,7 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn,
 	/* Allocate a struct with device params and fill it */
 	p_rdma_info->dev = kzalloc(sizeof(*p_rdma_info->dev), GFP_KERNEL);
 	if (!p_rdma_info->dev)
-		goto free_rdma_info;
+		return rc;
 
 	/* Allocate a struct with port params and fill it */
 	p_rdma_info->port = kzalloc(sizeof(*p_rdma_info->port), GFP_KERNEL);
@@ -298,8 +310,6 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn,
 	kfree(p_rdma_info->port);
 free_rdma_dev:
 	kfree(p_rdma_info->dev);
-free_rdma_info:
-	kfree(p_rdma_info);
 
 	return rc;
 }
@@ -370,8 +380,6 @@ static void qed_rdma_resc_free(struct qed_hwfn *p_hwfn)
 
 	kfree(p_rdma_info->port);
 	kfree(p_rdma_info->dev);
-
-	kfree(p_rdma_info);
 }
 
 static void qed_rdma_free_tid(void *rdma_cxt, u32 itid)
@@ -679,8 +687,6 @@ static int qed_rdma_setup(struct qed_hwfn *p_hwfn,
 
 	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "RDMA setup\n");
 
-	spin_lock_init(&p_hwfn->p_rdma_info->lock);
-
 	qed_rdma_init_devinfo(p_hwfn, params);
 	qed_rdma_init_port(p_hwfn);
 	qed_rdma_init_events(p_hwfn, params);
@@ -727,7 +733,7 @@ static int qed_rdma_stop(void *rdma_cxt)
 	/* Disable RoCE search */
 	qed_wr(p_hwfn, p_ptt, p_hwfn->rdma_prs_search_reg, 0);
 	p_hwfn->b_rdma_enabled_in_prs = false;
-
+	p_hwfn->p_rdma_info->active = 0;
 	qed_wr(p_hwfn, p_ptt, PRS_REG_ROCE_DEST_QP_MAX_PF, 0);
 
 	ll2_ethertype_en = qed_rd(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN);
@@ -1236,7 +1242,8 @@ qed_rdma_create_qp(void *rdma_cxt,
 	u8 max_stats_queues;
 	int rc;
 
-	if (!rdma_cxt || !in_params || !out_params || !p_hwfn->p_rdma_info) {
+	if (!rdma_cxt || !in_params || !out_params ||
+	    !p_hwfn->p_rdma_info->active) {
 		DP_ERR(p_hwfn->cdev,
 		       "qed roce create qp failed due to NULL entry (rdma_cxt=%p, in=%p, out=%p, roce_info=?\n",
 		       rdma_cxt, in_params, out_params);
@@ -1802,8 +1809,8 @@ bool qed_rdma_allocated_qps(struct qed_hwfn *p_hwfn)
 {
 	bool result;
 
-	/* if rdma info has not been allocated, naturally there are no qps */
-	if (!p_hwfn->p_rdma_info)
+	/* if rdma wasn't activated yet, naturally there are no qps */
+	if (!p_hwfn->p_rdma_info->active)
 		return false;
 
 	spin_lock_bh(&p_hwfn->p_rdma_info->lock);
@@ -1849,7 +1856,7 @@ static int qed_rdma_start(void *rdma_cxt,
 	if (!p_ptt)
 		goto err;
 
-	rc = qed_rdma_alloc(p_hwfn, p_ptt, params);
+	rc = qed_rdma_alloc(p_hwfn);
 	if (rc)
 		goto err1;
 
@@ -1858,6 +1865,7 @@ static int qed_rdma_start(void *rdma_cxt,
 		goto err2;
 
 	qed_ptt_release(p_hwfn, p_ptt);
+	p_hwfn->p_rdma_info->active = 1;
 
 	return rc;
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.h b/drivers/net/ethernet/qlogic/qed/qed_rdma.h
index 6f722ee8ee945..50d609c0e108d 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_rdma.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.h
@@ -102,6 +102,7 @@ struct qed_rdma_info {
 	u16 max_queue_zones;
 	enum protocol_type proto;
 	struct qed_iwarp_info iwarp;
+	u8 active:1;
 };
 
 struct qed_rdma_qp {
@@ -176,10 +177,14 @@ struct qed_rdma_qp {
 #if IS_ENABLED(CONFIG_QED_RDMA)
 void qed_rdma_dpm_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
 void qed_rdma_dpm_conf(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+int qed_rdma_info_alloc(struct qed_hwfn *p_hwfn);
+void qed_rdma_info_free(struct qed_hwfn *p_hwfn);
 #else
 static inline void qed_rdma_dpm_conf(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) {}
 static inline void qed_rdma_dpm_bar(struct qed_hwfn *p_hwfn,
 				    struct qed_ptt *p_ptt) {}
+static inline int qed_rdma_info_alloc(struct qed_hwfn *p_hwfn) {return -EINVAL}
+static inline void qed_rdma_info_free(struct qed_hwfn *p_hwfn) {}
 #endif
 
 int
-- 
GitLab


From ed4eac20dcffdad47709422e0cb925981b056668 Mon Sep 17 00:00:00 2001
From: Denis Bolotin <denis.bolotin@cavium.com>
Date: Mon, 12 Nov 2018 12:50:23 +0200
Subject: [PATCH 1049/1890] qed: Fix reading wrong value in loop condition

The value of "sb_index" is written by the hardware. Reading its value and
writing it to "index" must finish before checking the loop condition.

Signed-off-by: Denis Bolotin <denis.bolotin@cavium.com>
Signed-off-by: Michal Kalderon <michal.kalderon@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_int.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c
index 0f0aba793352c..b22f464ea3fa7 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_int.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_int.c
@@ -992,6 +992,8 @@ static int qed_int_attentions(struct qed_hwfn *p_hwfn)
 	 */
 	do {
 		index = p_sb_attn->sb_index;
+		/* finish reading index before the loop condition */
+		dma_rmb();
 		attn_bits = le32_to_cpu(p_sb_attn->atten_bits);
 		attn_acks = le32_to_cpu(p_sb_attn->atten_ack);
 	} while (index != p_sb_attn->sb_index);
-- 
GitLab


From 70df9ebbd82c794ddfbb49d45b337f18d5588dc2 Mon Sep 17 00:00:00 2001
From: Nicolin Chen <nicoleotsuka@gmail.com>
Date: Fri, 9 Nov 2018 16:42:14 -0800
Subject: [PATCH 1050/1890] hwmon (ina2xx) Fix NULL id pointer in probe()

When using DT configurations, the id pointer might turn out to
be NULL. Then the driver encounters NULL pointer access:

  Unable to handle kernel read from unreadable memory at vaddr 00000018
  [...]
  PC is at ina2xx_probe+0x114/0x200
  LR is at ina2xx_probe+0x10c/0x200
  [...]
  Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b

The reason is that i2c core returns the id pointer by matching
id_table with client->name, while the client->name is actually
using the name from the first string in the DT compatible list,
not the best one. So i2c core would fail to match the id_table
if the best matched compatible string isn't the first one, and
then would return a NULL id pointer.

This probably should be fixed in i2c core. But it doesn't hurt
to make the driver robust. So this patch fixes it by using the
"chip" that's added to unify both DT and non-DT configurations.

Additionally, since id pointer could be null, so as id->name:
  ina2xx 10-0047: power monitor (null) (Rshunt = 1000 uOhm)
  ina2xx 10-0048: power monitor (null) (Rshunt = 10000 uOhm)

So this patch also fixes NULL name pointer, using client->name
to play safe and to align with hwmon->name.

Fixes: bd0ddd4d0883 ("hwmon: (ina2xx) Add OF device ID table")
Signed-off-by: Nicolin Chen <nicoleotsuka@gmail.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/ina2xx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/hwmon/ina2xx.c b/drivers/hwmon/ina2xx.c
index 71d3445ba869c..c2252cf452f50 100644
--- a/drivers/hwmon/ina2xx.c
+++ b/drivers/hwmon/ina2xx.c
@@ -491,7 +491,7 @@ static int ina2xx_probe(struct i2c_client *client,
 	}
 
 	data->groups[group++] = &ina2xx_group;
-	if (id->driver_data == ina226)
+	if (chip == ina226)
 		data->groups[group++] = &ina226_group;
 
 	hwmon_dev = devm_hwmon_device_register_with_groups(dev, client->name,
@@ -500,7 +500,7 @@ static int ina2xx_probe(struct i2c_client *client,
 		return PTR_ERR(hwmon_dev);
 
 	dev_info(dev, "power monitor %s (Rshunt = %li uOhm)\n",
-		 id->name, data->rshunt);
+		 client->name, data->rshunt);
 
 	return 0;
 }
-- 
GitLab


From 35fdc3902179366489a12cae4cb3ccc3b95f0afe Mon Sep 17 00:00:00 2001
From: Stefan Wahren <stefan.wahren@i2se.com>
Date: Sun, 11 Nov 2018 13:01:11 +0100
Subject: [PATCH 1051/1890] hwmon: (raspberrypi) Fix initial notify
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In case an under-voltage happens before probing the driver wont
write the critical warning into the kernel log. So don't init
of last_throttled during probe and fix this issue.

Fixes: 74d1e007915f ("hwmon: Add support for RPi voltage sensor")
Reported-by: "Noralf TrÃ¸nnes" <noralf@tronnes.org>
Signed-off-by: Stefan Wahren <stefan.wahren@i2se.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/raspberrypi-hwmon.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/hwmon/raspberrypi-hwmon.c b/drivers/hwmon/raspberrypi-hwmon.c
index be5ba46908953..0d0457245e7d0 100644
--- a/drivers/hwmon/raspberrypi-hwmon.c
+++ b/drivers/hwmon/raspberrypi-hwmon.c
@@ -115,7 +115,6 @@ static int rpi_hwmon_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct rpi_hwmon_data *data;
-	int ret;
 
 	data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
 	if (!data)
@@ -124,11 +123,6 @@ static int rpi_hwmon_probe(struct platform_device *pdev)
 	/* Parent driver assure that firmware is correct */
 	data->fw = dev_get_drvdata(dev->parent);
 
-	/* Init throttled */
-	ret = rpi_firmware_property(data->fw, RPI_FIRMWARE_GET_THROTTLED,
-				    &data->last_throttled,
-				    sizeof(data->last_throttled));
-
 	data->hwmon_dev = devm_hwmon_device_register_with_info(dev, "rpi_volt",
 							       data,
 							       &rpi_chip_info,
-- 
GitLab


From 16a8ee4c80b45984b6de1f90a49edcf336b7c621 Mon Sep 17 00:00:00 2001
From: Katsuhiro Suzuki <katsuhiro@katsuster.net>
Date: Sun, 11 Nov 2018 00:18:44 +0900
Subject: [PATCH 1052/1890] ASoC: rockchip: add missing slave_config setting
 for I2S

This patch adds missing prepare_sleve_config that is needed for
setup the DMA slave channel for I2S.

Signed-off-by: Katsuhiro Suzuki <katsuhiro@katsuster.net>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/rockchip/rockchip_pcm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/soc/rockchip/rockchip_pcm.c b/sound/soc/rockchip/rockchip_pcm.c
index 9e7b5fa4cf596..4ac78d7a4b2da 100644
--- a/sound/soc/rockchip/rockchip_pcm.c
+++ b/sound/soc/rockchip/rockchip_pcm.c
@@ -33,6 +33,7 @@ static const struct snd_pcm_hardware snd_rockchip_hardware = {
 
 static const struct snd_dmaengine_pcm_config rk_dmaengine_pcm_config = {
 	.pcm_hardware = &snd_rockchip_hardware,
+	.prepare_slave_config = snd_dmaengine_pcm_prepare_slave_config,
 	.prealloc_buffer_size = 32 * 1024,
 };
 
-- 
GitLab


From 20e00db2f59bdddf8a8e241473ef8be94631d3ae Mon Sep 17 00:00:00 2001
From: Richard Fitzgerald <rf@opensource.cirrus.com>
Date: Mon, 12 Nov 2018 13:36:38 +0000
Subject: [PATCH 1053/1890] ASoC: wm_adsp: Fix dma-unsafe read of scratch
 registers

Stack memory isn't DMA-safe so it isn't safe to use either
regmap_raw_read or regmap_bulk_read to read into stack memory.

The two functions to read the scratch registers were using
stack memory and regmap_raw_read. It's not worth allocating
memory just for this trivial read, and it isn't time-critical.
A simple regmap_read for each register is sufficient.

Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/codecs/wm_adsp.c | 37 ++++++++++++++++++++-----------------
 1 file changed, 20 insertions(+), 17 deletions(-)

diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c
index a53dc174bbf07..66501b8dc46fb 100644
--- a/sound/soc/codecs/wm_adsp.c
+++ b/sound/soc/codecs/wm_adsp.c
@@ -765,38 +765,41 @@ static unsigned int wm_adsp_region_to_reg(struct wm_adsp_region const *mem,
 
 static void wm_adsp2_show_fw_status(struct wm_adsp *dsp)
 {
-	u16 scratch[4];
+	unsigned int scratch[4];
+	unsigned int addr = dsp->base + ADSP2_SCRATCH0;
+	unsigned int i;
 	int ret;
 
-	ret = regmap_raw_read(dsp->regmap, dsp->base + ADSP2_SCRATCH0,
-				scratch, sizeof(scratch));
-	if (ret) {
-		adsp_err(dsp, "Failed to read SCRATCH regs: %d\n", ret);
-		return;
+	for (i = 0; i < ARRAY_SIZE(scratch); ++i) {
+		ret = regmap_read(dsp->regmap, addr + i, &scratch[i]);
+		if (ret) {
+			adsp_err(dsp, "Failed to read SCRATCH%u: %d\n", i, ret);
+			return;
+		}
 	}
 
 	adsp_dbg(dsp, "FW SCRATCH 0:0x%x 1:0x%x 2:0x%x 3:0x%x\n",
-		 be16_to_cpu(scratch[0]),
-		 be16_to_cpu(scratch[1]),
-		 be16_to_cpu(scratch[2]),
-		 be16_to_cpu(scratch[3]));
+		 scratch[0], scratch[1], scratch[2], scratch[3]);
 }
 
 static void wm_adsp2v2_show_fw_status(struct wm_adsp *dsp)
 {
-	u32 scratch[2];
+	unsigned int scratch[2];
 	int ret;
 
-	ret = regmap_raw_read(dsp->regmap, dsp->base + ADSP2V2_SCRATCH0_1,
-			      scratch, sizeof(scratch));
-
+	ret = regmap_read(dsp->regmap, dsp->base + ADSP2V2_SCRATCH0_1,
+			  &scratch[0]);
 	if (ret) {
-		adsp_err(dsp, "Failed to read SCRATCH regs: %d\n", ret);
+		adsp_err(dsp, "Failed to read SCRATCH0_1: %d\n", ret);
 		return;
 	}
 
-	scratch[0] = be32_to_cpu(scratch[0]);
-	scratch[1] = be32_to_cpu(scratch[1]);
+	ret = regmap_read(dsp->regmap, dsp->base + ADSP2V2_SCRATCH2_3,
+			  &scratch[1]);
+	if (ret) {
+		adsp_err(dsp, "Failed to read SCRATCH2_3: %d\n", ret);
+		return;
+	}
 
 	adsp_dbg(dsp, "FW SCRATCH 0:0x%x 1:0x%x 2:0x%x 3:0x%x\n",
 		 scratch[0] & 0xFFFF,
-- 
GitLab


From 4c10473d6ddf12ec124c9ff71a5d23bb5388478b Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Fri, 9 Nov 2018 13:39:23 -0600
Subject: [PATCH 1054/1890] ASoC: Intel: Power down links before turning off
 display audio power

On certain platforms, Display HDMI HDA codec was not going to sleep state
after the use when links are powered down after turning off the display
power. As per the HW recommendation, links are powered down before turning
off the display power to ensure that the codec goes to sleep state.

This patch was updated from an earlier version submitted upstream [1]
which conflicted with the changes merged for HDaudio codec support
with the Intel DSP.

[1] https://patchwork.kernel.org/patch/10540213/

Signed-off-by: Sriram Periyasamy <sriramx.periyasamy@intel.com>
Signed-off-by: Sanyog Kale <sanyog.r.kale@intel.com>
Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/codecs/hdac_hdmi.c  | 11 +++++------
 sound/soc/intel/skylake/skl.c | 12 ++++++------
 2 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/sound/soc/codecs/hdac_hdmi.c b/sound/soc/codecs/hdac_hdmi.c
index 4e9854889a957..e63d6e33df487 100644
--- a/sound/soc/codecs/hdac_hdmi.c
+++ b/sound/soc/codecs/hdac_hdmi.c
@@ -2187,11 +2187,6 @@ static int hdac_hdmi_runtime_suspend(struct device *dev)
 	 */
 	snd_hdac_codec_read(hdev, hdev->afg, 0,	AC_VERB_SET_POWER_STATE,
 							AC_PWRST_D3);
-	err = snd_hdac_display_power(bus, false);
-	if (err < 0) {
-		dev_err(dev, "Cannot turn on display power on i915\n");
-		return err;
-	}
 
 	hlink = snd_hdac_ext_bus_get_link(bus, dev_name(dev));
 	if (!hlink) {
@@ -2201,7 +2196,11 @@ static int hdac_hdmi_runtime_suspend(struct device *dev)
 
 	snd_hdac_ext_bus_link_put(bus, hlink);
 
-	return 0;
+	err = snd_hdac_display_power(bus, false);
+	if (err < 0)
+		dev_err(dev, "Cannot turn off display power on i915\n");
+
+	return err;
 }
 
 static int hdac_hdmi_runtime_resume(struct device *dev)
diff --git a/sound/soc/intel/skylake/skl.c b/sound/soc/intel/skylake/skl.c
index 29225623b4b40..1586c97d94508 100644
--- a/sound/soc/intel/skylake/skl.c
+++ b/sound/soc/intel/skylake/skl.c
@@ -815,6 +815,12 @@ static void skl_probe_work(struct work_struct *work)
 		}
 	}
 
+	/*
+	 * we are done probing so decrement link counts
+	 */
+	list_for_each_entry(hlink, &bus->hlink_list, list)
+		snd_hdac_ext_bus_link_put(bus, hlink);
+
 	if (IS_ENABLED(CONFIG_SND_SOC_HDAC_HDMI)) {
 		err = snd_hdac_display_power(bus, false);
 		if (err < 0) {
@@ -824,12 +830,6 @@ static void skl_probe_work(struct work_struct *work)
 		}
 	}
 
-	/*
-	 * we are done probing so decrement link counts
-	 */
-	list_for_each_entry(hlink, &bus->hlink_list, list)
-		snd_hdac_ext_bus_link_put(bus, hlink);
-
 	/* configure PM */
 	pm_runtime_put_noidle(bus->dev);
 	pm_runtime_allow(bus->dev);
-- 
GitLab


From cd5e6d79384eb2dd47de6cb9569150053d4a6803 Mon Sep 17 00:00:00 2001
From: "Tudor.Ambarus@microchip.com" <Tudor.Ambarus@microchip.com>
Date: Fri, 9 Nov 2018 16:56:48 +0000
Subject: [PATCH 1055/1890] mtd: spi-nor: don't drop sfdp data if optional
 parsers fail

JESD216C states that just the Basic Flash Parameter Table is mandatory.
Already defined (or future) additional parameter headers and tables are
optional.

Don't drop already collected sfdp data in case an optional table
parser fails. In case of failing, each optional parser is responsible
to roll back to the previously known spi_nor data.

Fixes: b038e8e3be72 ("mtd: spi-nor: parse SFDP Sector Map Parameter Table")
Reported-by: Yogesh Gaur <yogeshnarayan.gaur@nxp.com>
Suggested-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Tested-by: Yogesh Gaur <yogeshnarayan.gaur@nxp.com>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
---
 drivers/mtd/spi-nor/spi-nor.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 3e54e31889c7b..a3ab318406ec7 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -3125,7 +3125,7 @@ static int spi_nor_parse_sfdp(struct spi_nor *nor,
 	if (err)
 		goto exit;
 
-	/* Parse other parameter headers. */
+	/* Parse optional parameter tables. */
 	for (i = 0; i < header.nph; i++) {
 		param_header = &param_headers[i];
 
@@ -3138,8 +3138,17 @@ static int spi_nor_parse_sfdp(struct spi_nor *nor,
 			break;
 		}
 
-		if (err)
-			goto exit;
+		if (err) {
+			dev_warn(dev, "Failed to parse optional parameter table: %04x\n",
+				 SFDP_PARAM_HEADER_ID(param_header));
+			/*
+			 * Let's not drop all information we extracted so far
+			 * if optional table parsers fail. In case of failing,
+			 * each optional parser is responsible to roll back to
+			 * the previously known spi_nor data.
+			 */
+			err = 0;
+		}
 	}
 
 exit:
-- 
GitLab


From c797bd81d10e648ec8c84c041191527113e63e85 Mon Sep 17 00:00:00 2001
From: "Tudor.Ambarus@microchip.com" <Tudor.Ambarus@microchip.com>
Date: Fri, 9 Nov 2018 16:56:50 +0000
Subject: [PATCH 1056/1890] mtd: spi-nor: fix iteration over smpt array

Iterate over smpt array using its starting address and length
instead of the blind iterations that used data found in the array.

This prevents possible memory accesses outside of the smpt array
boundaries in case software, or manufacturers, misrepresent smpt
array fields.

Fixes: b038e8e3be72 ("mtd: spi-nor: parse SFDP Sector Map Parameter Table")
Suggested-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
---
 drivers/mtd/spi-nor/spi-nor.c | 40 ++++++++++++++++++++++++++---------
 1 file changed, 30 insertions(+), 10 deletions(-)

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index a3ab318406ec7..fc5cbc42b8c1e 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -2855,12 +2855,15 @@ static u8 spi_nor_smpt_read_dummy(const struct spi_nor *nor, const u32 settings)
  * spi_nor_get_map_in_use() - get the configuration map in use
  * @nor:	pointer to a 'struct spi_nor'
  * @smpt:	pointer to the sector map parameter table
+ * @smpt_len:	sector map parameter table length
  */
-static const u32 *spi_nor_get_map_in_use(struct spi_nor *nor, const u32 *smpt)
+static const u32 *spi_nor_get_map_in_use(struct spi_nor *nor, const u32 *smpt,
+					 u8 smpt_len)
 {
 	const u32 *ret = NULL;
-	u32 i, addr;
+	u32 addr;
 	int err;
+	u8 i;
 	u8 addr_width, read_opcode, read_dummy;
 	u8 read_data_mask, data_byte, map_id;
 
@@ -2869,9 +2872,11 @@ static const u32 *spi_nor_get_map_in_use(struct spi_nor *nor, const u32 *smpt)
 	read_opcode = nor->read_opcode;
 
 	map_id = 0;
-	i = 0;
 	/* Determine if there are any optional Detection Command Descriptors */
-	while (!(smpt[i] & SMPT_DESC_TYPE_MAP)) {
+	for (i = 0; i < smpt_len; i += 2) {
+		if (smpt[i] & SMPT_DESC_TYPE_MAP)
+			break;
+
 		read_data_mask = SMPT_CMD_READ_DATA(smpt[i]);
 		nor->addr_width = spi_nor_smpt_addr_width(nor, smpt[i]);
 		nor->read_dummy = spi_nor_smpt_read_dummy(nor, smpt[i]);
@@ -2887,18 +2892,33 @@ static const u32 *spi_nor_get_map_in_use(struct spi_nor *nor, const u32 *smpt)
 		 * Configuration that is currently in use.
 		 */
 		map_id = map_id << 1 | !!(data_byte & read_data_mask);
-		i = i + 2;
 	}
 
-	/* Find the matching configuration map */
-	while (SMPT_MAP_ID(smpt[i]) != map_id) {
+	/*
+	 * If command descriptors are provided, they always precede map
+	 * descriptors in the table. There is no need to start the iteration
+	 * over smpt array all over again.
+	 *
+	 * Find the matching configuration map.
+	 */
+	while (i < smpt_len) {
+		if (SMPT_MAP_ID(smpt[i]) == map_id) {
+			ret = smpt + i;
+			break;
+		}
+
+		/*
+		 * If there are no more configuration map descriptors and no
+		 * configuration ID matched the configuration identifier, the
+		 * sector address map is unknown.
+		 */
 		if (smpt[i] & SMPT_DESC_END)
-			goto out;
+			break;
+
 		/* increment the table index to the next map */
 		i += SMPT_MAP_REGION_COUNT(smpt[i]) + 1;
 	}
 
-	ret = smpt + i;
 	/* fall through */
 out:
 	nor->addr_width = addr_width;
@@ -3020,7 +3040,7 @@ static int spi_nor_parse_smpt(struct spi_nor *nor,
 	for (i = 0; i < smpt_header->length; i++)
 		smpt[i] = le32_to_cpu(smpt[i]);
 
-	sector_map = spi_nor_get_map_in_use(nor, smpt);
+	sector_map = spi_nor_get_map_in_use(nor, smpt, smpt_header->length);
 	if (!sector_map) {
 		ret = -EINVAL;
 		goto out;
-- 
GitLab


From b9f07cc8207a2a69496beec3f5a5a8372bacdfdc Mon Sep 17 00:00:00 2001
From: "Tudor.Ambarus@microchip.com" <Tudor.Ambarus@microchip.com>
Date: Fri, 9 Nov 2018 16:56:52 +0000
Subject: [PATCH 1057/1890] mtd: spi-nor: don't overwrite errno in
 spi_nor_get_map_in_use()

Don't overwrite the errno from spi_nor_read_raw().

Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
---
 drivers/mtd/spi-nor/spi-nor.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index fc5cbc42b8c1e..7f03be9b54bcf 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -2856,11 +2856,13 @@ static u8 spi_nor_smpt_read_dummy(const struct spi_nor *nor, const u32 settings)
  * @nor:	pointer to a 'struct spi_nor'
  * @smpt:	pointer to the sector map parameter table
  * @smpt_len:	sector map parameter table length
+ *
+ * Return: pointer to the map in use, ERR_PTR(-errno) otherwise.
  */
 static const u32 *spi_nor_get_map_in_use(struct spi_nor *nor, const u32 *smpt,
 					 u8 smpt_len)
 {
-	const u32 *ret = NULL;
+	const u32 *ret;
 	u32 addr;
 	int err;
 	u8 i;
@@ -2884,8 +2886,10 @@ static const u32 *spi_nor_get_map_in_use(struct spi_nor *nor, const u32 *smpt,
 		addr = smpt[i + 1];
 
 		err = spi_nor_read_raw(nor, addr, 1, &data_byte);
-		if (err)
+		if (err) {
+			ret = ERR_PTR(err);
 			goto out;
+		}
 
 		/*
 		 * Build an index value that is used to select the Sector Map
@@ -2901,6 +2905,7 @@ static const u32 *spi_nor_get_map_in_use(struct spi_nor *nor, const u32 *smpt,
 	 *
 	 * Find the matching configuration map.
 	 */
+	ret = ERR_PTR(-EINVAL);
 	while (i < smpt_len) {
 		if (SMPT_MAP_ID(smpt[i]) == map_id) {
 			ret = smpt + i;
@@ -3041,8 +3046,8 @@ static int spi_nor_parse_smpt(struct spi_nor *nor,
 		smpt[i] = le32_to_cpu(smpt[i]);
 
 	sector_map = spi_nor_get_map_in_use(nor, smpt, smpt_header->length);
-	if (!sector_map) {
-		ret = -EINVAL;
+	if (IS_ERR(sector_map)) {
+		ret = PTR_ERR(sector_map);
 		goto out;
 	}
 
-- 
GitLab


From 1d5ceff25aa1edcaf84e7ee26fdcc746cb245af8 Mon Sep 17 00:00:00 2001
From: "Tudor.Ambarus@microchip.com" <Tudor.Ambarus@microchip.com>
Date: Fri, 9 Nov 2018 16:56:54 +0000
Subject: [PATCH 1058/1890] mtd: spi_nor: pass DMA-able buffer to
 spi_nor_read_raw()

spi_nor_read_raw() calls nor->read() which might be implemented
by the m25p80 driver. m25p80 uses the spi-mem layer which requires
DMA-able in/out buffers. Pass kmalloc'ed dma buffer to spi_nor_read_raw().

Fixes: b038e8e3be72 ("mtd: spi-nor: parse SFDP Sector Map Parameter Table")
Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
---
 drivers/mtd/spi-nor/spi-nor.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 7f03be9b54bcf..eb7bb596416b0 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -2156,7 +2156,7 @@ spi_nor_set_pp_settings(struct spi_nor_pp_command *pp,
  * @nor:	pointer to a 'struct spi_nor'
  * @addr:	offset in the serial flash memory
  * @len:	number of bytes to read
- * @buf:	buffer where the data is copied into
+ * @buf:	buffer where the data is copied into (dma-safe memory)
  *
  * Return: 0 on success, -errno otherwise.
  */
@@ -2863,11 +2863,17 @@ static const u32 *spi_nor_get_map_in_use(struct spi_nor *nor, const u32 *smpt,
 					 u8 smpt_len)
 {
 	const u32 *ret;
+	u8 *buf;
 	u32 addr;
 	int err;
 	u8 i;
 	u8 addr_width, read_opcode, read_dummy;
-	u8 read_data_mask, data_byte, map_id;
+	u8 read_data_mask, map_id;
+
+	/* Use a kmalloc'ed bounce buffer to guarantee it is DMA-able. */
+	buf = kmalloc(sizeof(*buf), GFP_KERNEL);
+	if (!buf)
+		return ERR_PTR(-ENOMEM);
 
 	addr_width = nor->addr_width;
 	read_dummy = nor->read_dummy;
@@ -2885,7 +2891,7 @@ static const u32 *spi_nor_get_map_in_use(struct spi_nor *nor, const u32 *smpt,
 		nor->read_opcode = SMPT_CMD_OPCODE(smpt[i]);
 		addr = smpt[i + 1];
 
-		err = spi_nor_read_raw(nor, addr, 1, &data_byte);
+		err = spi_nor_read_raw(nor, addr, 1, buf);
 		if (err) {
 			ret = ERR_PTR(err);
 			goto out;
@@ -2895,7 +2901,7 @@ static const u32 *spi_nor_get_map_in_use(struct spi_nor *nor, const u32 *smpt,
 		 * Build an index value that is used to select the Sector Map
 		 * Configuration that is currently in use.
 		 */
-		map_id = map_id << 1 | !!(data_byte & read_data_mask);
+		map_id = map_id << 1 | !!(*buf & read_data_mask);
 	}
 
 	/*
@@ -2926,6 +2932,7 @@ static const u32 *spi_nor_get_map_in_use(struct spi_nor *nor, const u32 *smpt,
 
 	/* fall through */
 out:
+	kfree(buf);
 	nor->addr_width = addr_width;
 	nor->read_dummy = read_dummy;
 	nor->read_opcode = read_opcode;
-- 
GitLab


From dded2e159208a9edc21dd5c5f583afa28d378d39 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Thu, 27 Sep 2018 17:17:49 +0000
Subject: [PATCH 1059/1890] kdb: use correct pointer when 'btc' calls 'btt'

On a powerpc 8xx, 'btc' fails as follows:

Entering kdb (current=0x(ptrval), pid 282) due to Keyboard Entry
kdb> btc
btc: cpu status: Currently on cpu 0
Available cpus: 0
kdb_getarea: Bad address 0x0

when booting the kernel with 'debug_boot_weak_hash', it fails as well

Entering kdb (current=0xba99ad80, pid 284) due to Keyboard Entry
kdb> btc
btc: cpu status: Currently on cpu 0
Available cpus: 0
kdb_getarea: Bad address 0xba99ad80

On other platforms, Oopses have been observed too, see
https://github.com/linuxppc/linux/issues/139

This is due to btc calling 'btt' with %p pointer as an argument.

This patch replaces %p by %px to get the real pointer value as
expected by 'btt'

Fixes: ad67b74d2469 ("printk: hash addresses printed with %p")
Cc: <stable@vger.kernel.org>
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Reviewed-by: Daniel Thompson <daniel.thompson@linaro.org>
Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
---
 kernel/debug/kdb/kdb_bt.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c
index 6ad4a9fcbd6f7..7921ae4fca8de 100644
--- a/kernel/debug/kdb/kdb_bt.c
+++ b/kernel/debug/kdb/kdb_bt.c
@@ -179,14 +179,14 @@ kdb_bt(int argc, const char **argv)
 				kdb_printf("no process for cpu %ld\n", cpu);
 				return 0;
 			}
-			sprintf(buf, "btt 0x%p\n", KDB_TSK(cpu));
+			sprintf(buf, "btt 0x%px\n", KDB_TSK(cpu));
 			kdb_parse(buf);
 			return 0;
 		}
 		kdb_printf("btc: cpu status: ");
 		kdb_parse("cpu\n");
 		for_each_online_cpu(cpu) {
-			sprintf(buf, "btt 0x%p\n", KDB_TSK(cpu));
+			sprintf(buf, "btt 0x%px\n", KDB_TSK(cpu));
 			kdb_parse(buf);
 			touch_nmi_watchdog();
 		}
-- 
GitLab


From 568fb6f42ac6851320adaea25f8f1b94de14e40a Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@c-s.fr>
Date: Thu, 27 Sep 2018 17:17:57 +0000
Subject: [PATCH 1060/1890] kdb: print real address of pointers instead of
 hashed addresses

Since commit ad67b74d2469 ("printk: hash addresses printed with %p"),
all pointers printed with %p are printed with hashed addresses
instead of real addresses in order to avoid leaking addresses in
dmesg and syslog. But this applies to kdb too, with is unfortunate:

    Entering kdb (current=0x(ptrval), pid 329) due to Keyboard Entry
    kdb> ps
    15 sleeping system daemon (state M) processes suppressed,
    use 'ps A' to see all.
    Task Addr       Pid   Parent [*] cpu State Thread     Command
    0x(ptrval)      329      328  1    0   R  0x(ptrval) *sh

    0x(ptrval)        1        0  0    0   S  0x(ptrval)  init
    0x(ptrval)        3        2  0    0   D  0x(ptrval)  rcu_gp
    0x(ptrval)        4        2  0    0   D  0x(ptrval)  rcu_par_gp
    0x(ptrval)        5        2  0    0   D  0x(ptrval)  kworker/0:0
    0x(ptrval)        6        2  0    0   D  0x(ptrval)  kworker/0:0H
    0x(ptrval)        7        2  0    0   D  0x(ptrval)  kworker/u2:0
    0x(ptrval)        8        2  0    0   D  0x(ptrval)  mm_percpu_wq
    0x(ptrval)       10        2  0    0   D  0x(ptrval)  rcu_preempt

The whole purpose of kdb is to debug, and for debugging real addresses
need to be known. In addition, data displayed by kdb doesn't go into
dmesg.

This patch replaces all %p by %px in kdb in order to display real
addresses.

Fixes: ad67b74d2469 ("printk: hash addresses printed with %p")
Cc: <stable@vger.kernel.org>
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
---
 kernel/debug/kdb/kdb_main.c    | 14 +++++++-------
 kernel/debug/kdb/kdb_support.c | 12 ++++++------
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index bb4fe4e1a601b..959242084b404 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -1192,7 +1192,7 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs,
 	if (reason == KDB_REASON_DEBUG) {
 		/* special case below */
 	} else {
-		kdb_printf("\nEntering kdb (current=0x%p, pid %d) ",
+		kdb_printf("\nEntering kdb (current=0x%px, pid %d) ",
 			   kdb_current, kdb_current ? kdb_current->pid : 0);
 #if defined(CONFIG_SMP)
 		kdb_printf("on processor %d ", raw_smp_processor_id());
@@ -1208,7 +1208,7 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs,
 		 */
 		switch (db_result) {
 		case KDB_DB_BPT:
-			kdb_printf("\nEntering kdb (0x%p, pid %d) ",
+			kdb_printf("\nEntering kdb (0x%px, pid %d) ",
 				   kdb_current, kdb_current->pid);
 #if defined(CONFIG_SMP)
 			kdb_printf("on processor %d ", raw_smp_processor_id());
@@ -2048,7 +2048,7 @@ static int kdb_lsmod(int argc, const char **argv)
 		if (mod->state == MODULE_STATE_UNFORMED)
 			continue;
 
-		kdb_printf("%-20s%8u  0x%p ", mod->name,
+		kdb_printf("%-20s%8u  0x%px ", mod->name,
 			   mod->core_layout.size, (void *)mod);
 #ifdef CONFIG_MODULE_UNLOAD
 		kdb_printf("%4d ", module_refcount(mod));
@@ -2059,7 +2059,7 @@ static int kdb_lsmod(int argc, const char **argv)
 			kdb_printf(" (Loading)");
 		else
 			kdb_printf(" (Live)");
-		kdb_printf(" 0x%p", mod->core_layout.base);
+		kdb_printf(" 0x%px", mod->core_layout.base);
 
 #ifdef CONFIG_MODULE_UNLOAD
 		{
@@ -2341,7 +2341,7 @@ void kdb_ps1(const struct task_struct *p)
 		return;
 
 	cpu = kdb_process_cpu(p);
-	kdb_printf("0x%p %8d %8d  %d %4d   %c  0x%p %c%s\n",
+	kdb_printf("0x%px %8d %8d  %d %4d   %c  0x%px %c%s\n",
 		   (void *)p, p->pid, p->parent->pid,
 		   kdb_task_has_cpu(p), kdb_process_cpu(p),
 		   kdb_task_state_char(p),
@@ -2354,7 +2354,7 @@ void kdb_ps1(const struct task_struct *p)
 		} else {
 			if (KDB_TSK(cpu) != p)
 				kdb_printf("  Error: does not match running "
-				   "process table (0x%p)\n", KDB_TSK(cpu));
+				   "process table (0x%px)\n", KDB_TSK(cpu));
 		}
 	}
 }
@@ -2687,7 +2687,7 @@ int kdb_register_flags(char *cmd,
 	for_each_kdbcmd(kp, i) {
 		if (kp->cmd_name && (strcmp(kp->cmd_name, cmd) == 0)) {
 			kdb_printf("Duplicate kdb command registered: "
-				"%s, func %p help %s\n", cmd, func, help);
+				"%s, func %px help %s\n", cmd, func, help);
 			return 1;
 		}
 	}
diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c
index 990b3cc526c80..987eb73284d2d 100644
--- a/kernel/debug/kdb/kdb_support.c
+++ b/kernel/debug/kdb/kdb_support.c
@@ -40,7 +40,7 @@
 int kdbgetsymval(const char *symname, kdb_symtab_t *symtab)
 {
 	if (KDB_DEBUG(AR))
-		kdb_printf("kdbgetsymval: symname=%s, symtab=%p\n", symname,
+		kdb_printf("kdbgetsymval: symname=%s, symtab=%px\n", symname,
 			   symtab);
 	memset(symtab, 0, sizeof(*symtab));
 	symtab->sym_start = kallsyms_lookup_name(symname);
@@ -88,7 +88,7 @@ int kdbnearsym(unsigned long addr, kdb_symtab_t *symtab)
 	char *knt1 = NULL;
 
 	if (KDB_DEBUG(AR))
-		kdb_printf("kdbnearsym: addr=0x%lx, symtab=%p\n", addr, symtab);
+		kdb_printf("kdbnearsym: addr=0x%lx, symtab=%px\n", addr, symtab);
 	memset(symtab, 0, sizeof(*symtab));
 
 	if (addr < 4096)
@@ -149,7 +149,7 @@ int kdbnearsym(unsigned long addr, kdb_symtab_t *symtab)
 		symtab->mod_name = "kernel";
 	if (KDB_DEBUG(AR))
 		kdb_printf("kdbnearsym: returns %d symtab->sym_start=0x%lx, "
-		   "symtab->mod_name=%p, symtab->sym_name=%p (%s)\n", ret,
+		   "symtab->mod_name=%px, symtab->sym_name=%px (%s)\n", ret,
 		   symtab->sym_start, symtab->mod_name, symtab->sym_name,
 		   symtab->sym_name);
 
@@ -887,13 +887,13 @@ void debug_kusage(void)
 		   __func__, dah_first);
 	if (dah_first) {
 		h_used = (struct debug_alloc_header *)debug_alloc_pool;
-		kdb_printf("%s: h_used %p size %d\n", __func__, h_used,
+		kdb_printf("%s: h_used %px size %d\n", __func__, h_used,
 			   h_used->size);
 	}
 	do {
 		h_used = (struct debug_alloc_header *)
 			  ((char *)h_free + dah_overhead + h_free->size);
-		kdb_printf("%s: h_used %p size %d caller %p\n",
+		kdb_printf("%s: h_used %px size %d caller %px\n",
 			   __func__, h_used, h_used->size, h_used->caller);
 		h_free = (struct debug_alloc_header *)
 			  (debug_alloc_pool + h_free->next);
@@ -902,7 +902,7 @@ void debug_kusage(void)
 		  ((char *)h_free + dah_overhead + h_free->size);
 	if ((char *)h_used - debug_alloc_pool !=
 	    sizeof(debug_alloc_pool_aligned))
-		kdb_printf("%s: h_used %p size %d caller %p\n",
+		kdb_printf("%s: h_used %px size %d caller %px\n",
 			   __func__, h_used, h_used->size, h_used->caller);
 out:
 	spin_unlock(&dap_lock);
-- 
GitLab


From c2b94c72d93d0929f48157eef128c4f9d2e603ce Mon Sep 17 00:00:00 2001
From: Prarit Bhargava <prarit@redhat.com>
Date: Thu, 20 Sep 2018 08:59:14 -0400
Subject: [PATCH 1061/1890] kdb: Use strscpy with destination buffer size
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

gcc 8.1.0 warns with:

kernel/debug/kdb/kdb_support.c: In function â€˜kallsyms_symbol_nextâ€™:
kernel/debug/kdb/kdb_support.c:239:4: warning: â€˜strncpyâ€™ specified bound depends on the length of the source argument [-Wstringop-overflow=]
     strncpy(prefix_name, name, strlen(name)+1);
     ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
kernel/debug/kdb/kdb_support.c:239:31: note: length computed here

Use strscpy() with the destination buffer size, and use ellipses when
displaying truncated symbols.

v2: Use strscpy()

Signed-off-by: Prarit Bhargava <prarit@redhat.com>
Cc: Jonathan Toppins <jtoppins@redhat.com>
Cc: Jason Wessel <jason.wessel@windriver.com>
Cc: Daniel Thompson <daniel.thompson@linaro.org>
Cc: kgdb-bugreport@lists.sourceforge.net
Reviewed-by: Daniel Thompson <daniel.thompson@linaro.org>
Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
---
 kernel/debug/kdb/kdb_io.c      | 15 +++++++++------
 kernel/debug/kdb/kdb_private.h |  2 +-
 kernel/debug/kdb/kdb_support.c | 10 +++++-----
 3 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index ed5d34925ad06..6a4b41484afe6 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -216,7 +216,7 @@ static char *kdb_read(char *buffer, size_t bufsize)
 	int count;
 	int i;
 	int diag, dtab_count;
-	int key;
+	int key, buf_size, ret;
 
 
 	diag = kdbgetintenv("DTABCOUNT", &dtab_count);
@@ -336,9 +336,8 @@ static char *kdb_read(char *buffer, size_t bufsize)
 		else
 			p_tmp = tmpbuffer;
 		len = strlen(p_tmp);
-		count = kallsyms_symbol_complete(p_tmp,
-						 sizeof(tmpbuffer) -
-						 (p_tmp - tmpbuffer));
+		buf_size = sizeof(tmpbuffer) - (p_tmp - tmpbuffer);
+		count = kallsyms_symbol_complete(p_tmp, buf_size);
 		if (tab == 2 && count > 0) {
 			kdb_printf("\n%d symbols are found.", count);
 			if (count > dtab_count) {
@@ -350,9 +349,13 @@ static char *kdb_read(char *buffer, size_t bufsize)
 			}
 			kdb_printf("\n");
 			for (i = 0; i < count; i++) {
-				if (WARN_ON(!kallsyms_symbol_next(p_tmp, i)))
+				ret = kallsyms_symbol_next(p_tmp, i, buf_size);
+				if (WARN_ON(!ret))
 					break;
-				kdb_printf("%s ", p_tmp);
+				if (ret != -E2BIG)
+					kdb_printf("%s ", p_tmp);
+				else
+					kdb_printf("%s... ", p_tmp);
 				*(p_tmp + len) = '\0';
 			}
 			if (i >= dtab_count)
diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h
index 1e5a502ba4a7b..2118d8258b7c9 100644
--- a/kernel/debug/kdb/kdb_private.h
+++ b/kernel/debug/kdb/kdb_private.h
@@ -83,7 +83,7 @@ typedef struct __ksymtab {
 		unsigned long sym_start;
 		unsigned long sym_end;
 		} kdb_symtab_t;
-extern int kallsyms_symbol_next(char *prefix_name, int flag);
+extern int kallsyms_symbol_next(char *prefix_name, int flag, int buf_size);
 extern int kallsyms_symbol_complete(char *prefix_name, int max_len);
 
 /* Exported Symbols for kernel loadable modules to use. */
diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c
index 987eb73284d2d..b14b0925c1848 100644
--- a/kernel/debug/kdb/kdb_support.c
+++ b/kernel/debug/kdb/kdb_support.c
@@ -221,11 +221,13 @@ int kallsyms_symbol_complete(char *prefix_name, int max_len)
  * Parameters:
  *	prefix_name	prefix of a symbol name to lookup
  *	flag	0 means search from the head, 1 means continue search.
+ *	buf_size	maximum length that can be written to prefix_name
+ *			buffer
  * Returns:
  *	1 if a symbol matches the given prefix.
  *	0 if no string found
  */
-int kallsyms_symbol_next(char *prefix_name, int flag)
+int kallsyms_symbol_next(char *prefix_name, int flag, int buf_size)
 {
 	int prefix_len = strlen(prefix_name);
 	static loff_t pos;
@@ -235,10 +237,8 @@ int kallsyms_symbol_next(char *prefix_name, int flag)
 		pos = 0;
 
 	while ((name = kdb_walk_kallsyms(&pos))) {
-		if (strncmp(name, prefix_name, prefix_len) == 0) {
-			strncpy(prefix_name, name, strlen(name)+1);
-			return 1;
-		}
+		if (!strncmp(name, prefix_name, prefix_len))
+			return strscpy(prefix_name, name, buf_size);
 	}
 	return 0;
 }
-- 
GitLab


From 9eb62f0e1bc70ebc9b15837a0c4e8f12a7b910cb Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Thu, 16 Aug 2018 09:01:41 -0500
Subject: [PATCH 1062/1890] kdb: kdb_main: refactor code in kdb_md_line

Replace the whole switch statement with a for loop.  This makes the
code clearer and easy to read.

This also addresses the following Coverity warnings:

Addresses-Coverity-ID: 115090 ("Missing break in switch")
Addresses-Coverity-ID: 115091 ("Missing break in switch")
Addresses-Coverity-ID: 114700 ("Missing break in switch")

Suggested-by: Daniel Thompson <daniel.thompson@linaro.org>
Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Reviewed-by: Daniel Thompson <daniel.thompson@linaro.org>
[daniel.thompson@linaro.org: Tiny grammar change in description]
Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
---
 kernel/debug/kdb/kdb_main.c | 21 +++------------------
 1 file changed, 3 insertions(+), 18 deletions(-)

diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 959242084b404..d72b32c66f7dd 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -1493,6 +1493,7 @@ static void kdb_md_line(const char *fmtstr, unsigned long addr,
 	char cbuf[32];
 	char *c = cbuf;
 	int i;
+	int j;
 	unsigned long word;
 
 	memset(cbuf, '\0', sizeof(cbuf));
@@ -1538,25 +1539,9 @@ static void kdb_md_line(const char *fmtstr, unsigned long addr,
 			wc.word = word;
 #define printable_char(c) \
 	({unsigned char __c = c; isascii(__c) && isprint(__c) ? __c : '.'; })
-			switch (bytesperword) {
-			case 8:
+			for (j = 0; j < bytesperword; j++)
 				*c++ = printable_char(*cp++);
-				*c++ = printable_char(*cp++);
-				*c++ = printable_char(*cp++);
-				*c++ = printable_char(*cp++);
-				addr += 4;
-			case 4:
-				*c++ = printable_char(*cp++);
-				*c++ = printable_char(*cp++);
-				addr += 2;
-			case 2:
-				*c++ = printable_char(*cp++);
-				addr++;
-			case 1:
-				*c++ = printable_char(*cp++);
-				addr++;
-				break;
-			}
+			addr += bytesperword;
 #undef printable_char
 		}
 	}
-- 
GitLab


From 01cb37351bafc1b44b962842926210115e231f0a Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Sat, 4 Aug 2018 23:18:25 -0500
Subject: [PATCH 1063/1890] kdb: kdb_keyboard: mark expected switch
 fall-throughs

In preparation to enabling -Wimplicit-fallthrough, mark switch cases
where we are expecting to fall through.

Notice that in this particular case, I replaced the code comments with
a proper "fall through" annotation, which is what GCC is expecting
to find.

Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Reviewed-by: Daniel Thompson <daniel.thompson@linaro.org>
Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
---
 kernel/debug/kdb/kdb_keyboard.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/debug/kdb/kdb_keyboard.c b/kernel/debug/kdb/kdb_keyboard.c
index 118527aa60eae..750497b0003a6 100644
--- a/kernel/debug/kdb/kdb_keyboard.c
+++ b/kernel/debug/kdb/kdb_keyboard.c
@@ -173,11 +173,11 @@ int kdb_get_kbd_char(void)
 	case KT_LATIN:
 		if (isprint(keychar))
 			break;		/* printable characters */
-		/* drop through */
+		/* fall through */
 	case KT_SPEC:
 		if (keychar == K_ENTER)
 			break;
-		/* drop through */
+		/* fall through */
 	default:
 		return -1;	/* ignore unprintables */
 	}
-- 
GitLab


From 646558ff1643467d3b941b47f519867cbca462c3 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Sat, 4 Aug 2018 21:48:44 -0500
Subject: [PATCH 1064/1890] kdb: kdb_support: mark expected switch
 fall-throughs

In preparation to enabling -Wimplicit-fallthrough, mark switch cases
where we are expecting to fall through.

Notice that in this particular case, I replaced the code comments with
a proper "fall through" annotation, which is what GCC is expecting
to find.

Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Reviewed-by: Daniel Thompson <daniel.thompson@linaro.org>
Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
---
 kernel/debug/kdb/kdb_support.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c
index b14b0925c1848..50bf9b119bad0 100644
--- a/kernel/debug/kdb/kdb_support.c
+++ b/kernel/debug/kdb/kdb_support.c
@@ -432,7 +432,7 @@ int kdb_getphysword(unsigned long *word, unsigned long addr, size_t size)
 				*word = w8;
 			break;
 		}
-		/* drop through */
+		/* fall through */
 	default:
 		diag = KDB_BADWIDTH;
 		kdb_printf("kdb_getphysword: bad width %ld\n", (long) size);
@@ -481,7 +481,7 @@ int kdb_getword(unsigned long *word, unsigned long addr, size_t size)
 				*word = w8;
 			break;
 		}
-		/* drop through */
+		/* fall through */
 	default:
 		diag = KDB_BADWIDTH;
 		kdb_printf("kdb_getword: bad width %ld\n", (long) size);
@@ -525,7 +525,7 @@ int kdb_putword(unsigned long addr, unsigned long word, size_t size)
 			diag = kdb_putarea(addr, w8);
 			break;
 		}
-		/* drop through */
+		/* fall through */
 	default:
 		diag = KDB_BADWIDTH;
 		kdb_printf("kdb_putword: bad width %ld\n", (long) size);
-- 
GitLab


From fd35f192e42cf7c0df1e2480bfd5965e35b2f4ca Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.ibm.com>
Date: Fri, 9 Nov 2018 00:53:40 -0500
Subject: [PATCH 1065/1890] integrity: support new struct public_key_signature
 encoding field

On systems with IMA-appraisal enabled with a policy requiring file
signatures, the "good" signature values are stored on the filesystem as
extended attributes (security.ima).  Signature verification failure
would normally be limited to just a particular file (eg. executable),
but during boot signature verification failure could result in a system
hang.

Defining and requiring a new public_key_signature field requires all
callers of asymmetric signature verification to be updated to reflect
the change.  This patch updates the integrity asymmetric_verify()
caller.

Fixes: 82f94f24475c ("KEYS: Provide software public key query function [ver #2]")
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
Cc: David Howells <dhowells@redhat.com>
Acked-by: Denis Kenzior <denkenz@gmail.com>
Signed-off-by: James Morris <james.morris@microsoft.com>
---
 security/integrity/digsig_asymmetric.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/security/integrity/digsig_asymmetric.c b/security/integrity/digsig_asymmetric.c
index 6dc0751445087..d775e03fbbcc7 100644
--- a/security/integrity/digsig_asymmetric.c
+++ b/security/integrity/digsig_asymmetric.c
@@ -106,6 +106,7 @@ int asymmetric_verify(struct key *keyring, const char *sig,
 
 	pks.pkey_algo = "rsa";
 	pks.hash_algo = hash_algo_name[hdr->hash_algo];
+	pks.encoding = "pkcs1";
 	pks.digest = (u8 *)data;
 	pks.digest_size = datalen;
 	pks.s = hdr->sig;
-- 
GitLab


From e39d8a186ed002854196668cb7562ffdfbc6d379 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Tue, 13 Nov 2018 16:37:54 -0500
Subject: [PATCH 1066/1890] NFSv4: Fix an Oops during delegation callbacks

If the server sends a CB_GETATTR or a CB_RECALL while the filesystem is
being unmounted, then we can Oops when releasing the inode in
nfs4_callback_getattr() and nfs4_callback_recall().

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/callback_proc.c |  4 ++--
 fs/nfs/delegation.c    | 11 +++++++++--
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index fa515d5ea5ba1..7b861bbc0b43f 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -66,7 +66,7 @@ __be32 nfs4_callback_getattr(void *argp, void *resp,
 out_iput:
 	rcu_read_unlock();
 	trace_nfs4_cb_getattr(cps->clp, &args->fh, inode, -ntohl(res->status));
-	iput(inode);
+	nfs_iput_and_deactive(inode);
 out:
 	dprintk("%s: exit with status = %d\n", __func__, ntohl(res->status));
 	return res->status;
@@ -108,7 +108,7 @@ __be32 nfs4_callback_recall(void *argp, void *resp,
 	}
 	trace_nfs4_cb_recall(cps->clp, &args->fh, inode,
 			&args->stateid, -ntohl(res));
-	iput(inode);
+	nfs_iput_and_deactive(inode);
 out:
 	dprintk("%s: exit with status = %d\n", __func__, ntohl(res));
 	return res;
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 07b8395605762..6ec2f78c1e191 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -850,16 +850,23 @@ nfs_delegation_find_inode_server(struct nfs_server *server,
 				 const struct nfs_fh *fhandle)
 {
 	struct nfs_delegation *delegation;
-	struct inode *res = NULL;
+	struct inode *freeme, *res = NULL;
 
 	list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
 		spin_lock(&delegation->lock);
 		if (delegation->inode != NULL &&
 		    nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) {
-			res = igrab(delegation->inode);
+			freeme = igrab(delegation->inode);
+			if (freeme && nfs_sb_active(freeme->i_sb))
+				res = freeme;
 			spin_unlock(&delegation->lock);
 			if (res != NULL)
 				return res;
+			if (freeme) {
+				rcu_read_unlock();
+				iput(freeme);
+				rcu_read_lock();
+			}
 			return ERR_PTR(-EAGAIN);
 		}
 		spin_unlock(&delegation->lock);
-- 
GitLab


From 9ee325d029c4abb75716851ce38863845911d605 Mon Sep 17 00:00:00 2001
From: Vasily Khoruzhick <anarsoul@gmail.com>
Date: Thu, 8 Nov 2018 20:24:19 -0800
Subject: [PATCH 1067/1890] ASoC: sun8i-codec: add missing route for ADC

sun8i-codec misses a route from ADC to AIF1 Slot 0 ADC. Add it
to the driver to avoid adding it to every dts.

Fixes: eda85d1fee05d ("ASoC: sun8i-codec: Add ADC support for a33")
Signed-off-by: Vasily Khoruzhick <anarsoul@gmail.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/sunxi/sun8i-codec.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/sound/soc/sunxi/sun8i-codec.c b/sound/soc/sunxi/sun8i-codec.c
index c4196d4e59159..92c5de026c43a 100644
--- a/sound/soc/sunxi/sun8i-codec.c
+++ b/sound/soc/sunxi/sun8i-codec.c
@@ -481,7 +481,11 @@ static const struct snd_soc_dapm_route sun8i_codec_dapm_routes[] = {
 	{ "Right Digital DAC Mixer", "AIF1 Slot 0 Digital DAC Playback Switch",
 	  "AIF1 Slot 0 Right"},
 
-	/* ADC routes */
+	/* ADC Routes */
+	{ "AIF1 Slot 0 Right ADC", NULL, "ADC" },
+	{ "AIF1 Slot 0 Left ADC", NULL, "ADC" },
+
+	/* ADC Mixer Routes */
 	{ "Left Digital ADC Mixer", "AIF1 Data Digital ADC Capture Switch",
 	  "AIF1 Slot 0 Left ADC" },
 	{ "Right Digital ADC Mixer", "AIF1 Data Digital ADC Capture Switch",
-- 
GitLab


From 67fd1437d11620de8768b22fe20942e752ed52e9 Mon Sep 17 00:00:00 2001
From: Rohit kumar <rohitkr@codeaurora.org>
Date: Thu, 8 Nov 2018 19:11:40 +0530
Subject: [PATCH 1068/1890] ASoC: qcom: Set dai_link id to each dai_link

Frontend dai_link id is used for closing ADM sessions.
During concurrent usecase when one session is closed,
it closes other ADM session associated with other usecase
too. Dai_link->id should always point to Frontend dai id.
Set cpu_dai id as dai_link id to fix the issue.

Signed-off-by: Rohit kumar <rohitkr@codeaurora.org>
Acked-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/qcom/common.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/sound/soc/qcom/common.c b/sound/soc/qcom/common.c
index eb1b9da05dd47..4715527054e5e 100644
--- a/sound/soc/qcom/common.c
+++ b/sound/soc/qcom/common.c
@@ -13,6 +13,7 @@ int qcom_snd_parse_of(struct snd_soc_card *card)
 	struct device_node *cpu = NULL;
 	struct device *dev = card->dev;
 	struct snd_soc_dai_link *link;
+	struct of_phandle_args args;
 	int ret, num_links;
 
 	ret = snd_soc_of_parse_card_name(card, "model");
@@ -47,12 +48,14 @@ int qcom_snd_parse_of(struct snd_soc_card *card)
 			goto err;
 		}
 
-		link->cpu_of_node = of_parse_phandle(cpu, "sound-dai", 0);
-		if (!link->cpu_of_node) {
+		ret = of_parse_phandle_with_args(cpu, "sound-dai",
+					"#sound-dai-cells", 0, &args);
+		if (ret) {
 			dev_err(card->dev, "error getting cpu phandle\n");
-			ret = -EINVAL;
 			goto err;
 		}
+		link->cpu_of_node = args.np;
+		link->id = args.args[0];
 
 		ret = snd_soc_of_get_dai_name(cpu, &link->cpu_dai_name);
 		if (ret) {
-- 
GitLab


From 933a95496e48bde40e366a79e6dd9f9740905573 Mon Sep 17 00:00:00 2001
From: Kirill Marinushkin <kmarinushkin@birdec.tech>
Date: Mon, 12 Nov 2018 08:08:34 +0100
Subject: [PATCH 1069/1890] ASoC: pcm3060: Rename output widgets

In the initial commit [1], I added differential output of the codec as
separate `+` and `-` widgets:

OUTL+
OUTR+
OUTL-
OUTR-

Later, in the commit [2], I added a device tree property to configure the
output as single-ended or differential. Having this property, the `+` and
`-` separation in widgets seems for me confusing. There are no functional
benefits in such separation, so I find reasonable to get rid of it:

OUTL
OUTR

The new naming is more friendly for sound cards, and is better aligned with
other codec drivers in kernel.

Renaming the output widgets now should not be a problem from the backwards-
compatibility perspective, as the driver for PCM3060 is added into the
mainline very recently, and did not yet appear in any releases.

[1] commit 6ee47d4a8dac ("ASoC: pcm3060: Add codec driver")
[2] commit a78c62de00d5 ("ASoC: pcm3060: Add DT property for single-ended
    output")

Signed-off-by: Kirill Marinushkin <kmarinushkin@birdec.tech>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/codecs/pcm3060.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/sound/soc/codecs/pcm3060.c b/sound/soc/codecs/pcm3060.c
index 494d9d662be8d..771b46e1974b9 100644
--- a/sound/soc/codecs/pcm3060.c
+++ b/sound/soc/codecs/pcm3060.c
@@ -198,20 +198,16 @@ static const struct snd_kcontrol_new pcm3060_dapm_controls[] = {
 };
 
 static const struct snd_soc_dapm_widget pcm3060_dapm_widgets[] = {
-	SND_SOC_DAPM_OUTPUT("OUTL+"),
-	SND_SOC_DAPM_OUTPUT("OUTR+"),
-	SND_SOC_DAPM_OUTPUT("OUTL-"),
-	SND_SOC_DAPM_OUTPUT("OUTR-"),
+	SND_SOC_DAPM_OUTPUT("OUTL"),
+	SND_SOC_DAPM_OUTPUT("OUTR"),
 
 	SND_SOC_DAPM_INPUT("INL"),
 	SND_SOC_DAPM_INPUT("INR"),
 };
 
 static const struct snd_soc_dapm_route pcm3060_dapm_map[] = {
-	{ "OUTL+", NULL, "Playback" },
-	{ "OUTR+", NULL, "Playback" },
-	{ "OUTL-", NULL, "Playback" },
-	{ "OUTR-", NULL, "Playback" },
+	{ "OUTL", NULL, "Playback" },
+	{ "OUTR", NULL, "Playback" },
 
 	{ "Capture", NULL, "INL" },
 	{ "Capture", NULL, "INR" },
-- 
GitLab


From 877181a8d9dc663f7a73f77f50af714d7888ec3b Mon Sep 17 00:00:00 2001
From: Paul Moore <paul@paul-moore.com>
Date: Tue, 13 Nov 2018 21:44:33 -0500
Subject: [PATCH 1070/1890] selinux: fix non-MLS handling in
 mls_context_to_sid()

Commit 95ffe194204a ("selinux: refactor mls_context_to_sid() and make
it stricter") inadvertently changed how we handle labels that did not
contain MLS information.  This patch restores the proper behavior in
mls_context_to_sid() and adds a comment explaining the proper
behavior to help ensure this doesn't happen again.

Fixes: 95ffe194204a ("selinux: refactor mls_context_to_sid() and make it stricter")
Reported-by: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 security/selinux/ss/mls.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/security/selinux/ss/mls.c b/security/selinux/ss/mls.c
index 2fe459df3c858..b7efa2296969c 100644
--- a/security/selinux/ss/mls.c
+++ b/security/selinux/ss/mls.c
@@ -245,9 +245,13 @@ int mls_context_to_sid(struct policydb *pol,
 	char *rangep[2];
 
 	if (!pol->mls_enabled) {
-		if ((def_sid != SECSID_NULL && oldc) || (*scontext) == '\0')
-			return 0;
-		return -EINVAL;
+		/*
+		 * With no MLS, only return -EINVAL if there is a MLS field
+		 * and it did not come from an xattr.
+		 */
+		if (oldc && def_sid == SECSID_NULL)
+			return -EINVAL;
+		return 0;
 	}
 
 	/*
-- 
GitLab


From 437ccdc8ce629470babdda1a7086e2f477048cbd Mon Sep 17 00:00:00 2001
From: Satheesh Rajendran <sathnaga@linux.vnet.ibm.com>
Date: Thu, 8 Nov 2018 10:47:56 +0530
Subject: [PATCH 1071/1890] powerpc/numa: Suppress "VPHN is not supported"
 messages

When VPHN function is not supported and during cpu hotplug event,
kernel prints message 'VPHN function not supported. Disabling
polling...'. Currently it prints on every hotplug event, it floods
dmesg when a KVM guest tries to hotplug huge number of vcpus, let's
just print once and suppress further kernel prints.

Signed-off-by: Satheesh Rajendran <sathnaga@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/mm/numa.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 3a048e98a1323..ce28ae5ca0803 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1178,7 +1178,7 @@ static long vphn_get_associativity(unsigned long cpu,
 
 	switch (rc) {
 	case H_FUNCTION:
-		printk(KERN_INFO
+		printk_once(KERN_INFO
 			"VPHN is not supported. Disabling polling...\n");
 		stop_topology_update();
 		break;
-- 
GitLab


From 5f8208f557065163f9a8089ea2ea7888f9d96922 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Mon, 9 Jul 2018 19:51:54 +0000
Subject: [PATCH 1072/1890] ARM: dts: sun8i: a83t: bananapi-m3: increase vcc-pd
 voltage to 3.3V

Since commit d7c5f6863550 ("ARM: dts: sun8i: a83t: bananapi-m3: Add
AXP813 regulator nodes") my BPIM3 no longer works at gigabit speed.

With the default setting, dldo3 is regulated at 2.9v which seems
sufficient for the PHY but the aforementioned commit drops it to 2.5V
which is insufficient. Note that this behaviour is random for all BPIM3.
Some work with 2.5V, but some don't.

Finnaly, someone from Bananapi confirmed that this regulator must be set
to 3.3V.

Fixes: d7c5f6863550 ("ARM: dts: sun8i: a83t: bananapi-m3: Add AXP813
		      regulator nodes")
Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
[wens@csie.org: Reworked commit message]
Signed-off-by: Chen-Yu Tsai <wens@csie.org>
---
 arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts b/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts
index 742d2946b08be..583a5a01642f2 100644
--- a/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts
+++ b/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts
@@ -314,8 +314,8 @@ &reg_dldo1 {
 
 &reg_dldo3 {
 	regulator-always-on;
-	regulator-min-microvolt = <2500000>;
-	regulator-max-microvolt = <2500000>;
+	regulator-min-microvolt = <3300000>;
+	regulator-max-microvolt = <3300000>;
 	regulator-name = "vcc-pd";
 };
 
-- 
GitLab


From 40dc948f234b73497c3278875eb08a01d5854d3f Mon Sep 17 00:00:00 2001
From: Max Filippov <jcmvbkbc@gmail.com>
Date: Tue, 13 Nov 2018 23:46:42 -0800
Subject: [PATCH 1073/1890] xtensa: fix boot parameters address translation

The bootloader may pass physical address of the boot parameters structure
to the MMUv3 kernel in the register a2. Code in the _SetupMMU block in
the arch/xtensa/kernel/head.S is supposed to map that physical address to
the virtual address in the configured virtual memory layout.

This code haven't been updated when additional 256+256 and 512+512
memory layouts were introduced and it may produce wrong addresses when
used with these layouts.

Cc: stable@vger.kernel.org
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 arch/xtensa/kernel/head.S | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/xtensa/kernel/head.S b/arch/xtensa/kernel/head.S
index 2f76118ecf623..9053a5622d2c3 100644
--- a/arch/xtensa/kernel/head.S
+++ b/arch/xtensa/kernel/head.S
@@ -88,9 +88,12 @@ _SetupMMU:
 	initialize_mmu
 #if defined(CONFIG_MMU) && XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY
 	rsr	a2, excsave1
-	movi	a3, 0x08000000
+	movi	a3, XCHAL_KSEG_PADDR
+	bltu	a2, a3, 1f
+	sub	a2, a2, a3
+	movi	a3, XCHAL_KSEG_SIZE
 	bgeu	a2, a3, 1f
-	movi	a3, 0xd0000000
+	movi	a3, XCHAL_KSEG_CACHED_VADDR
 	add	a2, a2, a3
 	wsr	a2, excsave1
 1:
-- 
GitLab


From 7b412b04a0c7000293008231ce8413056abb1982 Mon Sep 17 00:00:00 2001
From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Date: Wed, 17 Oct 2018 11:40:26 -0700
Subject: [PATCH 1074/1890] usb: dwc3: Fix NULL pointer exception in
 dwc3_pci_remove()

In dwc3_pci_quirks() function, gpiod lookup table is only registered for
baytrail SOC. But in dwc3_pci_remove(), we try to unregistered it
without any checks. This leads to NULL pointer de-reference exception in
gpiod_remove_lookup_table() when unloading the module for non baytrail
SOCs. This patch fixes this issue.

Fixes: 5741022cbdf3 ("usb: dwc3: pci: Add GPIO lookup table on platforms
without ACPI GPIO resources")
Cc: <stable@vger.kernel.org>
Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/dwc3/dwc3-pci.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c
index 1286076a88903..842795856bf49 100644
--- a/drivers/usb/dwc3/dwc3-pci.c
+++ b/drivers/usb/dwc3/dwc3-pci.c
@@ -283,8 +283,10 @@ static int dwc3_pci_probe(struct pci_dev *pci, const struct pci_device_id *id)
 static void dwc3_pci_remove(struct pci_dev *pci)
 {
 	struct dwc3_pci		*dwc = pci_get_drvdata(pci);
+	struct pci_dev		*pdev = dwc->pci;
 
-	gpiod_remove_lookup_table(&platform_bytcr_gpios);
+	if (pdev->device == PCI_DEVICE_ID_INTEL_BYT)
+		gpiod_remove_lookup_table(&platform_bytcr_gpios);
 #ifdef CONFIG_PM
 	cancel_work_sync(&dwc->wakeup_work);
 #endif
-- 
GitLab


From 3c135e8900199e3b9375c1eff808cceba2ee37de Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 18 Oct 2018 10:36:47 +0300
Subject: [PATCH 1075/1890] usb: dwc2: pci: Fix an error code in probe

We added some error handling to this function but forgot to set the
error code on this path.

Fixes: ecd29dabb2ba ("usb: dwc2: pci: Handle error cleanup in probe")
Acked-by: Minas Harutyunyan <hminas@synopsys.com>
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/dwc2/pci.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/dwc2/pci.c b/drivers/usb/dwc2/pci.c
index d257c541e51ba..7afc10872f1f0 100644
--- a/drivers/usb/dwc2/pci.c
+++ b/drivers/usb/dwc2/pci.c
@@ -120,6 +120,7 @@ static int dwc2_pci_probe(struct pci_dev *pci,
 	dwc2 = platform_device_alloc("dwc2", PLATFORM_DEVID_AUTO);
 	if (!dwc2) {
 		dev_err(dev, "couldn't allocate dwc2 device\n");
+		ret = -ENOMEM;
 		goto err;
 	}
 
-- 
GitLab


From a9c859033f6ec772f8e3228c343bb1321584ae0e Mon Sep 17 00:00:00 2001
From: Shen Jing <jingx.shen@intel.com>
Date: Thu, 1 Nov 2018 15:35:17 +0530
Subject: [PATCH 1076/1890] Revert "usb: gadget: ffs: Fix BUG when userland
 exits with submitted AIO transfers"

This reverts commit b4194da3f9087dd38d91b40f9bec42d59ce589a8
since it causes list corruption followed by kernel panic:

Workqueue: adb ffs_aio_cancel_worker
RIP: 0010:__list_add_valid+0x4d/0x70
Call Trace:
insert_work+0x47/0xb0
__queue_work+0xf6/0x400
queue_work_on+0x65/0x70
dwc3_gadget_giveback+0x44/0x50 [dwc3]
dwc3_gadget_ep_dequeue+0x83/0x2d0 [dwc3]
? finish_wait+0x80/0x80
usb_ep_dequeue+0x1e/0x90
process_one_work+0x18c/0x3b0
worker_thread+0x3c/0x390
? process_one_work+0x3b0/0x3b0
kthread+0x11e/0x140
? kthread_create_worker_on_cpu+0x70/0x70
ret_from_fork+0x3a/0x50

This issue is seen with warm reboot stability testing.

Signed-off-by: Shen Jing <jingx.shen@intel.com>
Signed-off-by: Saranya Gopal <saranya.gopal@intel.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/gadget/function/f_fs.c | 26 ++++++++------------------
 1 file changed, 8 insertions(+), 18 deletions(-)

diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 3ada83d81bda8..31e8bf3578c89 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -215,7 +215,6 @@ struct ffs_io_data {
 
 	struct mm_struct *mm;
 	struct work_struct work;
-	struct work_struct cancellation_work;
 
 	struct usb_ep *ep;
 	struct usb_request *req;
@@ -1073,31 +1072,22 @@ ffs_epfile_open(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static void ffs_aio_cancel_worker(struct work_struct *work)
-{
-	struct ffs_io_data *io_data = container_of(work, struct ffs_io_data,
-						   cancellation_work);
-
-	ENTER();
-
-	usb_ep_dequeue(io_data->ep, io_data->req);
-}
-
 static int ffs_aio_cancel(struct kiocb *kiocb)
 {
 	struct ffs_io_data *io_data = kiocb->private;
-	struct ffs_data *ffs = io_data->ffs;
+	struct ffs_epfile *epfile = kiocb->ki_filp->private_data;
 	int value;
 
 	ENTER();
 
-	if (likely(io_data && io_data->ep && io_data->req)) {
-		INIT_WORK(&io_data->cancellation_work, ffs_aio_cancel_worker);
-		queue_work(ffs->io_completion_wq, &io_data->cancellation_work);
-		value = -EINPROGRESS;
-	} else {
+	spin_lock_irq(&epfile->ffs->eps_lock);
+
+	if (likely(io_data && io_data->ep && io_data->req))
+		value = usb_ep_dequeue(io_data->ep, io_data->req);
+	else
 		value = -EINVAL;
-	}
+
+	spin_unlock_irq(&epfile->ffs->eps_lock);
 
 	return value;
 }
-- 
GitLab


From 2fc6d4be35fb1e262f209758e25bfe2b7a113a7f Mon Sep 17 00:00:00 2001
From: Felipe Balbi <felipe.balbi@linux.intel.com>
Date: Wed, 1 Aug 2018 09:37:34 +0300
Subject: [PATCH 1077/1890] usb: dwc3: gadget: fix ISOC TRB type on unaligned
 transfers

When chaining ISOC TRBs together, only the first ISOC TRB should be of
type ISOC_FIRST, all others should be of type ISOC. This patch fixes
that.

Fixes: c6267a51639b ("usb: dwc3: gadget: align transfers to wMaxPacketSize")
Cc: <stable@vger.kernel.org> # v4.11+
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/dwc3/gadget.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 06e22afdf3d14..9faad896b3a13 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -1081,7 +1081,7 @@ static void dwc3_prepare_one_trb_sg(struct dwc3_ep *dep,
 			/* Now prepare one extra TRB to align transfer size */
 			trb = &dep->trb_pool[dep->trb_enqueue];
 			__dwc3_prepare_one_trb(dep, trb, dwc->bounce_addr,
-					maxp - rem, false, 0,
+					maxp - rem, false, 1,
 					req->request.stream_id,
 					req->request.short_not_ok,
 					req->request.no_interrupt);
@@ -1125,7 +1125,7 @@ static void dwc3_prepare_one_trb_linear(struct dwc3_ep *dep,
 		/* Now prepare one extra TRB to align transfer size */
 		trb = &dep->trb_pool[dep->trb_enqueue];
 		__dwc3_prepare_one_trb(dep, trb, dwc->bounce_addr, maxp - rem,
-				false, 0, req->request.stream_id,
+				false, 1, req->request.stream_id,
 				req->request.short_not_ok,
 				req->request.no_interrupt);
 	} else if (req->request.zero && req->request.length &&
@@ -1141,7 +1141,7 @@ static void dwc3_prepare_one_trb_linear(struct dwc3_ep *dep,
 		/* Now prepare one extra TRB to handle ZLP */
 		trb = &dep->trb_pool[dep->trb_enqueue];
 		__dwc3_prepare_one_trb(dep, trb, dwc->bounce_addr, 0,
-				false, 0, req->request.stream_id,
+				false, 1, req->request.stream_id,
 				req->request.short_not_ok,
 				req->request.no_interrupt);
 	} else {
-- 
GitLab


From 6a67a20366f894c172734f28c5646bdbe48a46e3 Mon Sep 17 00:00:00 2001
From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Date: Mon, 12 Nov 2018 12:39:31 +0000
Subject: [PATCH 1078/1890] drm/i915: fix broadwell EU computation

subslice_mask is an array indexed by slice, not subslice.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Fixes: 8cc7669355136f ("drm/i915: store all subslice masks")
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108712
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181112123931.2815-1-lionel.g.landwerlin@intel.com
(cherry picked from commit 63ac3328f0d1d37f286e397b14d9596ed09d7ca5)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_device_info.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
index 0ef0c6448d53a..01fa98299bae6 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -474,7 +474,7 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
 			u8 eu_disabled_mask;
 			u32 n_disabled;
 
-			if (!(sseu->subslice_mask[ss] & BIT(ss)))
+			if (!(sseu->subslice_mask[s] & BIT(ss)))
 				/* skip disabled subslice */
 				continue;
 
-- 
GitLab


From a22612301ae61d78a7c0c82dc556931a35db0e91 Mon Sep 17 00:00:00 2001
From: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Date: Fri, 9 Nov 2018 16:09:23 +0200
Subject: [PATCH 1079/1890] drm/i915/icl: Drop spurious register read from
 icl_dbuf_slices_update

Register DBUF_CTL_S2 is read and it's value is not used. As
there is no explanation why we should prime the hardware with
read, remove it as spurious.

Fixes: aa9664ffe863 ("drm/i915/icl: Enable 2nd DBuf slice only when needed")
Cc: Mahesh Kumar <mahesh1.kumar@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Imre Deak <imre.deak@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181109140924.2663-1-mika.kuoppala@linux.intel.com
(cherry picked from commit 8577c319b6511fbc391f3775225fecd8b979bc26)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_runtime_pm.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 0a4990d8843c4..44e4491a49189 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -3176,8 +3176,7 @@ static u8 intel_dbuf_max_slices(struct drm_i915_private *dev_priv)
 void icl_dbuf_slices_update(struct drm_i915_private *dev_priv,
 			    u8 req_slices)
 {
-	u8 hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices;
-	u32 val;
+	const u8 hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices;
 	bool ret;
 
 	if (req_slices > intel_dbuf_max_slices(dev_priv)) {
@@ -3188,7 +3187,6 @@ void icl_dbuf_slices_update(struct drm_i915_private *dev_priv,
 	if (req_slices == hw_enabled_slices || req_slices == 0)
 		return;
 
-	val = I915_READ(DBUF_CTL_S2);
 	if (req_slices > hw_enabled_slices)
 		ret = intel_dbuf_slice_set(dev_priv, DBUF_CTL_S2, true);
 	else
-- 
GitLab


From 613a41b0d16e617f46776a93b975a1eeea96417c Mon Sep 17 00:00:00 2001
From: Thomas Richter <tmricht@linux.ibm.com>
Date: Tue, 13 Nov 2018 15:38:22 +0000
Subject: [PATCH 1080/1890] s390/cpum_cf: Reject request for sampling in event
 initialization

On s390 command perf top fails
[root@s35lp76 perf] # ./perf top -F100000  --stdio
   Error:
   cycles: PMU Hardware doesn't support sampling/overflow-interrupts.
   	Try 'perf stat'
[root@s35lp76 perf] #

Using event -e rb0000 works as designed.  Event rb0000 is the event
number of the sampling facility for basic sampling.

During system start up the following PMUs are installed in the kernel's
PMU list (from head to tail):
   cpum_cf --> s390 PMU counter facility device driver
   cpum_sf --> s390 PMU sampling facility device driver
   uprobe
   kprobe
   tracepoint
   task_clock
   cpu_clock

Perf top executes following functions and calls perf_event_open(2) system
call with different parameters many times:

cmd_top
--> __cmd_top
    --> perf_evlist__add_default
        --> __perf_evlist__add_default
            --> perf_evlist__new_cycles (creates event type:0 (HW)
			    		config 0 (CPU_CYCLES)
	        --> perf_event_attr__set_max_precise_ip
		    Uses perf_event_open(2) to detect correct
		    precise_ip level. Fails 3 times on s390 which is ok.

Then functions cmd_top
--> __cmd_top
    --> perf_top__start_counters
        -->perf_evlist__config
	   --> perf_can_comm_exec
               --> perf_probe_api
	           This functions test support for the following events:
		   "cycles:u", "instructions:u", "cpu-clock:u" using
		   --> perf_do_probe_api
		       --> perf_event_open_cloexec
		           Test the close on exec flag support with
			   perf_event_open(2).
	               perf_do_probe_api returns true if the event is
		       supported.
		       The function returns true because event cpu-clock is
		       supported by the PMU cpu_clock.
	               This is achieved by many calls to perf_event_open(2).

Function perf_top__start_counters now calls perf_evsel__open() for every
event, which is the default event cpu_cycles (config:0) and type HARDWARE
(type:0) which a predfined frequence of 4000.

Given the above order of the PMU list, the PMU cpum_cf gets called first
and returns 0, which indicates support for this sampling. The event is
fully allocated in the function perf_event_open (file kernel/event/core.c
near line 10521 and the following check fails:

        event = perf_event_alloc(&attr, cpu, task, group_leader, NULL,
		                 NULL, NULL, cgroup_fd);
	if (IS_ERR(event)) {
		err = PTR_ERR(event);
		goto err_cred;
	}

        if (is_sampling_event(event)) {
		if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) {
			err = -EOPNOTSUPP;
			goto err_alloc;
		}
	}

The check for the interrupt capabilities fails and the system call
perf_event_open() returns -EOPNOTSUPP (-95).

Add a check to return -ENODEV when sampling is requested in PMU cpum_cf.
This allows common kernel code in the perf_event_open() system call to
test the next PMU in above list.

Fixes: 97b1198fece0 (" "s390, perf: Use common PMU interrupt disabled code")
Signed-off-by: Thomas Richter <tmricht@linux.ibm.com>
Reviewed-by: Hendrik Brueckner <brueckner@linux.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/perf_cpum_cf.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 74091fd3101e9..d5523adeddbf4 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -346,6 +346,8 @@ static int __hw_perf_event_init(struct perf_event *event)
 		break;
 
 	case PERF_TYPE_HARDWARE:
+		if (is_sampling_event(event))	/* No sampling support */
+			return -ENOENT;
 		ev = attr->config;
 		/* Count user space (problem-state) only */
 		if (!attr->exclude_user && attr->exclude_kernel) {
-- 
GitLab


From 4800bf7bc8c725e955fcbc6191cc872f43f506d3 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Wed, 14 Nov 2018 08:17:18 -0700
Subject: [PATCH 1081/1890] block: fix 32 bit overflow in
 __blkdev_issue_discard()

A discard cleanup merged into 4.20-rc2 causes fstests xfs/259 to
fall into an endless loop in the discard code. The test is creating
a device that is exactly 2^32 sectors in size to test mkfs boundary
conditions around the 32 bit sector overflow region.

mkfs issues a discard for the entire device size by default, and
hence this throws a sector count of 2^32 into
blkdev_issue_discard(). It takes the number of sectors to discard as
a sector_t - a 64 bit value.

The commit ba5d73851e71 ("block: cleanup __blkdev_issue_discard")
takes this sector count and casts it to a 32 bit value before
comapring it against the maximum allowed discard size the device
has. This truncates away the upper 32 bits, and so if the lower 32
bits of the sector count is zero, it starts issuing discards of
length 0. This causes the code to fall into an endless loop, issuing
a zero length discards over and over again on the same sector.

Fixes: ba5d73851e71 ("block: cleanup __blkdev_issue_discard")
Tested-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Dave Chinner <dchinner@redhat.com>

Killed pointless WARN_ON().

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-lib.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/block/blk-lib.c b/block/blk-lib.c
index e8b3bb9bf3759..5f2c429d43784 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -55,9 +55,11 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 		return -EINVAL;
 
 	while (nr_sects) {
-		unsigned int req_sects = min_t(unsigned int, nr_sects,
+		sector_t req_sects = min_t(sector_t, nr_sects,
 				bio_allowed_max_sectors(q));
 
+		WARN_ON_ONCE((req_sects << 9) > UINT_MAX);
+
 		bio = blk_next_bio(bio, 0, gfp_mask);
 		bio->bi_iter.bi_sector = sector;
 		bio_set_dev(bio, bdev);
-- 
GitLab


From 8dc765d438f1e42b3e8227b3b09fad7d73f4ec9a Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Wed, 14 Nov 2018 16:25:51 +0800
Subject: [PATCH 1082/1890] SCSI: fix queue cleanup race before queue
 initialization is done

c2856ae2f315d ("blk-mq: quiesce queue before freeing queue") has
already fixed this race, however the implied synchronize_rcu()
in blk_mq_quiesce_queue() can slow down LUN probe a lot, so caused
performance regression.

Then 1311326cf4755c7 ("blk-mq: avoid to synchronize rcu inside blk_cleanup_queue()")
tried to quiesce queue for avoiding unnecessary synchronize_rcu()
only when queue initialization is done, because it is usual to see
lots of inexistent LUNs which need to be probed.

However, turns out it isn't safe to quiesce queue only when queue
initialization is done. Because when one SCSI command is completed,
the user of sending command can be waken up immediately, then the
scsi device may be removed, meantime the run queue in scsi_end_request()
is still in-progress, so kernel panic can be caused.

In Red Hat QE lab, there are several reports about this kind of kernel
panic triggered during kernel booting.

This patch tries to address the issue by grabing one queue usage
counter during freeing one request and the following run queue.

Fixes: 1311326cf4755c7 ("blk-mq: avoid to synchronize rcu inside blk_cleanup_queue()")
Cc: Andrew Jones <drjones@redhat.com>
Cc: Bart Van Assche <bart.vanassche@wdc.com>
Cc: linux-scsi@vger.kernel.org
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: James E.J. Bottomley <jejb@linux.vnet.ibm.com>
Cc: stable <stable@vger.kernel.org>
Cc: jianchao.wang <jianchao.w.wang@oracle.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c        | 5 ++---
 drivers/scsi/scsi_lib.c | 8 ++++++++
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index ce12515f9b9b9..deb56932f8c46 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -798,9 +798,8 @@ void blk_cleanup_queue(struct request_queue *q)
 	 * dispatch may still be in-progress since we dispatch requests
 	 * from more than one contexts.
 	 *
-	 * No need to quiesce queue if it isn't initialized yet since
-	 * blk_freeze_queue() should be enough for cases of passthrough
-	 * request.
+	 * We rely on driver to deal with the race in case that queue
+	 * initialization isn't done.
 	 */
 	if (q->mq_ops && blk_queue_init_done(q))
 		blk_mq_quiesce_queue(q);
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index c7fccbb8f5545..fa6e0c3b3aa67 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -697,6 +697,12 @@ static bool scsi_end_request(struct request *req, blk_status_t error,
 		 */
 		scsi_mq_uninit_cmd(cmd);
 
+		/*
+		 * queue is still alive, so grab the ref for preventing it
+		 * from being cleaned up during running queue.
+		 */
+		percpu_ref_get(&q->q_usage_counter);
+
 		__blk_mq_end_request(req, error);
 
 		if (scsi_target(sdev)->single_lun ||
@@ -704,6 +710,8 @@ static bool scsi_end_request(struct request *req, blk_status_t error,
 			kblockd_schedule_work(&sdev->requeue_work);
 		else
 			blk_mq_run_hw_queues(q, true);
+
+		percpu_ref_put(&q->q_usage_counter);
 	} else {
 		unsigned long flags;
 
-- 
GitLab


From 38cd989ee38c16388cde89db5b734f9d55b905f9 Mon Sep 17 00:00:00 2001
From: Nicolin Chen <nicoleotsuka@gmail.com>
Date: Tue, 13 Nov 2018 19:48:54 -0800
Subject: [PATCH 1083/1890] hwmon: (ina2xx) Fix current value calculation

The current register (04h) has a sign bit at MSB. The comments
for this calculation also mention that it's a signed register.

However, the regval is unsigned type so result of calculation
turns out to be an incorrect value when current is negative.

This patch simply fixes this by adding a casting to s16.

Fixes: 5d389b125186c ("hwmon: (ina2xx) Make calibration register value fixed")
Signed-off-by: Nicolin Chen <nicoleotsuka@gmail.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/ina2xx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/ina2xx.c b/drivers/hwmon/ina2xx.c
index c2252cf452f50..07ee19573b3f0 100644
--- a/drivers/hwmon/ina2xx.c
+++ b/drivers/hwmon/ina2xx.c
@@ -274,7 +274,7 @@ static int ina2xx_get_value(struct ina2xx_data *data, u8 reg,
 		break;
 	case INA2XX_CURRENT:
 		/* signed register, result in mA */
-		val = regval * data->current_lsb_uA;
+		val = (s16)regval * data->current_lsb_uA;
 		val = DIV_ROUND_CLOSEST(val, 1000);
 		break;
 	case INA2XX_CALIBRATION:
-- 
GitLab


From f505754fd6599230371cb01b9332754ddc104be1 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Wed, 14 Nov 2018 11:35:24 +0000
Subject: [PATCH 1084/1890] Btrfs: ensure path name is null terminated at
 btrfs_control_ioctl

We were using the path name received from user space without checking that
it is null terminated. While btrfs-progs is well behaved and does proper
validation and null termination, someone could call the ioctl and pass
a non-null terminated patch, leading to buffer overrun problems in the
kernel.  The ioctl is protected by CAP_SYS_ADMIN.

So just set the last byte of the path to a null character, similar to what
we do in other ioctls (add/remove/resize device, snapshot creation, etc).

CC: stable@vger.kernel.org # 4.4+
Reviewed-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/super.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index cbc9d0d2c12de..645fc81e2a948 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -2237,6 +2237,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
 	vol = memdup_user((void __user *)arg, sizeof(*vol));
 	if (IS_ERR(vol))
 		return PTR_ERR(vol);
+	vol->name[BTRFS_PATH_NAME_MAX] = '\0';
 
 	switch (cmd) {
 	case BTRFS_IOC_SCAN_DEV:
-- 
GitLab


From 56a6c7268312cba9436b84cac01b3e502c5c511d Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Wed, 14 Nov 2018 09:33:57 +0100
Subject: [PATCH 1085/1890] gnss: serial: fix synchronous write timeout

Passing a timeout of zero to the synchronous serdev_device_write()
helper does currently not imply to wait forever (unlike passing zero to
serdev_device_wait_until_sent()). Instead, if there's insufficient
room in the write buffer, we'd end up with an incomplete write.

Fixes: 37768b054f20 ("gnss: add generic serial driver")
Cc: stable <stable@vger.kernel.org>     # 4.19
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/gnss/serial.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gnss/serial.c b/drivers/gnss/serial.c
index b01ba4438501a..31e891f00175c 100644
--- a/drivers/gnss/serial.c
+++ b/drivers/gnss/serial.c
@@ -13,6 +13,7 @@
 #include <linux/of.h>
 #include <linux/pm.h>
 #include <linux/pm_runtime.h>
+#include <linux/sched.h>
 #include <linux/serdev.h>
 #include <linux/slab.h>
 
@@ -63,7 +64,7 @@ static int gnss_serial_write_raw(struct gnss_device *gdev,
 	int ret;
 
 	/* write is only buffered synchronously */
-	ret = serdev_device_write(serdev, buf, count, 0);
+	ret = serdev_device_write(serdev, buf, count, MAX_SCHEDULE_TIMEOUT);
 	if (ret < 0)
 		return ret;
 
-- 
GitLab


From 1decef370456870bf448a565be95db636428e106 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Wed, 14 Nov 2018 09:37:54 +0100
Subject: [PATCH 1086/1890] gnss: sirf: fix synchronous write timeout

Passing a timeout of zero to the synchronous serdev_device_write()
helper does currently not imply to wait forever (unlike passing zero to
serdev_device_wait_until_sent()). Instead, if there's insufficient
room in the write buffer, we'd end up with an incomplete write.

Fixes: d2efbbd18b1e ("gnss: add driver for sirfstar-based receivers")
Cc: stable <stable@vger.kernel.org>     # 4.19
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/gnss/sirf.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gnss/sirf.c b/drivers/gnss/sirf.c
index 79cb98950013b..71d014edd1676 100644
--- a/drivers/gnss/sirf.c
+++ b/drivers/gnss/sirf.c
@@ -16,6 +16,7 @@
 #include <linux/pm.h>
 #include <linux/pm_runtime.h>
 #include <linux/regulator/consumer.h>
+#include <linux/sched.h>
 #include <linux/serdev.h>
 #include <linux/slab.h>
 #include <linux/wait.h>
@@ -83,7 +84,7 @@ static int sirf_write_raw(struct gnss_device *gdev, const unsigned char *buf,
 	int ret;
 
 	/* write is only buffered synchronously */
-	ret = serdev_device_write(serdev, buf, count, 0);
+	ret = serdev_device_write(serdev, buf, count, MAX_SCHEDULE_TIMEOUT);
 	if (ret < 0)
 		return ret;
 
-- 
GitLab


From 76836fd354922ebe4798a64fda01f8dc6a8b0984 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Wed, 14 Nov 2018 14:58:20 +0200
Subject: [PATCH 1087/1890] ASoC: omap-abe-twl6040: Fix missing audio card
 caused by deferred probing

The machine driver fails to probe in next-20181113 with:

[    2.539093] omap-abe-twl6040 sound: ASoC: CODEC DAI twl6040-legacy not registered
[    2.546630] omap-abe-twl6040 sound: devm_snd_soc_register_card() failed: -517
...
[    3.693206] omap-abe-twl6040 sound: ASoC: Both platform name/of_node are set for TWL6040
[    3.701446] omap-abe-twl6040 sound: ASoC: failed to init link TWL6040
[    3.708007] omap-abe-twl6040 sound: devm_snd_soc_register_card() failed: -22
[    3.715148] omap-abe-twl6040: probe of sound failed with error -22

Bisect pointed to a merge commit:
first bad commit: [0f688ab20a540aafa984c5dbd68a71debebf4d7f] Merge remote-tracking branch 'net-next/master'

and a diff between a working kernel does not reveal anything which would
explain the change in behavior.

Further investigation showed that on the second try of loading fails
because the dai_link->platform is no longer NULL and it might be pointing
to uninitialized memory.

The fix is to move the snd_soc_dai_link and snd_soc_card inside of the
abe_twl6040 struct, which is dynamically allocated every time the driver
probes.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/omap/omap-abe-twl6040.c | 67 +++++++++++++------------------
 1 file changed, 29 insertions(+), 38 deletions(-)

diff --git a/sound/soc/omap/omap-abe-twl6040.c b/sound/soc/omap/omap-abe-twl6040.c
index d5ae9eb8c7569..fed45b41f9d3e 100644
--- a/sound/soc/omap/omap-abe-twl6040.c
+++ b/sound/soc/omap/omap-abe-twl6040.c
@@ -36,6 +36,8 @@
 #include "../codecs/twl6040.h"
 
 struct abe_twl6040 {
+	struct snd_soc_card card;
+	struct snd_soc_dai_link dai_links[2];
 	int	jack_detection;	/* board can detect jack events */
 	int	mclk_freq;	/* MCLK frequency speed for twl6040 */
 };
@@ -208,40 +210,10 @@ static int omap_abe_dmic_init(struct snd_soc_pcm_runtime *rtd)
 				ARRAY_SIZE(dmic_audio_map));
 }
 
-/* Digital audio interface glue - connects codec <--> CPU */
-static struct snd_soc_dai_link abe_twl6040_dai_links[] = {
-	{
-		.name = "TWL6040",
-		.stream_name = "TWL6040",
-		.codec_dai_name = "twl6040-legacy",
-		.codec_name = "twl6040-codec",
-		.init = omap_abe_twl6040_init,
-		.ops = &omap_abe_ops,
-	},
-	{
-		.name = "DMIC",
-		.stream_name = "DMIC Capture",
-		.codec_dai_name = "dmic-hifi",
-		.codec_name = "dmic-codec",
-		.init = omap_abe_dmic_init,
-		.ops = &omap_abe_dmic_ops,
-	},
-};
-
-/* Audio machine driver */
-static struct snd_soc_card omap_abe_card = {
-	.owner = THIS_MODULE,
-
-	.dapm_widgets = twl6040_dapm_widgets,
-	.num_dapm_widgets = ARRAY_SIZE(twl6040_dapm_widgets),
-	.dapm_routes = audio_map,
-	.num_dapm_routes = ARRAY_SIZE(audio_map),
-};
-
 static int omap_abe_probe(struct platform_device *pdev)
 {
 	struct device_node *node = pdev->dev.of_node;
-	struct snd_soc_card *card = &omap_abe_card;
+	struct snd_soc_card *card;
 	struct device_node *dai_node;
 	struct abe_twl6040 *priv;
 	int num_links = 0;
@@ -252,12 +224,18 @@ static int omap_abe_probe(struct platform_device *pdev)
 		return -ENODEV;
 	}
 
-	card->dev = &pdev->dev;
-
 	priv = devm_kzalloc(&pdev->dev, sizeof(struct abe_twl6040), GFP_KERNEL);
 	if (priv == NULL)
 		return -ENOMEM;
 
+	card = &priv->card;
+	card->dev = &pdev->dev;
+	card->owner = THIS_MODULE;
+	card->dapm_widgets = twl6040_dapm_widgets;
+	card->num_dapm_widgets = ARRAY_SIZE(twl6040_dapm_widgets);
+	card->dapm_routes = audio_map;
+	card->num_dapm_routes = ARRAY_SIZE(audio_map);
+
 	if (snd_soc_of_parse_card_name(card, "ti,model")) {
 		dev_err(&pdev->dev, "Card name is not provided\n");
 		return -ENODEV;
@@ -274,14 +252,27 @@ static int omap_abe_probe(struct platform_device *pdev)
 		dev_err(&pdev->dev, "McPDM node is not provided\n");
 		return -EINVAL;
 	}
-	abe_twl6040_dai_links[0].cpu_of_node = dai_node;
-	abe_twl6040_dai_links[0].platform_of_node = dai_node;
+
+	priv->dai_links[0].name = "DMIC";
+	priv->dai_links[0].stream_name = "TWL6040";
+	priv->dai_links[0].cpu_of_node = dai_node;
+	priv->dai_links[0].platform_of_node = dai_node;
+	priv->dai_links[0].codec_dai_name = "twl6040-legacy";
+	priv->dai_links[0].codec_name = "twl6040-codec";
+	priv->dai_links[0].init = omap_abe_twl6040_init;
+	priv->dai_links[0].ops = &omap_abe_ops;
 
 	dai_node = of_parse_phandle(node, "ti,dmic", 0);
 	if (dai_node) {
 		num_links = 2;
-		abe_twl6040_dai_links[1].cpu_of_node = dai_node;
-		abe_twl6040_dai_links[1].platform_of_node = dai_node;
+		priv->dai_links[1].name = "TWL6040";
+		priv->dai_links[1].stream_name = "DMIC Capture";
+		priv->dai_links[1].cpu_of_node = dai_node;
+		priv->dai_links[1].platform_of_node = dai_node;
+		priv->dai_links[1].codec_dai_name = "dmic-hifi";
+		priv->dai_links[1].codec_name = "dmic-codec";
+		priv->dai_links[1].init = omap_abe_dmic_init;
+		priv->dai_links[1].ops = &omap_abe_dmic_ops;
 	} else {
 		num_links = 1;
 	}
@@ -300,7 +291,7 @@ static int omap_abe_probe(struct platform_device *pdev)
 		return -ENODEV;
 	}
 
-	card->dai_link = abe_twl6040_dai_links;
+	card->dai_link = priv->dai_links;
 	card->num_links = num_links;
 
 	snd_soc_card_set_drvdata(card, priv);
-- 
GitLab


From 007b656851ed7f94ba0fa358ac3e5d7705da6846 Mon Sep 17 00:00:00 2001
From: Ursula Braun <ubraun@linux.ibm.com>
Date: Mon, 12 Nov 2018 17:06:12 +0100
Subject: [PATCH 1088/1890] s390/ism: clear dmbe_mask bit before SMC IRQ
 handling

SMC-D stress workload showed connection stalls. Since the firmware
decides to skip raising an interrupt if the SBA DMBE mask bit is
still set, this SBA DMBE mask bit should be cleared before the
IRQ handling in the SMC code runs. Otherwise there are small windows
possible with missing interrupts for incoming data.
SMC-D currently does not care about the old value of the SBA DMBE
mask.

Acked-by: Sebastian Ott <sebott@linux.ibm.com>
Signed-off-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/ism_drv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c
index f96ec68af2e58..dcbf5c8577437 100644
--- a/drivers/s390/net/ism_drv.c
+++ b/drivers/s390/net/ism_drv.c
@@ -415,9 +415,9 @@ static irqreturn_t ism_handle_irq(int irq, void *data)
 			break;
 
 		clear_bit_inv(bit, bv);
+		ism->sba->dmbe_mask[bit + ISM_DMB_BIT_OFFSET] = 0;
 		barrier();
 		smcd_handle_irq(ism->smcd, bit + ISM_DMB_BIT_OFFSET);
-		ism->sba->dmbe_mask[bit + ISM_DMB_BIT_OFFSET] = 0;
 	}
 
 	if (ism->sba->e) {
-- 
GitLab


From 882eab6c28d23a970ae73b7eb831b169a672d456 Mon Sep 17 00:00:00 2001
From: Tzung-Bi Shih <tzungbi@google.com>
Date: Wed, 14 Nov 2018 17:06:13 +0800
Subject: [PATCH 1089/1890] ASoC: dapm: Recalculate audio map forcely when card
 instantiated

Audio map are possible in wrong state before card->instantiated has
been set to true.  Imaging the following examples:

time 1: at the beginning

  in:-1    in:-1    in:-1    in:-1
 out:-1   out:-1   out:-1   out:-1
 SIGGEN        A        B      Spk

time 2: after someone called snd_soc_dapm_new_widgets()
(e.g. create_fill_widget_route_map() in sound/soc/codecs/hdac_hdmi.c)

   in:1     in:0     in:0     in:0
  out:0    out:0    out:0    out:1
 SIGGEN        A        B      Spk

time 3: routes added

   in:1     in:0     in:0     in:0
  out:0    out:0    out:0    out:1
 SIGGEN -----> A -----> B ---> Spk

In the end, the path should be powered on but it did not.  At time 3,
"in" of SIGGEN and "out" of Spk did not propagate to their neighbors
because snd_soc_dapm_add_path() will not invalidate the paths if
the card has not instantiated (i.e. card->instantiated is false).
To correct the state of audio map, recalculate the whole map forcely.

Signed-off-by: Tzung-Bi Shih <tzungbi@google.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/soc-core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 6ddcf12bc030d..b29d0f65611eb 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -2131,6 +2131,7 @@ static int snd_soc_instantiate_card(struct snd_soc_card *card)
 	}
 
 	card->instantiated = 1;
+	dapm_mark_endpoints_dirty(card);
 	snd_soc_dapm_sync(&card->dapm);
 	mutex_unlock(&card->mutex);
 	mutex_unlock(&client_mutex);
-- 
GitLab


From 22454b79e6de05fa61a2a72d00d2eed798abbb75 Mon Sep 17 00:00:00 2001
From: Dennis Wassenberg <dennis.wassenberg@secunet.com>
Date: Tue, 13 Nov 2018 14:40:34 +0100
Subject: [PATCH 1090/1890] usb: core: Fix hub port connection events lost

This will clear the USB_PORT_FEAT_C_CONNECTION bit in case of a hub port reset
only if a device is was attached to the hub port before resetting the hub port.

Using a Lenovo T480s attached to the ultra dock it was not possible to detect
some usb-c devices at the dock usb-c ports because the hub_port_reset code
will clear the USB_PORT_FEAT_C_CONNECTION bit after the actual hub port reset.
Using this device combo the USB_PORT_FEAT_C_CONNECTION bit was set between the
actual hub port reset and the clear of the USB_PORT_FEAT_C_CONNECTION bit.
This ends up with clearing the USB_PORT_FEAT_C_CONNECTION bit after the
new device was attached such that it was not detected.

This patch will not clear the USB_PORT_FEAT_C_CONNECTION bit if there is
currently no device attached to the port before the hub port reset.
This will avoid clearing the connection bit for new attached devices.

Signed-off-by: Dennis Wassenberg <dennis.wassenberg@secunet.com>
Acked-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hub.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index d9bd7576786a7..0f9381b69a3b5 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -2850,7 +2850,9 @@ static int hub_port_reset(struct usb_hub *hub, int port1,
 					USB_PORT_FEAT_C_BH_PORT_RESET);
 			usb_clear_port_feature(hub->hdev, port1,
 					USB_PORT_FEAT_C_PORT_LINK_STATE);
-			usb_clear_port_feature(hub->hdev, port1,
+
+			if (udev)
+				usb_clear_port_feature(hub->hdev, port1,
 					USB_PORT_FEAT_C_CONNECTION);
 
 			/*
-- 
GitLab


From 27c0f2b0197070c8e94bdf28686d4c63b959dea8 Mon Sep 17 00:00:00 2001
From: Rajan Vaja <rajan.vaja@xilinx.com>
Date: Tue, 13 Nov 2018 23:46:49 -0800
Subject: [PATCH 1091/1890] clk: zynqmp: handle fixed factor param query error

Return proper error code in case query for fixed factor
parameter fails. This also fixes build warning for set
but not used variable 'ret'.

Signed-off-by: Rajan Vaja <rajan.vaja@xilinx.com>
Fixes: 3fde0e16d016 ("drivers: clk: Add ZynqMP clock driver")
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 drivers/clk/zynqmp/clkc.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/clk/zynqmp/clkc.c b/drivers/clk/zynqmp/clkc.c
index 9d7d297f0ea8d..297f16a20bfc5 100644
--- a/drivers/clk/zynqmp/clkc.c
+++ b/drivers/clk/zynqmp/clkc.c
@@ -279,6 +279,9 @@ struct clk_hw *zynqmp_clk_register_fixed_factor(const char *name, u32 clk_id,
 	qdata.arg1 = clk_id;
 
 	ret = eemi_ops->query_data(qdata, ret_payload);
+	if (ret)
+		return ERR_PTR(ret);
+
 	mult = ret_payload[1];
 	div = ret_payload[2];
 
-- 
GitLab


From 6c08ec1216b7b2f84b9755f339d6009768599256 Mon Sep 17 00:00:00 2001
From: Michael Roth <mdroth@linux.vnet.ibm.com>
Date: Thu, 8 Nov 2018 21:27:23 -0600
Subject: [PATCH 1092/1890] KVM: PPC: Book3S HV: Fix handling for interrupted
 H_ENTER_NESTED

While running a nested guest VCPU on L0 via H_ENTER_NESTED hcall, a
pending signal in the L0 QEMU process can generate the following
sequence:

  ret0 = kvmppc_pseries_do_hcall()
    ret1 = kvmhv_enter_nested_guest()
      ret2 = kvmhv_run_single_vcpu()
      if (ret2 == -EINTR)
        return H_INTERRUPT
    if (ret1 == H_INTERRUPT)
      kvmppc_set_gpr(vcpu, 3, 0)
      return -EINTR
    /* skipped: */
    kvmppc_set_gpr(vcpu, 3, ret)
    vcpu->arch.hcall_needed = 0
    return RESUME_GUEST

which causes an exit to L0 userspace with ret0 == -EINTR.

The intention seems to be to set the hcall return value to 0 (via
VCPU r3) so that L1 will see a successful return from H_ENTER_NESTED
once we resume executing the VCPU. However, because we don't set
vcpu->arch.hcall_needed = 0, we do the following once userspace
resumes execution via kvm_arch_vcpu_ioctl_run():

  ...
  } else if (vcpu->arch.hcall_needed) {
    int i

    kvmppc_set_gpr(vcpu, 3, run->papr_hcall.ret);
    for (i = 0; i < 9; ++i)
           kvmppc_set_gpr(vcpu, 4 + i, run->papr_hcall.args[i]);
    vcpu->arch.hcall_needed = 0;

since vcpu->arch.hcall_needed == 1 indicates that userspace should
have handled the hcall and stored the return value in
run->papr_hcall.ret. Since that's not the case here, we can get an
unexpected value in VCPU r3, which can result in
kvmhv_p9_guest_entry() reporting an unexpected trap value when it
returns from H_ENTER_NESTED, causing the following register dump to
console via subsequent call to kvmppc_handle_exit_hv() in L1:

  [  350.612854] vcpu 00000000f9564cf8 (0):
  [  350.612915] pc  = c00000000013eb98  msr = 8000000000009033  trap = 1
  [  350.613020] r 0 = c0000000004b9044  r16 = 0000000000000000
  [  350.613075] r 1 = c00000007cffba30  r17 = 0000000000000000
  [  350.613120] r 2 = c00000000178c100  r18 = 00007fffc24f3b50
  [  350.613166] r 3 = c00000007ef52480  r19 = 00007fffc24fff58
  [  350.613212] r 4 = 0000000000000000  r20 = 00000a1e96ece9d0
  [  350.613253] r 5 = 70616d00746f6f72  r21 = 00000a1ea117c9b0
  [  350.613295] r 6 = 0000000000000020  r22 = 00000a1ea1184360
  [  350.613338] r 7 = c0000000783be440  r23 = 0000000000000003
  [  350.613380] r 8 = fffffffffffffffc  r24 = 00000a1e96e9e124
  [  350.613423] r 9 = c00000007ef52490  r25 = 00000000000007ff
  [  350.613469] r10 = 0000000000000004  r26 = c00000007eb2f7a0
  [  350.613513] r11 = b0616d0009eccdb2  r27 = c00000007cffbb10
  [  350.613556] r12 = c0000000004b9000  r28 = c00000007d83a2c0
  [  350.613597] r13 = c000000001b00000  r29 = c0000000783cdf68
  [  350.613639] r14 = 0000000000000000  r30 = 0000000000000000
  [  350.613681] r15 = 0000000000000000  r31 = c00000007cffbbf0
  [  350.613723] ctr = c0000000004b9000  lr  = c0000000004b9044
  [  350.613765] srr0 = 0000772f954dd48c srr1 = 800000000280f033
  [  350.613808] sprg0 = 0000000000000000 sprg1 = c000000001b00000
  [  350.613859] sprg2 = 0000772f9565a280 sprg3 = 0000000000000000
  [  350.613911] cr = 88002848  xer = 0000000020040000  dsisr = 42000000
  [  350.613962] dar = 0000772f95390000
  [  350.614031] fault dar = c000000244b278c0 dsisr = 00000000
  [  350.614073] SLB (0 entries):
  [  350.614157] lpcr = 0040000003d40413 sdr1 = 0000000000000000 last_inst = ffffffff
  [  350.614252] trap=0x1 | pc=0xc00000000013eb98 | msr=0x8000000000009033

followed by L1's QEMU reporting the following before stopping execution
of the nested guest:

  KVM: unknown exit, hardware reason 1
  NIP c00000000013eb98   LR c0000000004b9044 CTR c0000000004b9000 XER 0000000020040000 CPU#0
  MSR 8000000000009033 HID0 0000000000000000  HF 8000000000000000 iidx 3 didx 3
  TB 00000000 00000000 DECR 00000000
  GPR00 c0000000004b9044 c00000007cffba30 c00000000178c100 c00000007ef52480
  GPR04 0000000000000000 70616d00746f6f72 0000000000000020 c0000000783be440
  GPR08 fffffffffffffffc c00000007ef52490 0000000000000004 b0616d0009eccdb2
  GPR12 c0000000004b9000 c000000001b00000 0000000000000000 0000000000000000
  GPR16 0000000000000000 0000000000000000 00007fffc24f3b50 00007fffc24fff58
  GPR20 00000a1e96ece9d0 00000a1ea117c9b0 00000a1ea1184360 0000000000000003
  GPR24 00000a1e96e9e124 00000000000007ff c00000007eb2f7a0 c00000007cffbb10
  GPR28 c00000007d83a2c0 c0000000783cdf68 0000000000000000 c00000007cffbbf0
  CR 88002848  [ L  L  -  -  E  L  G  L  ]             RES ffffffffffffffff
   SRR0 0000772f954dd48c  SRR1 800000000280f033    PVR 00000000004e1202 VRSAVE 0000000000000000
  SPRG0 0000000000000000 SPRG1 c000000001b00000  SPRG2 0000772f9565a280  SPRG3 0000000000000000
  SPRG4 0000000000000000 SPRG5 0000000000000000  SPRG6 0000000000000000  SPRG7 0000000000000000
  HSRR0 0000000000000000 HSRR1 0000000000000000
   CFAR 0000000000000000
   LPCR 0000000003d40413
   PTCR 0000000000000000   DAR 0000772f95390000  DSISR 0000000042000000

Fix this by setting vcpu->arch.hcall_needed = 0 to indicate completion
of H_ENTER_NESTED before we exit to L0 userspace.

Fixes: 360cae313702 ("KVM: PPC: Book3S HV: Nested guest entry via hypercall")
Cc: linuxppc-dev@ozlabs.org
Cc: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Reviewed-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_hv.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index d65b961661fbf..a56f8413758ab 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -983,6 +983,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 		ret = kvmhv_enter_nested_guest(vcpu);
 		if (ret == H_INTERRUPT) {
 			kvmppc_set_gpr(vcpu, 3, 0);
+			vcpu->arch.hcall_needed = 0;
 			return -EINTR;
 		}
 		break;
-- 
GitLab


From 66f93c5a02d5ba6ef17fef459143961382593212 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Thu, 15 Nov 2018 12:34:27 +1000
Subject: [PATCH 1093/1890] powerpc/64: Fix kernel stack 16-byte alignment
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 4c2de74cc869 ("powerpc/64: Interrupts save PPR on stack rather
than thread_struct") changed sizeof(struct pt_regs) % 16 from 0 to 8,
which causes the interrupt frame allocation on kernel entry to put the
kernel stack out of alignment.

Quadword (16-byte) alignment for the stack is required by both the
64-bit v1 ABI (v1.9 Â§ 3.2.2) and the 64-bit v2 ABI (v1.1 Â§ 2.2.2.1).

Add a pad field to fix alignment, and add a BUILD_BUG_ON to catch this
in future.

Fixes: 4c2de74cc869 ("powerpc/64: Interrupts save PPR on stack rather than thread_struct")
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/ptrace.h | 1 +
 arch/powerpc/kernel/setup_64.c    | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index f73886a1a7f51..0b8a735b6d85f 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -54,6 +54,7 @@ struct pt_regs
 
 #ifdef CONFIG_PPC64
 	unsigned long ppr;
+	unsigned long __pad;	/* Maintain 16 byte interrupt stack alignment */
 #endif
 };
 #endif
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 2a51e4cc8246d..236c1151a3a77 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -636,6 +636,8 @@ static void *__init alloc_stack(unsigned long limit, int cpu)
 {
 	unsigned long pa;
 
+	BUILD_BUG_ON(STACK_INT_FRAME_SIZE % 16);
+
 	pa = memblock_alloc_base_nid(THREAD_SIZE, THREAD_SIZE, limit,
 					early_cpu_to_node(cpu), MEMBLOCK_NONE);
 	if (!pa) {
-- 
GitLab


From f8504f4ca0a0e9f84546ef86e00b24d2ea9a0bd2 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Tue, 13 Nov 2018 01:08:25 +0800
Subject: [PATCH 1094/1890] l2tp: fix a sock refcnt leak in
 l2tp_tunnel_register

This issue happens when trying to add an existent tunnel. It
doesn't call sock_put() before returning -EEXIST to release
the sock refcnt that was held by calling sock_hold() before
the existence check.

This patch is to fix it by holding the sock after doing the
existence check.

Fixes: f6cd651b056f ("l2tp: fix race in duplicate tunnel detection")
Reported-by: Jianlin Shi <jishi@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Reviewed-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_core.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 82cdf9020b539..26f1d435696a6 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1490,12 +1490,7 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
 			goto err_sock;
 	}
 
-	sk = sock->sk;
-
-	sock_hold(sk);
-	tunnel->sock = sk;
 	tunnel->l2tp_net = net;
-
 	pn = l2tp_pernet(net);
 
 	spin_lock_bh(&pn->l2tp_tunnel_list_lock);
@@ -1510,6 +1505,10 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
 	list_add_rcu(&tunnel->list, &pn->l2tp_tunnel_list);
 	spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
 
+	sk = sock->sk;
+	sock_hold(sk);
+	tunnel->sock = sk;
+
 	if (tunnel->encap == L2TP_ENCAPTYPE_UDP) {
 		struct udp_tunnel_sock_cfg udp_cfg = {
 			.sk_user_data = tunnel,
-- 
GitLab


From ef1491e791308317bb9851a0ad380c4a68b58d54 Mon Sep 17 00:00:00 2001
From: Waiman Long <longman@redhat.com>
Date: Wed, 14 Nov 2018 09:55:40 -0800
Subject: [PATCH 1095/1890] efi: Fix debugobjects warning on 'efi_rts_work'

The following commit:

  9dbbedaa6171 ("efi: Make efi_rts_work accessible to efi page fault handler")

converted 'efi_rts_work' from an auto variable to a global variable.
However, when submitting the work, INIT_WORK_ONSTACK() was still used,
causing the following complaint from debugobjects:

  ODEBUG: object 00000000ed27b500 is NOT on stack 00000000c7d38760, but annotated.

Change the macro to just INIT_WORK() to eliminate the warning.

Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-efi@vger.kernel.org
Fixes: 9dbbedaa6171 ("efi: Make efi_rts_work accessible to efi page fault handler")
Link: http://lkml.kernel.org/r/20181114175544.12860-2-ard.biesheuvel@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 drivers/firmware/efi/runtime-wrappers.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c
index a19d845bdb067..8903b9ccfc2b8 100644
--- a/drivers/firmware/efi/runtime-wrappers.c
+++ b/drivers/firmware/efi/runtime-wrappers.c
@@ -67,7 +67,7 @@ struct efi_runtime_work efi_rts_work;
 	}								\
 									\
 	init_completion(&efi_rts_work.efi_rts_comp);			\
-	INIT_WORK_ONSTACK(&efi_rts_work.work, efi_call_rts);		\
+	INIT_WORK(&efi_rts_work.work, efi_call_rts);			\
 	efi_rts_work.arg1 = _arg1;					\
 	efi_rts_work.arg2 = _arg2;					\
 	efi_rts_work.arg3 = _arg3;					\
-- 
GitLab


From 33412b8673135b18ea42beb7f5117ed0091798b6 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Wed, 14 Nov 2018 09:55:41 -0800
Subject: [PATCH 1096/1890] efi/arm: Revert deferred unmap of early memmap
 mapping

Commit:

  3ea86495aef2 ("efi/arm: preserve early mapping of UEFI memory map longer for BGRT")

deferred the unmap of the early mapping of the UEFI memory map to
accommodate the ACPI BGRT code, which looks up the memory type that
backs the BGRT table to validate it against the requirements of the UEFI spec.

Unfortunately, this causes problems on ARM, which does not permit
early mappings to persist after paging_init() is called, resulting
in a WARN() splat. Since we don't support the BGRT table on ARM anway,
let's revert ARM to the old behaviour, which is to take down the
early mapping at the end of efi_init().

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-efi@vger.kernel.org
Fixes: 3ea86495aef2 ("efi/arm: preserve early mapping of UEFI memory ...")
Link: http://lkml.kernel.org/r/20181114175544.12860-3-ard.biesheuvel@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 drivers/firmware/efi/arm-init.c    | 4 ++++
 drivers/firmware/efi/arm-runtime.c | 2 +-
 drivers/firmware/efi/memmap.c      | 3 +++
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/firmware/efi/arm-init.c b/drivers/firmware/efi/arm-init.c
index 388a929baf95d..1a6a77df8a5e8 100644
--- a/drivers/firmware/efi/arm-init.c
+++ b/drivers/firmware/efi/arm-init.c
@@ -265,6 +265,10 @@ void __init efi_init(void)
 				    (params.mmap & ~PAGE_MASK)));
 
 	init_screen_info();
+
+	/* ARM does not permit early mappings to persist across paging_init() */
+	if (IS_ENABLED(CONFIG_ARM))
+		efi_memmap_unmap();
 }
 
 static int __init register_gop_device(void)
diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c
index 922cfb813109a..a00934d263c51 100644
--- a/drivers/firmware/efi/arm-runtime.c
+++ b/drivers/firmware/efi/arm-runtime.c
@@ -110,7 +110,7 @@ static int __init arm_enable_runtime_services(void)
 {
 	u64 mapsize;
 
-	if (!efi_enabled(EFI_BOOT) || !efi_enabled(EFI_MEMMAP)) {
+	if (!efi_enabled(EFI_BOOT)) {
 		pr_info("EFI services will not be available.\n");
 		return 0;
 	}
diff --git a/drivers/firmware/efi/memmap.c b/drivers/firmware/efi/memmap.c
index fa2904fb841fe..38b686c67b177 100644
--- a/drivers/firmware/efi/memmap.c
+++ b/drivers/firmware/efi/memmap.c
@@ -118,6 +118,9 @@ int __init efi_memmap_init_early(struct efi_memory_map_data *data)
 
 void __init efi_memmap_unmap(void)
 {
+	if (!efi_enabled(EFI_MEMMAP))
+		return;
+
 	if (!efi.memmap.late) {
 		unsigned long size;
 
-- 
GitLab


From 72a58a63a164b4e9d2d914e65caeb551846883f1 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Wed, 14 Nov 2018 09:55:42 -0800
Subject: [PATCH 1097/1890] efi/arm/libstub: Pack FDT after populating it

Commit:

  24d7c494ce46 ("efi/arm-stub: Round up FDT allocation to mapping size")

increased the allocation size for the FDT image created by the stub to a
fixed value of 2 MB, to simplify the former code that made several
attempts with increasing values for the size. This is reasonable
given that the allocation is of type EFI_LOADER_DATA, which is released
to the kernel unless it is explicitly memblock_reserve()d by the early
boot code.

However, this allocation size leaked into the 'size' field of the FDT
header metadata, and so the entire allocation remains occupied by the
device tree binary, even if most of it is not used to store device tree
information.

So call fdt_pack() to shrink the FDT data structure to its minimum size
after populating all the fields, so that the remaining memory is no
longer wasted.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: <stable@vger.kernel.org> # v4.12+
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-efi@vger.kernel.org
Fixes: 24d7c494ce46 ("efi/arm-stub: Round up FDT allocation to mapping size")
Link: http://lkml.kernel.org/r/20181114175544.12860-4-ard.biesheuvel@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 drivers/firmware/efi/libstub/fdt.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c
index 8830fa601e45d..0c0d2312f4a8a 100644
--- a/drivers/firmware/efi/libstub/fdt.c
+++ b/drivers/firmware/efi/libstub/fdt.c
@@ -158,6 +158,10 @@ static efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt,
 			return efi_status;
 		}
 	}
+
+	/* shrink the FDT back to its minimum size */
+	fdt_pack(fdt);
+
 	return EFI_SUCCESS;
 
 fdt_set_fail:
-- 
GitLab


From eff896288872d687d9662000ec9ae11b6d61766f Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Wed, 14 Nov 2018 09:55:43 -0800
Subject: [PATCH 1098/1890] efi/arm: Defer persistent reservations until after
 paging_init()

The new memory EFI reservation feature we introduced to allow memory
reservations to persist across kexec may trigger an unbounded number
of calls to memblock_reserve(). The memblock subsystem can deal with
this fine, but not before memblock resizing is enabled, which we can
only do after paging_init(), when the memory we reallocate the array
into is actually mapped.

So break out the memreserve table processing into a separate routine
and call it after paging_init() on arm64. On ARM, because of limited
reviewing bandwidth of the maintainer, we cannot currently fix this,
so instead, disable the EFI persistent memreserve entirely on ARM so
we can fix it later.

Tested-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-efi@vger.kernel.org
Link: http://lkml.kernel.org/r/20181114175544.12860-5-ard.biesheuvel@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/arm64/kernel/setup.c               | 1 +
 drivers/firmware/efi/efi.c              | 4 ++++
 drivers/firmware/efi/libstub/arm-stub.c | 3 +++
 include/linux/efi.h                     | 7 +++++++
 4 files changed, 15 insertions(+)

diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 953e316521fca..f4fc1e0544b73 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -313,6 +313,7 @@ void __init setup_arch(char **cmdline_p)
 	arm64_memblock_init();
 
 	paging_init();
+	efi_apply_persistent_mem_reservations();
 
 	acpi_table_upgrade();
 
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 249eb70691b0f..72a4da76d2740 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -592,7 +592,11 @@ int __init efi_config_parse_tables(void *config_tables, int count, int sz,
 
 		early_memunmap(tbl, sizeof(*tbl));
 	}
+	return 0;
+}
 
+int __init efi_apply_persistent_mem_reservations(void)
+{
 	if (efi.mem_reserve != EFI_INVALID_TABLE_ADDR) {
 		unsigned long prsv = efi.mem_reserve;
 
diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c
index 30ac0c975f8a1..3d36142cf8120 100644
--- a/drivers/firmware/efi/libstub/arm-stub.c
+++ b/drivers/firmware/efi/libstub/arm-stub.c
@@ -75,6 +75,9 @@ void install_memreserve_table(efi_system_table_t *sys_table_arg)
 	efi_guid_t memreserve_table_guid = LINUX_EFI_MEMRESERVE_TABLE_GUID;
 	efi_status_t status;
 
+	if (IS_ENABLED(CONFIG_ARM))
+		return;
+
 	status = efi_call_early(allocate_pool, EFI_LOADER_DATA, sizeof(*rsv),
 				(void **)&rsv);
 	if (status != EFI_SUCCESS) {
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 845174e113ce9..100ce4a4aff6c 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1167,6 +1167,8 @@ static inline bool efi_enabled(int feature)
 extern void efi_reboot(enum reboot_mode reboot_mode, const char *__unused);
 
 extern bool efi_is_table_address(unsigned long phys_addr);
+
+extern int efi_apply_persistent_mem_reservations(void);
 #else
 static inline bool efi_enabled(int feature)
 {
@@ -1185,6 +1187,11 @@ static inline bool efi_is_table_address(unsigned long phys_addr)
 {
 	return false;
 }
+
+static inline int efi_apply_persistent_mem_reservations(void)
+{
+	return 0;
+}
 #endif
 
 extern int efi_status_to_err(efi_status_t status);
-- 
GitLab


From 63eb322d89c8505af9b4a3d703e85e42281ebaa0 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Wed, 14 Nov 2018 09:55:44 -0800
Subject: [PATCH 1099/1890] efi: Permit calling efi_mem_reserve_persistent()
 from atomic context

Currently, efi_mem_reserve_persistent() may not be called from atomic
context, since both the kmalloc() call and the memremap() call may
sleep.

The kmalloc() call is easy enough to fix, but the memremap() call
needs to be moved into an init hook since we cannot control the
memory allocation behavior of memremap() at the call site.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-efi@vger.kernel.org
Link: http://lkml.kernel.org/r/20181114175544.12860-6-ard.biesheuvel@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 drivers/firmware/efi/efi.c | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 72a4da76d2740..fad7c62cfc0e4 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -967,36 +967,43 @@ bool efi_is_table_address(unsigned long phys_addr)
 }
 
 static DEFINE_SPINLOCK(efi_mem_reserve_persistent_lock);
+static struct linux_efi_memreserve *efi_memreserve_root __ro_after_init;
 
 int efi_mem_reserve_persistent(phys_addr_t addr, u64 size)
 {
-	struct linux_efi_memreserve *rsv, *parent;
+	struct linux_efi_memreserve *rsv;
 
-	if (efi.mem_reserve == EFI_INVALID_TABLE_ADDR)
+	if (!efi_memreserve_root)
 		return -ENODEV;
 
-	rsv = kmalloc(sizeof(*rsv), GFP_KERNEL);
+	rsv = kmalloc(sizeof(*rsv), GFP_ATOMIC);
 	if (!rsv)
 		return -ENOMEM;
 
-	parent = memremap(efi.mem_reserve, sizeof(*rsv), MEMREMAP_WB);
-	if (!parent) {
-		kfree(rsv);
-		return -ENOMEM;
-	}
-
 	rsv->base = addr;
 	rsv->size = size;
 
 	spin_lock(&efi_mem_reserve_persistent_lock);
-	rsv->next = parent->next;
-	parent->next = __pa(rsv);
+	rsv->next = efi_memreserve_root->next;
+	efi_memreserve_root->next = __pa(rsv);
 	spin_unlock(&efi_mem_reserve_persistent_lock);
 
-	memunmap(parent);
+	return 0;
+}
 
+static int __init efi_memreserve_root_init(void)
+{
+	if (efi.mem_reserve == EFI_INVALID_TABLE_ADDR)
+		return -ENODEV;
+
+	efi_memreserve_root = memremap(efi.mem_reserve,
+				       sizeof(*efi_memreserve_root),
+				       MEMREMAP_WB);
+	if (!efi_memreserve_root)
+		return -ENOMEM;
 	return 0;
 }
+early_initcall(efi_memreserve_root_init);
 
 #ifdef CONFIG_KEXEC
 static int update_efi_random_seed(struct notifier_block *nb,
-- 
GitLab


From 4cff280a5fccf6513ed9e895bb3a4e7ad8b0cedc Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Wed, 14 Nov 2018 16:35:10 -0800
Subject: [PATCH 1100/1890] nvme-fc: resolve io failures during connect

If an io error occurs on an io issued while connecting, recovery
of the io falls flat as the state checking ends up nooping the error
handler.

Create an err_work work item that is scheduled upon an io error while
connecting. The work thread terminates all io on all queues and marks
the queues as not connected.  The termination of the io will return
back to the callee, which will then back out of the connection attempt
and will reschedule, if possible, the connection attempt.

The changes:
- in case there are several commands hitting the error handler, a
  state flag is kept so that the error work is only scheduled once,
  on the first error. The subsequent errors can be ignored.
- The calling sequence to stop keep alive and terminate the queues
  and their io is lifted from the reset routine. Made a small
  service routine used by both reset and err_work.
- During debugging, found that the teardown path can reference
  an uninitialized pointer, resulting in a NULL pointer oops.
  The aen_ops weren't initialized yet. Add validation on their
  initialization before calling the teardown routine.

Signed-off-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fc.c | 73 ++++++++++++++++++++++++++++++++++++------
 1 file changed, 63 insertions(+), 10 deletions(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 0b70c8bab045a..54032c4666361 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -152,6 +152,7 @@ struct nvme_fc_ctrl {
 
 	bool			ioq_live;
 	bool			assoc_active;
+	atomic_t		err_work_active;
 	u64			association_id;
 
 	struct list_head	ctrl_list;	/* rport->ctrl_list */
@@ -160,6 +161,7 @@ struct nvme_fc_ctrl {
 	struct blk_mq_tag_set	tag_set;
 
 	struct delayed_work	connect_work;
+	struct work_struct	err_work;
 
 	struct kref		ref;
 	u32			flags;
@@ -1531,6 +1533,10 @@ nvme_fc_abort_aen_ops(struct nvme_fc_ctrl *ctrl)
 	struct nvme_fc_fcp_op *aen_op = ctrl->aen_ops;
 	int i;
 
+	/* ensure we've initialized the ops once */
+	if (!(aen_op->flags & FCOP_FLAGS_AEN))
+		return;
+
 	for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++)
 		__nvme_fc_abort_op(ctrl, aen_op);
 }
@@ -2049,7 +2055,25 @@ nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl)
 static void
 nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
 {
-	/* only proceed if in LIVE state - e.g. on first error */
+	int active;
+
+	/*
+	 * if an error (io timeout, etc) while (re)connecting,
+	 * it's an error on creating the new association.
+	 * Start the error recovery thread if it hasn't already
+	 * been started. It is expected there could be multiple
+	 * ios hitting this path before things are cleaned up.
+	 */
+	if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) {
+		active = atomic_xchg(&ctrl->err_work_active, 1);
+		if (!active && !schedule_work(&ctrl->err_work)) {
+			atomic_set(&ctrl->err_work_active, 0);
+			WARN_ON(1);
+		}
+		return;
+	}
+
+	/* Otherwise, only proceed if in LIVE state - e.g. on first error */
 	if (ctrl->ctrl.state != NVME_CTRL_LIVE)
 		return;
 
@@ -2814,6 +2838,7 @@ nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl)
 {
 	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
 
+	cancel_work_sync(&ctrl->err_work);
 	cancel_delayed_work_sync(&ctrl->connect_work);
 	/*
 	 * kill the association on the link side.  this will block
@@ -2866,23 +2891,30 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
 }
 
 static void
-nvme_fc_reset_ctrl_work(struct work_struct *work)
+__nvme_fc_terminate_io(struct nvme_fc_ctrl *ctrl)
 {
-	struct nvme_fc_ctrl *ctrl =
-		container_of(work, struct nvme_fc_ctrl, ctrl.reset_work);
-	int ret;
-
-	nvme_stop_ctrl(&ctrl->ctrl);
+	nvme_stop_keep_alive(&ctrl->ctrl);
 
 	/* will block will waiting for io to terminate */
 	nvme_fc_delete_association(ctrl);
 
-	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
+	if (ctrl->ctrl.state != NVME_CTRL_CONNECTING &&
+	    !nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING))
 		dev_err(ctrl->ctrl.device,
 			"NVME-FC{%d}: error_recovery: Couldn't change state "
 			"to CONNECTING\n", ctrl->cnum);
-		return;
-	}
+}
+
+static void
+nvme_fc_reset_ctrl_work(struct work_struct *work)
+{
+	struct nvme_fc_ctrl *ctrl =
+		container_of(work, struct nvme_fc_ctrl, ctrl.reset_work);
+	int ret;
+
+	__nvme_fc_terminate_io(ctrl);
+
+	nvme_stop_ctrl(&ctrl->ctrl);
 
 	if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE)
 		ret = nvme_fc_create_association(ctrl);
@@ -2897,6 +2929,24 @@ nvme_fc_reset_ctrl_work(struct work_struct *work)
 			ctrl->cnum);
 }
 
+static void
+nvme_fc_connect_err_work(struct work_struct *work)
+{
+	struct nvme_fc_ctrl *ctrl =
+			container_of(work, struct nvme_fc_ctrl, err_work);
+
+	__nvme_fc_terminate_io(ctrl);
+
+	atomic_set(&ctrl->err_work_active, 0);
+
+	/*
+	 * Rescheduling the connection after recovering
+	 * from the io error is left to the reconnect work
+	 * item, which is what should have stalled waiting on
+	 * the io that had the error that scheduled this work.
+	 */
+}
+
 static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
 	.name			= "fc",
 	.module			= THIS_MODULE,
@@ -3007,6 +3057,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 	ctrl->cnum = idx;
 	ctrl->ioq_live = false;
 	ctrl->assoc_active = false;
+	atomic_set(&ctrl->err_work_active, 0);
 	init_waitqueue_head(&ctrl->ioabort_wait);
 
 	get_device(ctrl->dev);
@@ -3014,6 +3065,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 
 	INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work);
 	INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work);
+	INIT_WORK(&ctrl->err_work, nvme_fc_connect_err_work);
 	spin_lock_init(&ctrl->lock);
 
 	/* io queue count */
@@ -3103,6 +3155,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 fail_ctrl:
 	nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING);
 	cancel_work_sync(&ctrl->ctrl.reset_work);
+	cancel_work_sync(&ctrl->err_work);
 	cancel_delayed_work_sync(&ctrl->connect_work);
 
 	ctrl->ctrl.opts = NULL;
-- 
GitLab


From 2a2777990a342b05f611e78822fc4fa2d9789ade Mon Sep 17 00:00:00 2001
From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Date: Wed, 14 Nov 2018 13:49:23 +0200
Subject: [PATCH 1101/1890] drm/i915: Move programming plane scaler to its own
 function.

This cleans the code up slightly, and will make other changes easier.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180920102711.4184-8-maarten.lankhorst@linux.intel.com
(cherry picked from commit ab5c60bf76755d24ae8de5c1c6ac594934656ace)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_sprite.c | 90 +++++++++++++++++------------
 1 file changed, 52 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
index 5fd2f7bf39271..873adcc20109e 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -302,13 +302,63 @@ skl_plane_max_stride(struct intel_plane *plane,
 		return min(8192 * cpp, 32768);
 }
 
+static void
+skl_program_scaler(struct drm_i915_private *dev_priv,
+		   struct intel_plane *plane,
+		   const struct intel_crtc_state *crtc_state,
+		   const struct intel_plane_state *plane_state)
+{
+	enum plane_id plane_id = plane->id;
+	enum pipe pipe = plane->pipe;
+	int scaler_id = plane_state->scaler_id;
+	const struct intel_scaler *scaler =
+		&crtc_state->scaler_state.scalers[scaler_id];
+	int crtc_x = plane_state->base.dst.x1;
+	int crtc_y = plane_state->base.dst.y1;
+	uint32_t crtc_w = drm_rect_width(&plane_state->base.dst);
+	uint32_t crtc_h = drm_rect_height(&plane_state->base.dst);
+	u16 y_hphase, uv_rgb_hphase;
+	u16 y_vphase, uv_rgb_vphase;
+
+	/* Sizes are 0 based */
+	crtc_w--;
+	crtc_h--;
+
+	/* TODO: handle sub-pixel coordinates */
+	if (plane_state->base.fb->format->format == DRM_FORMAT_NV12) {
+		y_hphase = skl_scaler_calc_phase(1, false);
+		y_vphase = skl_scaler_calc_phase(1, false);
+
+		/* MPEG2 chroma siting convention */
+		uv_rgb_hphase = skl_scaler_calc_phase(2, true);
+		uv_rgb_vphase = skl_scaler_calc_phase(2, false);
+	} else {
+		/* not used */
+		y_hphase = 0;
+		y_vphase = 0;
+
+		uv_rgb_hphase = skl_scaler_calc_phase(1, false);
+		uv_rgb_vphase = skl_scaler_calc_phase(1, false);
+	}
+
+	I915_WRITE_FW(SKL_PS_CTRL(pipe, scaler_id),
+		      PS_SCALER_EN | PS_PLANE_SEL(plane_id) | scaler->mode);
+	I915_WRITE_FW(SKL_PS_PWR_GATE(pipe, scaler_id), 0);
+	I915_WRITE_FW(SKL_PS_VPHASE(pipe, scaler_id),
+		      PS_Y_PHASE(y_vphase) | PS_UV_RGB_PHASE(uv_rgb_vphase));
+	I915_WRITE_FW(SKL_PS_HPHASE(pipe, scaler_id),
+		      PS_Y_PHASE(y_hphase) | PS_UV_RGB_PHASE(uv_rgb_hphase));
+	I915_WRITE_FW(SKL_PS_WIN_POS(pipe, scaler_id), (crtc_x << 16) | crtc_y);
+	I915_WRITE_FW(SKL_PS_WIN_SZ(pipe, scaler_id),
+		      ((crtc_w + 1) << 16)|(crtc_h + 1));
+}
+
 void
 skl_update_plane(struct intel_plane *plane,
 		 const struct intel_crtc_state *crtc_state,
 		 const struct intel_plane_state *plane_state)
 {
 	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
-	const struct drm_framebuffer *fb = plane_state->base.fb;
 	enum plane_id plane_id = plane->id;
 	enum pipe pipe = plane->pipe;
 	u32 plane_ctl = plane_state->ctl;
@@ -318,8 +368,6 @@ skl_update_plane(struct intel_plane *plane,
 	u32 aux_stride = skl_plane_stride(plane_state, 1);
 	int crtc_x = plane_state->base.dst.x1;
 	int crtc_y = plane_state->base.dst.y1;
-	uint32_t crtc_w = drm_rect_width(&plane_state->base.dst);
-	uint32_t crtc_h = drm_rect_height(&plane_state->base.dst);
 	uint32_t x = plane_state->color_plane[0].x;
 	uint32_t y = plane_state->color_plane[0].y;
 	uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16;
@@ -329,8 +377,6 @@ skl_update_plane(struct intel_plane *plane,
 	/* Sizes are 0 based */
 	src_w--;
 	src_h--;
-	crtc_w--;
-	crtc_h--;
 
 	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
 
@@ -355,39 +401,7 @@ skl_update_plane(struct intel_plane *plane,
 
 	/* program plane scaler */
 	if (plane_state->scaler_id >= 0) {
-		int scaler_id = plane_state->scaler_id;
-		const struct intel_scaler *scaler =
-			&crtc_state->scaler_state.scalers[scaler_id];
-		u16 y_hphase, uv_rgb_hphase;
-		u16 y_vphase, uv_rgb_vphase;
-
-		/* TODO: handle sub-pixel coordinates */
-		if (fb->format->format == DRM_FORMAT_NV12) {
-			y_hphase = skl_scaler_calc_phase(1, false);
-			y_vphase = skl_scaler_calc_phase(1, false);
-
-			/* MPEG2 chroma siting convention */
-			uv_rgb_hphase = skl_scaler_calc_phase(2, true);
-			uv_rgb_vphase = skl_scaler_calc_phase(2, false);
-		} else {
-			/* not used */
-			y_hphase = 0;
-			y_vphase = 0;
-
-			uv_rgb_hphase = skl_scaler_calc_phase(1, false);
-			uv_rgb_vphase = skl_scaler_calc_phase(1, false);
-		}
-
-		I915_WRITE_FW(SKL_PS_CTRL(pipe, scaler_id),
-			      PS_SCALER_EN | PS_PLANE_SEL(plane_id) | scaler->mode);
-		I915_WRITE_FW(SKL_PS_PWR_GATE(pipe, scaler_id), 0);
-		I915_WRITE_FW(SKL_PS_VPHASE(pipe, scaler_id),
-			      PS_Y_PHASE(y_vphase) | PS_UV_RGB_PHASE(uv_rgb_vphase));
-		I915_WRITE_FW(SKL_PS_HPHASE(pipe, scaler_id),
-			      PS_Y_PHASE(y_hphase) | PS_UV_RGB_PHASE(uv_rgb_hphase));
-		I915_WRITE_FW(SKL_PS_WIN_POS(pipe, scaler_id), (crtc_x << 16) | crtc_y);
-		I915_WRITE_FW(SKL_PS_WIN_SZ(pipe, scaler_id),
-			      ((crtc_w + 1) << 16)|(crtc_h + 1));
+		skl_program_scaler(dev_priv, plane, crtc_state, plane_state);
 
 		I915_WRITE_FW(PLANE_POS(pipe, plane_id), 0);
 	} else {
-- 
GitLab


From 27971d613fcb5b6ad96320bc4f2c90f4d4fde768 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Wed, 14 Nov 2018 13:49:24 +0200
Subject: [PATCH 1102/1890] drm/i915: Clean up skl_program_scaler()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove the "sizes are 0 based" stuff that is not even true for the
scaler.

v2: Rebase

Signed-off-by: Ville SyrjÃ¤lÃ¤ <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181101151736.20522-1-ville.syrjala@linux.intel.com
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
(cherry picked from commit d0105af939769393d6447a04cee2d1ae12e3f09a)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_sprite.c | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
index 873adcc20109e..fa7eaace5f922 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -303,12 +303,11 @@ skl_plane_max_stride(struct intel_plane *plane,
 }
 
 static void
-skl_program_scaler(struct drm_i915_private *dev_priv,
-		   struct intel_plane *plane,
+skl_program_scaler(struct intel_plane *plane,
 		   const struct intel_crtc_state *crtc_state,
 		   const struct intel_plane_state *plane_state)
 {
-	enum plane_id plane_id = plane->id;
+	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
 	enum pipe pipe = plane->pipe;
 	int scaler_id = plane_state->scaler_id;
 	const struct intel_scaler *scaler =
@@ -320,10 +319,6 @@ skl_program_scaler(struct drm_i915_private *dev_priv,
 	u16 y_hphase, uv_rgb_hphase;
 	u16 y_vphase, uv_rgb_vphase;
 
-	/* Sizes are 0 based */
-	crtc_w--;
-	crtc_h--;
-
 	/* TODO: handle sub-pixel coordinates */
 	if (plane_state->base.fb->format->format == DRM_FORMAT_NV12) {
 		y_hphase = skl_scaler_calc_phase(1, false);
@@ -342,15 +337,14 @@ skl_program_scaler(struct drm_i915_private *dev_priv,
 	}
 
 	I915_WRITE_FW(SKL_PS_CTRL(pipe, scaler_id),
-		      PS_SCALER_EN | PS_PLANE_SEL(plane_id) | scaler->mode);
+		      PS_SCALER_EN | PS_PLANE_SEL(plane->id) | scaler->mode);
 	I915_WRITE_FW(SKL_PS_PWR_GATE(pipe, scaler_id), 0);
 	I915_WRITE_FW(SKL_PS_VPHASE(pipe, scaler_id),
 		      PS_Y_PHASE(y_vphase) | PS_UV_RGB_PHASE(uv_rgb_vphase));
 	I915_WRITE_FW(SKL_PS_HPHASE(pipe, scaler_id),
 		      PS_Y_PHASE(y_hphase) | PS_UV_RGB_PHASE(uv_rgb_hphase));
 	I915_WRITE_FW(SKL_PS_WIN_POS(pipe, scaler_id), (crtc_x << 16) | crtc_y);
-	I915_WRITE_FW(SKL_PS_WIN_SZ(pipe, scaler_id),
-		      ((crtc_w + 1) << 16)|(crtc_h + 1));
+	I915_WRITE_FW(SKL_PS_WIN_SZ(pipe, scaler_id), (crtc_w << 16) | crtc_h);
 }
 
 void
@@ -399,9 +393,8 @@ skl_update_plane(struct intel_plane *plane,
 		      (plane_state->color_plane[1].y << 16) |
 		      plane_state->color_plane[1].x);
 
-	/* program plane scaler */
 	if (plane_state->scaler_id >= 0) {
-		skl_program_scaler(dev_priv, plane, crtc_state, plane_state);
+		skl_program_scaler(plane, crtc_state, plane_state);
 
 		I915_WRITE_FW(PLANE_POS(pipe, plane_id), 0);
 	} else {
-- 
GitLab


From 6e8adf6f4a4fa57dd3bef6b70de96e2b7b311204 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Wed, 14 Nov 2018 15:32:55 +0200
Subject: [PATCH 1103/1890] drm/i915: Account for scale factor when calculating
 initial phase
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To get the initial phase correct we need to account for the scale
factor as well. I forgot this initially and was mostly looking at
heavily upscaled content where the minor difference between -0.5
and the proper initial phase was not readily apparent.

And let's toss in a comment that tries to explain the formula
a little bit.

v2: The initial phase upper limit is 1.5, not 24.0!

Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Fixes: 0a59952b24e2 ("drm/i915: Configure SKL+ scaler initial phase correctly")
Signed-off-by: Ville SyrjÃ¤lÃ¤ <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181029181820.21956-1-ville.syrjala@linux.intel.com
Tested-by: Juha-Pekka Heikkila <juhapekka.heikkila@gmail.com>
Tested-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> #irc
Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> #irc
(cherry picked from commit e7a278a329dd8aa2c70c564849f164cb5673689c)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_display.c | 45 ++++++++++++++++++++++++++--
 drivers/gpu/drm/i915/intel_drv.h     |  2 +-
 drivers/gpu/drm/i915/intel_sprite.c  | 20 +++++++++----
 3 files changed, 57 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 23d8008a93bb6..a54843fdeb2f0 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -4850,8 +4850,31 @@ static void cpt_verify_modeset(struct drm_device *dev, int pipe)
  * chroma samples for both of the luma samples, and thus we don't
  * actually get the expected MPEG2 chroma siting convention :(
  * The same behaviour is observed on pre-SKL platforms as well.
+ *
+ * Theory behind the formula (note that we ignore sub-pixel
+ * source coordinates):
+ * s = source sample position
+ * d = destination sample position
+ *
+ * Downscaling 4:1:
+ * -0.5
+ * | 0.0
+ * | |     1.5 (initial phase)
+ * | |     |
+ * v v     v
+ * | s | s | s | s |
+ * |       d       |
+ *
+ * Upscaling 1:4:
+ * -0.5
+ * | -0.375 (initial phase)
+ * | |     0.0
+ * | |     |
+ * v v     v
+ * |       s       |
+ * | d | d | d | d |
  */
-u16 skl_scaler_calc_phase(int sub, bool chroma_cosited)
+u16 skl_scaler_calc_phase(int sub, int scale, bool chroma_cosited)
 {
 	int phase = -0x8000;
 	u16 trip = 0;
@@ -4859,6 +4882,15 @@ u16 skl_scaler_calc_phase(int sub, bool chroma_cosited)
 	if (chroma_cosited)
 		phase += (sub - 1) * 0x8000 / sub;
 
+	phase += scale / (2 * sub);
+
+	/*
+	 * Hardware initial phase limited to [-0.5:1.5].
+	 * Since the max hardware scale factor is 3.0, we
+	 * should never actually excdeed 1.0 here.
+	 */
+	WARN_ON(phase < -0x8000 || phase > 0x18000);
+
 	if (phase < 0)
 		phase = 0x10000 + phase;
 	else
@@ -5067,13 +5099,20 @@ static void skylake_pfit_enable(struct intel_crtc *crtc)
 
 	if (crtc->config->pch_pfit.enabled) {
 		u16 uv_rgb_hphase, uv_rgb_vphase;
+		int pfit_w, pfit_h, hscale, vscale;
 		int id;
 
 		if (WARN_ON(crtc->config->scaler_state.scaler_id < 0))
 			return;
 
-		uv_rgb_hphase = skl_scaler_calc_phase(1, false);
-		uv_rgb_vphase = skl_scaler_calc_phase(1, false);
+		pfit_w = (crtc->config->pch_pfit.size >> 16) & 0xFFFF;
+		pfit_h = crtc->config->pch_pfit.size & 0xFFFF;
+
+		hscale = (crtc->config->pipe_src_w << 16) / pfit_w;
+		vscale = (crtc->config->pipe_src_h << 16) / pfit_h;
+
+		uv_rgb_hphase = skl_scaler_calc_phase(1, hscale, false);
+		uv_rgb_vphase = skl_scaler_calc_phase(1, vscale, false);
 
 		id = scaler_state->scaler_id;
 		I915_WRITE(SKL_PS_CTRL(pipe, id), PS_SCALER_EN |
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index f8dc84b2d2d34..8b298e5f012da 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1646,7 +1646,7 @@ void intel_mode_from_pipe_config(struct drm_display_mode *mode,
 void intel_crtc_arm_fifo_underrun(struct intel_crtc *crtc,
 				  struct intel_crtc_state *crtc_state);
 
-u16 skl_scaler_calc_phase(int sub, bool chroma_center);
+u16 skl_scaler_calc_phase(int sub, int scale, bool chroma_center);
 int skl_update_scaler_crtc(struct intel_crtc_state *crtc_state);
 int skl_max_scale(const struct intel_crtc_state *crtc_state,
 		  u32 pixel_format);
diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
index fa7eaace5f922..d3090a7537bb9 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -318,22 +318,30 @@ skl_program_scaler(struct intel_plane *plane,
 	uint32_t crtc_h = drm_rect_height(&plane_state->base.dst);
 	u16 y_hphase, uv_rgb_hphase;
 	u16 y_vphase, uv_rgb_vphase;
+	int hscale, vscale;
+
+	hscale = drm_rect_calc_hscale(&plane_state->base.src,
+				      &plane_state->base.dst,
+				      0, INT_MAX);
+	vscale = drm_rect_calc_vscale(&plane_state->base.src,
+				      &plane_state->base.dst,
+				      0, INT_MAX);
 
 	/* TODO: handle sub-pixel coordinates */
 	if (plane_state->base.fb->format->format == DRM_FORMAT_NV12) {
-		y_hphase = skl_scaler_calc_phase(1, false);
-		y_vphase = skl_scaler_calc_phase(1, false);
+		y_hphase = skl_scaler_calc_phase(1, hscale, false);
+		y_vphase = skl_scaler_calc_phase(1, vscale, false);
 
 		/* MPEG2 chroma siting convention */
-		uv_rgb_hphase = skl_scaler_calc_phase(2, true);
-		uv_rgb_vphase = skl_scaler_calc_phase(2, false);
+		uv_rgb_hphase = skl_scaler_calc_phase(2, hscale, true);
+		uv_rgb_vphase = skl_scaler_calc_phase(2, vscale, false);
 	} else {
 		/* not used */
 		y_hphase = 0;
 		y_vphase = 0;
 
-		uv_rgb_hphase = skl_scaler_calc_phase(1, false);
-		uv_rgb_vphase = skl_scaler_calc_phase(1, false);
+		uv_rgb_hphase = skl_scaler_calc_phase(1, hscale, false);
+		uv_rgb_vphase = skl_scaler_calc_phase(1, vscale, false);
 	}
 
 	I915_WRITE_FW(SKL_PS_CTRL(pipe, scaler_id),
-- 
GitLab


From b2fed34a628df6118b5d4e13f49a33e15f704fa9 Mon Sep 17 00:00:00 2001
From: Gustavo Romero <gromero@linux.vnet.ibm.com>
Date: Wed, 14 Nov 2018 21:33:30 -0500
Subject: [PATCH 1104/1890] selftests/powerpc: Adjust wild_bctr to build with
 old binutils

Currently the selftest wild_bctr can fail to build when an old gcc is
used, notably on gcc using a binutils version <= 2.27, because the
assembler does not support the integer suffix UL.

This patch adjusts the wild_bctr test so the REG_POISON value is still
treated as an unsigned long for the shifts on compilation but the UL
suffix is absent on the stringification, so the inline asm code
generated has no UL suffixes.

Signed-off-by: Gustavo Romero <gromero@linux.vnet.ibm.com>
[mpe: Wrap long line]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 tools/testing/selftests/powerpc/mm/wild_bctr.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/powerpc/mm/wild_bctr.c b/tools/testing/selftests/powerpc/mm/wild_bctr.c
index 90469a9e49d40..f2fa101c5a6ac 100644
--- a/tools/testing/selftests/powerpc/mm/wild_bctr.c
+++ b/tools/testing/selftests/powerpc/mm/wild_bctr.c
@@ -47,8 +47,9 @@ static int ok(void)
 	return 0;
 }
 
-#define REG_POISON	0x5a5aUL
-#define POISONED_REG(n)	((REG_POISON << 48) | ((n) << 32) | (REG_POISON << 16) | (n))
+#define REG_POISON	0x5a5a
+#define POISONED_REG(n)	((((unsigned long)REG_POISON) << 48) | ((n) << 32) | \
+			 (((unsigned long)REG_POISON) << 16) | (n))
 
 static inline void poison_regs(void)
 {
-- 
GitLab


From 5a43911fd256f1c9748f3f82aee1c3a3adad2719 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Thu, 15 Nov 2018 11:58:51 +0100
Subject: [PATCH 1105/1890] drm/vc4: Fix NULL pointer dereference in the async
 update path

vc4_plane_atomic_async_update() calls vc4_plane_atomic_check()
which in turn calls vc4_plane_setup_clipping_and_scaling(), and since
commit 58a6a36fe8e0 ("drm/vc4: Use
drm_atomic_helper_check_plane_state() to simplify the logic"), this
function accesses plane_state->state which will be NULL when called
from the async update path because we're passing the current plane
state, and plane_state->state has been assigned to NULL in
drm_atomic_helper_swap_state().

Pass the new state instead of the current one (the new state has
->state set to a non-NULL value).

Fixes: 58a6a36fe8e0 ("drm/vc4: Use drm_atomic_helper_check_plane_state() to simplify the logic")
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
Link: https://patchwork.freedesktop.org/patch/msgid/20181115105852.9844-1-boris.brezillon@bootlin.com
---
 drivers/gpu/drm/vc4/vc4_plane.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
index 9dc3fcbd290be..c6635f23918a8 100644
--- a/drivers/gpu/drm/vc4/vc4_plane.c
+++ b/drivers/gpu/drm/vc4/vc4_plane.c
@@ -807,7 +807,7 @@ void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb)
 static void vc4_plane_atomic_async_update(struct drm_plane *plane,
 					  struct drm_plane_state *state)
 {
-	struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
+	struct vc4_plane_state *vc4_state, *new_vc4_state;
 
 	if (plane->state->fb != state->fb) {
 		vc4_plane_async_set_fb(plane, state->fb);
@@ -828,7 +828,18 @@ static void vc4_plane_atomic_async_update(struct drm_plane *plane,
 	plane->state->src_y = state->src_y;
 
 	/* Update the display list based on the new crtc_x/y. */
-	vc4_plane_atomic_check(plane, plane->state);
+	vc4_plane_atomic_check(plane, state);
+
+	new_vc4_state = to_vc4_plane_state(state);
+	vc4_state = to_vc4_plane_state(plane->state);
+
+	/* Update the current vc4_state pos0, pos2 and ptr0 dlist entries. */
+	vc4_state->dlist[vc4_state->pos0_offset] =
+		new_vc4_state->dlist[vc4_state->pos0_offset];
+	vc4_state->dlist[vc4_state->pos2_offset] =
+		new_vc4_state->dlist[vc4_state->pos2_offset];
+	vc4_state->dlist[vc4_state->ptr0_offset] =
+		new_vc4_state->dlist[vc4_state->ptr0_offset];
 
 	/* Note that we can't just call vc4_plane_write_dlist()
 	 * because that would smash the context data that the HVS is
-- 
GitLab


From fcc86cb45d38ca2f24bcea9c29c7f4742041caed Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Thu, 15 Nov 2018 11:58:52 +0100
Subject: [PATCH 1106/1890] drm/vc4: Set ->legacy_cursor_update to false when
 doing non-async updates

drm_atomic_helper_setup_commit() auto-completes commit->flip_done when
state->legacy_cursor_update is true, but we know for sure that we want
a sync update when we call drm_atomic_helper_setup_commit() from
vc4_atomic_commit().

Explicitly set state->legacy_cursor_update to false to prevent this
auto-completion.

Fixes: 184d3cf4f738 ("drm/vc4: Use wait_for_flip_done() instead of wait_for_vblanks()")
Cc: <stable@vger.kernel.org>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
Link: https://patchwork.freedesktop.org/patch/msgid/20181115105852.9844-2-boris.brezillon@bootlin.com
---
 drivers/gpu/drm/vc4/vc4_kms.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c
index 127468785f748..1f94b9affe4bb 100644
--- a/drivers/gpu/drm/vc4/vc4_kms.c
+++ b/drivers/gpu/drm/vc4/vc4_kms.c
@@ -214,6 +214,12 @@ static int vc4_atomic_commit(struct drm_device *dev,
 		return 0;
 	}
 
+	/* We know for sure we don't want an async update here. Set
+	 * state->legacy_cursor_update to false to prevent
+	 * drm_atomic_helper_setup_commit() from auto-completing
+	 * commit->flip_done.
+	 */
+	state->legacy_cursor_update = false;
 	ret = drm_atomic_helper_setup_commit(state, nonblock);
 	if (ret)
 		return ret;
-- 
GitLab


From 2f31a67f01a8beb22cae754c53522cb61a005750 Mon Sep 17 00:00:00 2001
From: Mathias Nyman <mathias.nyman@linux.intel.com>
Date: Thu, 15 Nov 2018 11:38:41 +0200
Subject: [PATCH 1107/1890] usb: xhci: Prevent bus suspend if a port connect
 change or polling state is detected

USB3 roothub might autosuspend before a plugged USB3 device is detected,
causing USB3 device enumeration failure.

USB3 devices don't show up as connected and enabled until USB3 link trainig
completes. On a fast booting platform with a slow USB3 link training the
link might reach the connected enabled state just as the bus is suspending.

If this device is discovered first time by the xhci_bus_suspend() routine
it will be put to U3 suspended state like the other ports which failed to
suspend earlier.

The hub thread will notice the connect change and resume the bus,
moving the port back to U0

This U0 -> U3 -> U0 transition right after being connected seems to be
too much for some devices, causing them to first go to SS.Inactive state,
and finally end up stuck in a polling state with reset asserted

Fix this by failing the bus suspend if a port has a connect change or is
in a polling state in xhci_bus_suspend().

Don't do any port changes until all ports are checked, buffer all port
changes and only write them in the end if suspend can proceed

Cc: stable@vger.kernel.org
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-hub.c | 60 ++++++++++++++++++++++++++++---------
 1 file changed, 46 insertions(+), 14 deletions(-)

diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c
index da98a11244e2b..94aca1b5ac8a2 100644
--- a/drivers/usb/host/xhci-hub.c
+++ b/drivers/usb/host/xhci-hub.c
@@ -1474,15 +1474,18 @@ int xhci_bus_suspend(struct usb_hcd *hcd)
 	unsigned long flags;
 	struct xhci_hub *rhub;
 	struct xhci_port **ports;
+	u32 portsc_buf[USB_MAXCHILDREN];
+	bool wake_enabled;
 
 	rhub = xhci_get_rhub(hcd);
 	ports = rhub->ports;
 	max_ports = rhub->num_ports;
 	bus_state = &xhci->bus_state[hcd_index(hcd)];
+	wake_enabled = hcd->self.root_hub->do_remote_wakeup;
 
 	spin_lock_irqsave(&xhci->lock, flags);
 
-	if (hcd->self.root_hub->do_remote_wakeup) {
+	if (wake_enabled) {
 		if (bus_state->resuming_ports ||	/* USB2 */
 		    bus_state->port_remote_wakeup) {	/* USB3 */
 			spin_unlock_irqrestore(&xhci->lock, flags);
@@ -1490,26 +1493,36 @@ int xhci_bus_suspend(struct usb_hcd *hcd)
 			return -EBUSY;
 		}
 	}
-
-	port_index = max_ports;
+	/*
+	 * Prepare ports for suspend, but don't write anything before all ports
+	 * are checked and we know bus suspend can proceed
+	 */
 	bus_state->bus_suspended = 0;
+	port_index = max_ports;
 	while (port_index--) {
-		/* suspend the port if the port is not suspended */
 		u32 t1, t2;
-		int slot_id;
 
 		t1 = readl(ports[port_index]->addr);
 		t2 = xhci_port_state_to_neutral(t1);
+		portsc_buf[port_index] = 0;
 
-		if ((t1 & PORT_PE) && !(t1 & PORT_PLS_MASK)) {
-			xhci_dbg(xhci, "port %d not suspended\n", port_index);
-			slot_id = xhci_find_slot_id_by_port(hcd, xhci,
-					port_index + 1);
-			if (slot_id) {
+		/* Bail out if a USB3 port has a new device in link training */
+		if ((t1 & PORT_PLS_MASK) == XDEV_POLLING) {
+			bus_state->bus_suspended = 0;
+			spin_unlock_irqrestore(&xhci->lock, flags);
+			xhci_dbg(xhci, "Bus suspend bailout, port in polling\n");
+			return -EBUSY;
+		}
+
+		/* suspend ports in U0, or bail out for new connect changes */
+		if ((t1 & PORT_PE) && (t1 & PORT_PLS_MASK) == XDEV_U0) {
+			if ((t1 & PORT_CSC) && wake_enabled) {
+				bus_state->bus_suspended = 0;
 				spin_unlock_irqrestore(&xhci->lock, flags);
-				xhci_stop_device(xhci, slot_id, 1);
-				spin_lock_irqsave(&xhci->lock, flags);
+				xhci_dbg(xhci, "Bus suspend bailout, port connect change\n");
+				return -EBUSY;
 			}
+			xhci_dbg(xhci, "port %d not suspended\n", port_index);
 			t2 &= ~PORT_PLS_MASK;
 			t2 |= PORT_LINK_STROBE | XDEV_U3;
 			set_bit(port_index, &bus_state->bus_suspended);
@@ -1518,7 +1531,7 @@ int xhci_bus_suspend(struct usb_hcd *hcd)
 		 * including the USB 3.0 roothub, but only if CONFIG_PM
 		 * is enabled, so also enable remote wake here.
 		 */
-		if (hcd->self.root_hub->do_remote_wakeup) {
+		if (wake_enabled) {
 			if (t1 & PORT_CONNECT) {
 				t2 |= PORT_WKOC_E | PORT_WKDISC_E;
 				t2 &= ~PORT_WKCONN_E;
@@ -1538,7 +1551,26 @@ int xhci_bus_suspend(struct usb_hcd *hcd)
 
 		t1 = xhci_port_state_to_neutral(t1);
 		if (t1 != t2)
-			writel(t2, ports[port_index]->addr);
+			portsc_buf[port_index] = t2;
+	}
+
+	/* write port settings, stopping and suspending ports if needed */
+	port_index = max_ports;
+	while (port_index--) {
+		if (!portsc_buf[port_index])
+			continue;
+		if (test_bit(port_index, &bus_state->bus_suspended)) {
+			int slot_id;
+
+			slot_id = xhci_find_slot_id_by_port(hcd, xhci,
+							    port_index + 1);
+			if (slot_id) {
+				spin_unlock_irqrestore(&xhci->lock, flags);
+				xhci_stop_device(xhci, slot_id, 1);
+				spin_lock_irqsave(&xhci->lock, flags);
+			}
+		}
+		writel(portsc_buf[port_index], ports[port_index]->addr);
 	}
 	hcd->state = HC_STATE_SUSPENDED;
 	bus_state->next_statechange = jiffies + msecs_to_jiffies(10);
-- 
GitLab


From 6ba990384e924476b5eed1734f3bcca0df6fd77e Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Thu, 15 Nov 2018 03:25:37 -0500
Subject: [PATCH 1108/1890] bnxt_en: Fix RSS context allocation.

Recent commit has added the reservation of RSS context.  This requires
bnxt_hwrm_vnic_qcaps() to be called before allocating any RSS contexts.
The bnxt_hwrm_vnic_qcaps() call sets up proper flags that will
determine how many RSS contexts to allocate to support NTUPLE.

This causes a regression that too many RSS contexts are being reserved
and causing resource shortage when enabling many VFs.  Fix it by calling
bnxt_hwrm_vnic_qcaps() earlier.

Fixes: 41e8d7983752 ("bnxt_en: Modify the ring reservation functions for 57500 series chips.")
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index dd85d790f6389..4a45a2b809eac 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -10087,6 +10087,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	}
 
 	bnxt_hwrm_func_qcfg(bp);
+	bnxt_hwrm_vnic_qcaps(bp);
 	bnxt_hwrm_port_led_qcaps(bp);
 	bnxt_ethtool_init(bp);
 	bnxt_dcb_init(bp);
@@ -10120,7 +10121,6 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 				    VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV6;
 	}
 
-	bnxt_hwrm_vnic_qcaps(bp);
 	if (bnxt_rfs_supported(bp)) {
 		dev->hw_features |= NETIF_F_NTUPLE;
 		if (bnxt_rfs_capable(bp)) {
-- 
GitLab


From d19819297d9284bd990e22116b8b43d0abcbf488 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Thu, 15 Nov 2018 03:25:38 -0500
Subject: [PATCH 1109/1890] bnxt_en: Fix rx_l4_csum_errors counter on 57500
 devices.

The software counter structure is defined in both the CP ring's structure
and the NQ ring's structure on the new devices.  The legacy code adds the
counter to the CP ring's structure and the counter won't get displayed
since the ethtool code is looking at the NQ ring's structure.

Since all other counters are contained in the NQ ring's structure, it
makes more sense to count rx_l4_csum_errors in the NQ.

Fixes: 50e3ab7836b5 ("bnxt_en: Allocate completion ring structures for 57500 series chips.")
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 4a45a2b809eac..585609990eee0 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -1675,7 +1675,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
 	} else {
 		if (rxcmp1->rx_cmp_cfa_code_errors_v2 & RX_CMP_L4_CS_ERR_BITS) {
 			if (dev->features & NETIF_F_RXCSUM)
-				cpr->rx_l4_csum_errors++;
+				bnapi->cp_ring.rx_l4_csum_errors++;
 		}
 	}
 
-- 
GitLab


From addd4df6d763556e16d5316e4e8cd441050cc2af Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Thu, 15 Nov 2018 03:25:39 -0500
Subject: [PATCH 1110/1890] bnxt_en: Disable RDMA support on the 57500 chips.

There is no RDMA support on 57500 chips yet, so prevent bnxt_re from
registering on these chips.  There is intermittent failure if bnxt_re
is allowed to register and proceed with RDMA operations.

Fixes: 1ab968d2f1d6 ("bnxt_en: Add PCI ID for BCM57508 device.")
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
index beee61292d5e5..b59b382d34f94 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
@@ -43,6 +43,9 @@ static int bnxt_register_dev(struct bnxt_en_dev *edev, int ulp_id,
 	if (ulp_id == BNXT_ROCE_ULP) {
 		unsigned int max_stat_ctxs;
 
+		if (bp->flags & BNXT_FLAG_CHIP_P5)
+			return -EOPNOTSUPP;
+
 		max_stat_ctxs = bnxt_get_max_func_stat_ctxs(bp);
 		if (max_stat_ctxs <= BNXT_MIN_ROCE_STAT_CTXS ||
 		    bp->num_stat_ctxs == max_stat_ctxs)
-- 
GitLab


From ffd77621700ec3adcf859681e24910c38e0931f5 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Thu, 15 Nov 2018 03:25:40 -0500
Subject: [PATCH 1111/1890] bnxt_en: Workaround occasional TX timeout on 57500
 A0.

Hardware can sometimes not generate NQ MSIX with a single pending
CP ring entry.  This seems to always happen at the last entry of
the CP ring before it wraps.  Add logic to check all the CP rings for
pending entries without the CP ring consumer index advancing.  Calling
HWRM_DBG_RING_INFO_GET to read the context of the CP ring will flush
out the NQ entry and MSIX.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 65 +++++++++++++++++++++++
 drivers/net/ethernet/broadcom/bnxt/bnxt.h |  3 ++
 2 files changed, 68 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 585609990eee0..5d4147a75cadc 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -8714,6 +8714,26 @@ static int bnxt_set_features(struct net_device *dev, netdev_features_t features)
 	return rc;
 }
 
+static int bnxt_dbg_hwrm_ring_info_get(struct bnxt *bp, u8 ring_type,
+				       u32 ring_id, u32 *prod, u32 *cons)
+{
+	struct hwrm_dbg_ring_info_get_output *resp = bp->hwrm_cmd_resp_addr;
+	struct hwrm_dbg_ring_info_get_input req = {0};
+	int rc;
+
+	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_DBG_RING_INFO_GET, -1, -1);
+	req.ring_type = ring_type;
+	req.fw_ring_id = cpu_to_le32(ring_id);
+	mutex_lock(&bp->hwrm_cmd_lock);
+	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+	if (!rc) {
+		*prod = le32_to_cpu(resp->producer_index);
+		*cons = le32_to_cpu(resp->consumer_index);
+	}
+	mutex_unlock(&bp->hwrm_cmd_lock);
+	return rc;
+}
+
 static void bnxt_dump_tx_sw_state(struct bnxt_napi *bnapi)
 {
 	struct bnxt_tx_ring_info *txr = bnapi->tx_ring;
@@ -8821,6 +8841,11 @@ static void bnxt_timer(struct timer_list *t)
 			bnxt_queue_sp_work(bp);
 		}
 	}
+
+	if ((bp->flags & BNXT_FLAG_CHIP_P5) && netif_carrier_ok(dev)) {
+		set_bit(BNXT_RING_COAL_NOW_SP_EVENT, &bp->sp_event);
+		bnxt_queue_sp_work(bp);
+	}
 bnxt_restart_timer:
 	mod_timer(&bp->timer, jiffies + bp->current_interval);
 }
@@ -8851,6 +8876,43 @@ static void bnxt_reset(struct bnxt *bp, bool silent)
 	bnxt_rtnl_unlock_sp(bp);
 }
 
+static void bnxt_chk_missed_irq(struct bnxt *bp)
+{
+	int i;
+
+	if (!(bp->flags & BNXT_FLAG_CHIP_P5))
+		return;
+
+	for (i = 0; i < bp->cp_nr_rings; i++) {
+		struct bnxt_napi *bnapi = bp->bnapi[i];
+		struct bnxt_cp_ring_info *cpr;
+		u32 fw_ring_id;
+		int j;
+
+		if (!bnapi)
+			continue;
+
+		cpr = &bnapi->cp_ring;
+		for (j = 0; j < 2; j++) {
+			struct bnxt_cp_ring_info *cpr2 = cpr->cp_ring_arr[j];
+			u32 val[2];
+
+			if (!cpr2 || cpr2->has_more_work ||
+			    !bnxt_has_work(bp, cpr2))
+				continue;
+
+			if (cpr2->cp_raw_cons != cpr2->last_cp_raw_cons) {
+				cpr2->last_cp_raw_cons = cpr2->cp_raw_cons;
+				continue;
+			}
+			fw_ring_id = cpr2->cp_ring_struct.fw_ring_id;
+			bnxt_dbg_hwrm_ring_info_get(bp,
+				DBG_RING_INFO_GET_REQ_RING_TYPE_L2_CMPL,
+				fw_ring_id, &val[0], &val[1]);
+		}
+	}
+}
+
 static void bnxt_cfg_ntp_filters(struct bnxt *);
 
 static void bnxt_sp_task(struct work_struct *work)
@@ -8930,6 +8992,9 @@ static void bnxt_sp_task(struct work_struct *work)
 	if (test_and_clear_bit(BNXT_FLOW_STATS_SP_EVENT, &bp->sp_event))
 		bnxt_tc_flow_stats_work(bp);
 
+	if (test_and_clear_bit(BNXT_RING_COAL_NOW_SP_EVENT, &bp->sp_event))
+		bnxt_chk_missed_irq(bp);
+
 	/* These functions below will clear BNXT_STATE_IN_SP_TASK.  They
 	 * must be the last functions to be called before exiting.
 	 */
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 498b373c992d3..00bd17e55e99b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -798,6 +798,8 @@ struct bnxt_cp_ring_info {
 	u8			had_work_done:1;
 	u8			has_more_work:1;
 
+	u32			last_cp_raw_cons;
+
 	struct bnxt_coal	rx_ring_coal;
 	u64			rx_packets;
 	u64			rx_bytes;
@@ -1527,6 +1529,7 @@ struct bnxt {
 #define BNXT_LINK_SPEED_CHNG_SP_EVENT	14
 #define BNXT_FLOW_STATS_SP_EVENT	15
 #define BNXT_UPDATE_PHY_SP_EVENT	16
+#define BNXT_RING_COAL_NOW_SP_EVENT	17
 
 	struct bnxt_hw_resc	hw_resc;
 	struct bnxt_pf_info	pf;
-- 
GitLab


From 83eb5c5cff32681f3769f502cb5589c7d7509bfe Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Thu, 15 Nov 2018 03:25:41 -0500
Subject: [PATCH 1112/1890] bnxt_en: Add software "missed_irqs" counter.

To keep track of the number of times the workaround code for 57500 A0
has been triggered.  This is a per NQ counter.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c         | 1 +
 drivers/net/ethernet/broadcom/bnxt/bnxt.h         | 1 +
 drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 5 ++++-
 3 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 5d4147a75cadc..d4c3001175299 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -8909,6 +8909,7 @@ static void bnxt_chk_missed_irq(struct bnxt *bp)
 			bnxt_dbg_hwrm_ring_info_get(bp,
 				DBG_RING_INFO_GET_REQ_RING_TYPE_L2_CMPL,
 				fw_ring_id, &val[0], &val[1]);
+			cpr->missed_irqs++;
 		}
 	}
 }
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 00bd17e55e99b..9e99d4ab3e062 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -818,6 +818,7 @@ struct bnxt_cp_ring_info {
 	dma_addr_t		hw_stats_map;
 	u32			hw_stats_ctx_id;
 	u64			rx_l4_csum_errors;
+	u64			missed_irqs;
 
 	struct bnxt_ring_struct	cp_ring_struct;
 
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 48078564f0258..4b734cd81f8b1 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -137,7 +137,7 @@ static int bnxt_set_coalesce(struct net_device *dev,
 	return rc;
 }
 
-#define BNXT_NUM_STATS	21
+#define BNXT_NUM_STATS	22
 
 #define BNXT_RX_STATS_ENTRY(counter)	\
 	{ BNXT_RX_STATS_OFFSET(counter), __stringify(counter) }
@@ -384,6 +384,7 @@ static void bnxt_get_ethtool_stats(struct net_device *dev,
 		for (k = 0; k < stat_fields; j++, k++)
 			buf[j] = le64_to_cpu(hw_stats[k]);
 		buf[j++] = cpr->rx_l4_csum_errors;
+		buf[j++] = cpr->missed_irqs;
 
 		bnxt_sw_func_stats[RX_TOTAL_DISCARDS].counter +=
 			le64_to_cpu(cpr->hw_stats->rx_discard_pkts);
@@ -468,6 +469,8 @@ static void bnxt_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
 			buf += ETH_GSTRING_LEN;
 			sprintf(buf, "[%d]: rx_l4_csum_errors", i);
 			buf += ETH_GSTRING_LEN;
+			sprintf(buf, "[%d]: missed_irqs", i);
+			buf += ETH_GSTRING_LEN;
 		}
 		for (i = 0; i < BNXT_NUM_SW_FUNC_STATS; i++) {
 			strcpy(buf, bnxt_sw_func_stats[i].string);
-- 
GitLab


From 8dc5ae2d48976764cf3498e97963fa06befefb0e Mon Sep 17 00:00:00 2001
From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Date: Thu, 15 Nov 2018 03:25:42 -0500
Subject: [PATCH 1113/1890] bnxt_en: Fix filling time in
 bnxt_fill_coredump_record()

Fix the year and month offset while storing it in
bnxt_fill_coredump_record().

Fixes: 6c5657d085ae ("bnxt_en: Add support for ethtool get dump.")
Signed-off-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 4b734cd81f8b1..6cc69a58478a5 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -2945,8 +2945,8 @@ bnxt_fill_coredump_record(struct bnxt *bp, struct bnxt_coredump_record *record,
 	record->asic_state = 0;
 	strlcpy(record->system_name, utsname()->nodename,
 		sizeof(record->system_name));
-	record->year = cpu_to_le16(tm.tm_year);
-	record->month = cpu_to_le16(tm.tm_mon);
+	record->year = cpu_to_le16(tm.tm_year + 1900);
+	record->month = cpu_to_le16(tm.tm_mon + 1);
 	record->day = cpu_to_le16(tm.tm_mday);
 	record->hour = cpu_to_le16(tm.tm_hour);
 	record->minute = cpu_to_le16(tm.tm_min);
-- 
GitLab


From b8c3c10cf68d7556466bb7d99f249ed586ddfbe3 Mon Sep 17 00:00:00 2001
From: Thor Thayer <thor.thayer@linux.intel.com>
Date: Mon, 12 Nov 2018 11:50:56 -0600
Subject: [PATCH 1114/1890] MAINTAINERS: Replace Vince Bridgers as Altera TSE
 maintainer

Vince has moved to a different role. Replace him as Altera
TSE maintainer.

Signed-off-by: Thor Thayer <thor.thayer@linux.intel.com>
Acked-by: Vince Bridgers <vince.bridgers@intel.com>
Acked-by: Alan Tull <atull@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 0abecc528daca..5a4bd37d9d02a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -717,7 +717,7 @@ F:	include/linux/mfd/altera-a10sr.h
 F:	include/dt-bindings/reset/altr,rst-mgr-a10sr.h
 
 ALTERA TRIPLE SPEED ETHERNET DRIVER
-M:	Vince Bridgers <vbridger@opensource.altera.com>
+M:	Thor Thayer <thor.thayer@linux.intel.com>
 L:	netdev@vger.kernel.org
 L:	nios2-dev@lists.rocketboards.org (moderated for non-subscribers)
 S:	Maintained
-- 
GitLab


From ebcd210e93b2a984b7a7b82d45f7f0d21b7ec2d2 Mon Sep 17 00:00:00 2001
From: Ganesh Goudar <ganeshgr@chelsio.com>
Date: Thu, 15 Nov 2018 15:36:21 +0530
Subject: [PATCH 1115/1890] cxgb4: fix thermal zone build error

with CONFIG_THERMAL=m and cxgb4 as built-in build fails, and
'commit e70a57fa59bb ("cxgb4: fix thermal configuration dependencies")'
tries to fix it but when cxgb4i is made built-in build fails again,
use IS_REACHABLE instead of IS_ENABLED to fix the issue.

Fixes: e70a57fa59bb (cxgb4: fix thermal configuration dependencies)
Reported-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/Kconfig            | 1 -
 drivers/net/ethernet/chelsio/cxgb4/Makefile     | 4 +---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 4 ++--
 3 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/Kconfig b/drivers/net/ethernet/chelsio/Kconfig
index 75c1c5ed23878..e2cdfa75673fd 100644
--- a/drivers/net/ethernet/chelsio/Kconfig
+++ b/drivers/net/ethernet/chelsio/Kconfig
@@ -67,7 +67,6 @@ config CHELSIO_T3
 config CHELSIO_T4
 	tristate "Chelsio Communications T4/T5/T6 Ethernet support"
 	depends on PCI && (IPV6 || IPV6=n)
-	depends on THERMAL || !THERMAL
 	select FW_LOADER
 	select MDIO
 	select ZLIB_DEFLATE
diff --git a/drivers/net/ethernet/chelsio/cxgb4/Makefile b/drivers/net/ethernet/chelsio/cxgb4/Makefile
index 78e5d17a1d5fb..91d8a885deba9 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/Makefile
+++ b/drivers/net/ethernet/chelsio/cxgb4/Makefile
@@ -12,6 +12,4 @@ cxgb4-objs := cxgb4_main.o l2t.o smt.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o
 cxgb4-$(CONFIG_CHELSIO_T4_DCB) +=  cxgb4_dcb.o
 cxgb4-$(CONFIG_CHELSIO_T4_FCOE) +=  cxgb4_fcoe.o
 cxgb4-$(CONFIG_DEBUG_FS) += cxgb4_debugfs.o
-ifdef CONFIG_THERMAL
-cxgb4-objs += cxgb4_thermal.o
-endif
+cxgb4-$(CONFIG_THERMAL) += cxgb4_thermal.o
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 05a46926016a5..d49db46254cd7 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -5863,7 +5863,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (!is_t4(adapter->params.chip))
 		cxgb4_ptp_init(adapter);
 
-	if (IS_ENABLED(CONFIG_THERMAL) &&
+	if (IS_REACHABLE(CONFIG_THERMAL) &&
 	    !is_t4(adapter->params.chip) && (adapter->flags & FW_OK))
 		cxgb4_thermal_init(adapter);
 
@@ -5932,7 +5932,7 @@ static void remove_one(struct pci_dev *pdev)
 
 		if (!is_t4(adapter->params.chip))
 			cxgb4_ptp_stop(adapter);
-		if (IS_ENABLED(CONFIG_THERMAL))
+		if (IS_REACHABLE(CONFIG_THERMAL))
 			cxgb4_thermal_remove(adapter);
 
 		/* If we allocated filters, free up state associated with any
-- 
GitLab


From 10f91c73cc41ceead210a905dbd196398e99c7d2 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Thu, 15 Nov 2018 11:05:10 -0800
Subject: [PATCH 1116/1890] Input: hyper-v - fix wakeup from suspend-to-idle

It makes little sense but still possible to put Hyper-V guests into
suspend-to-idle state. To wake them up two wakeup sources were registered
in the past: hyperv-keyboard and hid-hyperv. However, since
commit eed4d47efe95 ("ACPI / sleep: Ignore spurious SCI wakeups from
suspend-to-idle") pm_wakeup_event() from these devices is ignored. Switch
to pm_wakeup_hard_event() API as these devices are actually the only
possible way to wakeup Hyper-V guests.

Fixes: eed4d47efe95 (ACPI / sleep: Ignore spurious SCI wakeups from suspend-to-idle)
Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: K. Y. Srinivasan <kys@microsoft.com>
Acked-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/hid/hid-hyperv.c              | 2 +-
 drivers/input/serio/hyperv-keyboard.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/hid/hid-hyperv.c b/drivers/hid/hid-hyperv.c
index 3aa2bb9f0f811..fc1db8c4ff0f9 100644
--- a/drivers/hid/hid-hyperv.c
+++ b/drivers/hid/hid-hyperv.c
@@ -309,7 +309,7 @@ static void mousevsc_on_receive(struct hv_device *device,
 		hid_input_report(input_dev->hid_device, HID_INPUT_REPORT,
 				 input_dev->input_buf, len, 1);
 
-		pm_wakeup_event(&input_dev->device->device, 0);
+		pm_wakeup_hard_event(&input_dev->device->device);
 
 		break;
 	default:
diff --git a/drivers/input/serio/hyperv-keyboard.c b/drivers/input/serio/hyperv-keyboard.c
index 25151d9214e05..55288a026e4e2 100644
--- a/drivers/input/serio/hyperv-keyboard.c
+++ b/drivers/input/serio/hyperv-keyboard.c
@@ -177,7 +177,7 @@ static void hv_kbd_on_receive(struct hv_device *hv_dev,
 		 * state because the Enter-UP can trigger a wakeup at once.
 		 */
 		if (!(info & IS_BREAK))
-			pm_wakeup_event(&hv_dev->device, 0);
+			pm_wakeup_hard_event(&hv_dev->device);
 
 		break;
 
-- 
GitLab


From 5277715639ff6f75c729e657690751a831112c4b Mon Sep 17 00:00:00 2001
From: Andreas Dannenberg <dannenberg@ti.com>
Date: Thu, 15 Nov 2018 11:01:31 -0600
Subject: [PATCH 1117/1890] ASoC: pcm186x: Fix device reset-registers trigger
 value

According to the current device datasheet (TI Lit # SLAS831D, revised
March 2018) the value written to the device's PAGE register to trigger
a complete register reset should be 0xfe, not 0xff. So go ahead and
update to the correct value.

Reported-by: Stephane Le Provost <stephane.leprovost@mediatek.com>
Tested-by: Stephane Le Provost <stephane.leprovost@mediatek.com>
Signed-off-by: Andreas Dannenberg <dannenberg@ti.com>
Acked-by: Andrew F. Davis <afd@ti.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Cc: stable@vger.kernel.org
---
 sound/soc/codecs/pcm186x.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/soc/codecs/pcm186x.h b/sound/soc/codecs/pcm186x.h
index 2c6ba55bf394e..bb3f0c42a1cdd 100644
--- a/sound/soc/codecs/pcm186x.h
+++ b/sound/soc/codecs/pcm186x.h
@@ -139,7 +139,7 @@ enum pcm186x_type {
 #define PCM186X_MAX_REGISTER		PCM186X_CURR_TRIM_CTRL
 
 /* PCM186X_PAGE */
-#define PCM186X_RESET			0xff
+#define PCM186X_RESET			0xfe
 
 /* PCM186X_ADCX_INPUT_SEL_X */
 #define PCM186X_ADC_INPUT_SEL_POL	BIT(7)
-- 
GitLab


From 7150ceaacb27f7b3bf494e72cd4be4e11612dfff Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 12 Nov 2018 22:33:22 +0000
Subject: [PATCH 1118/1890] rxrpc: Fix life check

The life-checking function, which is used by kAFS to make sure that a call
is still live in the event of a pending signal, only samples the received
packet serial number counter; it doesn't actually provoke a change in the
counter, rather relying on the server to happen to give us a packet in the
time window.

Fix this by adding a function to force a ping to be transmitted.

kAFS then keeps track of whether there's been a stall, and if so, uses the
new function to ping the server, resetting the timeout to allow the reply
to come back.

If there's a stall, a ping and the call is *still* stalled in the same
place after another period, then the call will be aborted.

Fixes: bc5e3a546d55 ("rxrpc: Use MSG_WAITALL to tell sendmsg() to temporarily ignore signals")
Fixes: f4d15fb6f99a ("rxrpc: Provide functions for allowing cleaner handling of signals")
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/rxrpc.txt | 17 +++++++++++------
 fs/afs/rxrpc.c                     | 11 ++++++++++-
 include/net/af_rxrpc.h             |  3 ++-
 include/trace/events/rxrpc.h       |  2 ++
 net/rxrpc/af_rxrpc.c               | 27 +++++++++++++++++++++++----
 5 files changed, 48 insertions(+), 12 deletions(-)

diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt
index 605e00cdd6beb..89f1302d593a5 100644
--- a/Documentation/networking/rxrpc.txt
+++ b/Documentation/networking/rxrpc.txt
@@ -1056,18 +1056,23 @@ The kernel interface functions are as follows:
 
 	u32 rxrpc_kernel_check_life(struct socket *sock,
 				    struct rxrpc_call *call);
+	void rxrpc_kernel_probe_life(struct socket *sock,
+				     struct rxrpc_call *call);
 
-     This returns a number that is updated when ACKs are received from the peer
-     (notably including PING RESPONSE ACKs which we can elicit by sending PING
-     ACKs to see if the call still exists on the server).  The caller should
-     compare the numbers of two calls to see if the call is still alive after
-     waiting for a suitable interval.
+     The first function returns a number that is updated when ACKs are received
+     from the peer (notably including PING RESPONSE ACKs which we can elicit by
+     sending PING ACKs to see if the call still exists on the server).  The
+     caller should compare the numbers of two calls to see if the call is still
+     alive after waiting for a suitable interval.
 
      This allows the caller to work out if the server is still contactable and
      if the call is still alive on the server whilst waiting for the server to
      process a client operation.
 
-     This function may transmit a PING ACK.
+     The second function causes a ping ACK to be transmitted to try to provoke
+     the peer into responding, which would then cause the value returned by the
+     first function to change.  Note that this must be called in TASK_RUNNING
+     state.
 
  (*) Get reply timestamp.
 
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 59970886690f1..a7b44863d502e 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -576,6 +576,7 @@ static long afs_wait_for_call_to_complete(struct afs_call *call,
 {
 	signed long rtt2, timeout;
 	long ret;
+	bool stalled = false;
 	u64 rtt;
 	u32 life, last_life;
 
@@ -609,12 +610,20 @@ static long afs_wait_for_call_to_complete(struct afs_call *call,
 
 		life = rxrpc_kernel_check_life(call->net->socket, call->rxcall);
 		if (timeout == 0 &&
-		    life == last_life && signal_pending(current))
+		    life == last_life && signal_pending(current)) {
+			if (stalled)
 				break;
+			__set_current_state(TASK_RUNNING);
+			rxrpc_kernel_probe_life(call->net->socket, call->rxcall);
+			timeout = rtt2;
+			stalled = true;
+			continue;
+		}
 
 		if (life != last_life) {
 			timeout = rtt2;
 			last_life = life;
+			stalled = false;
 		}
 
 		timeout = schedule_timeout(timeout);
diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index de587948042a4..1adefe42c0a68 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -77,7 +77,8 @@ int rxrpc_kernel_retry_call(struct socket *, struct rxrpc_call *,
 			    struct sockaddr_rxrpc *, struct key *);
 int rxrpc_kernel_check_call(struct socket *, struct rxrpc_call *,
 			    enum rxrpc_call_completion *, u32 *);
-u32 rxrpc_kernel_check_life(struct socket *, struct rxrpc_call *);
+u32 rxrpc_kernel_check_life(const struct socket *, const struct rxrpc_call *);
+void rxrpc_kernel_probe_life(struct socket *, struct rxrpc_call *);
 u32 rxrpc_kernel_get_epoch(struct socket *, struct rxrpc_call *);
 bool rxrpc_kernel_get_reply_time(struct socket *, struct rxrpc_call *,
 				 ktime_t *);
diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index 573d5b901fb11..5b50fe4906d2e 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -181,6 +181,7 @@ enum rxrpc_timer_trace {
 enum rxrpc_propose_ack_trace {
 	rxrpc_propose_ack_client_tx_end,
 	rxrpc_propose_ack_input_data,
+	rxrpc_propose_ack_ping_for_check_life,
 	rxrpc_propose_ack_ping_for_keepalive,
 	rxrpc_propose_ack_ping_for_lost_ack,
 	rxrpc_propose_ack_ping_for_lost_reply,
@@ -380,6 +381,7 @@ enum rxrpc_tx_point {
 #define rxrpc_propose_ack_traces \
 	EM(rxrpc_propose_ack_client_tx_end,	"ClTxEnd") \
 	EM(rxrpc_propose_ack_input_data,	"DataIn ") \
+	EM(rxrpc_propose_ack_ping_for_check_life, "ChkLife") \
 	EM(rxrpc_propose_ack_ping_for_keepalive, "KeepAlv") \
 	EM(rxrpc_propose_ack_ping_for_lost_ack,	"LostAck") \
 	EM(rxrpc_propose_ack_ping_for_lost_reply, "LostRpl") \
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 64362d078da84..a2522f9d71e26 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -375,16 +375,35 @@ EXPORT_SYMBOL(rxrpc_kernel_end_call);
  * getting ACKs from the server.  Returns a number representing the life state
  * which can be compared to that returned by a previous call.
  *
- * If this is a client call, ping ACKs will be sent to the server to find out
- * whether it's still responsive and whether the call is still alive on the
- * server.
+ * If the life state stalls, rxrpc_kernel_probe_life() should be called and
+ * then 2RTT waited.
  */
-u32 rxrpc_kernel_check_life(struct socket *sock, struct rxrpc_call *call)
+u32 rxrpc_kernel_check_life(const struct socket *sock,
+			    const struct rxrpc_call *call)
 {
 	return call->acks_latest;
 }
 EXPORT_SYMBOL(rxrpc_kernel_check_life);
 
+/**
+ * rxrpc_kernel_probe_life - Poke the peer to see if it's still alive
+ * @sock: The socket the call is on
+ * @call: The call to check
+ *
+ * In conjunction with rxrpc_kernel_check_life(), allow a kernel service to
+ * find out whether a call is still alive by pinging it.  This should cause the
+ * life state to be bumped in about 2*RTT.
+ *
+ * The must be called in TASK_RUNNING state on pain of might_sleep() objecting.
+ */
+void rxrpc_kernel_probe_life(struct socket *sock, struct rxrpc_call *call)
+{
+	rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false,
+			  rxrpc_propose_ack_ping_for_check_life);
+	rxrpc_send_ack_packet(call, true, NULL);
+}
+EXPORT_SYMBOL(rxrpc_kernel_probe_life);
+
 /**
  * rxrpc_kernel_get_epoch - Retrieve the epoch value from a call.
  * @sock: The socket the call is on
-- 
GitLab


From 08e14fe429a07475ee9f29a283945d602e4a6d92 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 12 Nov 2018 16:17:16 -0800
Subject: [PATCH 1119/1890] net_sched: sch_fq: ensure maxrate fq parameter
 applies to EDT flows

When EDT conversion happened, fq lost the ability to enfore a maxrate
for all flows. It kept it for non EDT flows.

This commit restores the functionality.

Tested:

tc qd replace dev eth0 root fq maxrate 500Mbit
netperf -P0 -H host -- -O THROUGHPUT
489.75

Fixes: ab408b6dc744 ("tcp: switch tcp and sch_fq to new earliest departure time model")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_fq.c | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 4b1af706896c0..25a7cf6d380fd 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -469,22 +469,29 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch)
 		goto begin;
 	}
 	prefetch(&skb->end);
-	f->credit -= qdisc_pkt_len(skb);
+	plen = qdisc_pkt_len(skb);
+	f->credit -= plen;
 
-	if (ktime_to_ns(skb->tstamp) || !q->rate_enable)
+	if (!q->rate_enable)
 		goto out;
 
 	rate = q->flow_max_rate;
-	if (skb->sk)
-		rate = min(skb->sk->sk_pacing_rate, rate);
-
-	if (rate <= q->low_rate_threshold) {
-		f->credit = 0;
-		plen = qdisc_pkt_len(skb);
-	} else {
-		plen = max(qdisc_pkt_len(skb), q->quantum);
-		if (f->credit > 0)
-			goto out;
+
+	/* If EDT time was provided for this skb, we need to
+	 * update f->time_next_packet only if this qdisc enforces
+	 * a flow max rate.
+	 */
+	if (!skb->tstamp) {
+		if (skb->sk)
+			rate = min(skb->sk->sk_pacing_rate, rate);
+
+		if (rate <= q->low_rate_threshold) {
+			f->credit = 0;
+		} else {
+			plen = max(plen, q->quantum);
+			if (f->credit > 0)
+				goto out;
+		}
 	}
 	if (rate != ~0UL) {
 		u64 len = (u64)plen * NSEC_PER_SEC;
-- 
GitLab


From 66e839030fd698586734e017fd55c4f2a89dba0b Mon Sep 17 00:00:00 2001
From: Matt Chen <matt.chen@intel.com>
Date: Fri, 3 Aug 2018 14:29:20 +0800
Subject: [PATCH 1120/1890] iwlwifi: fix wrong WGDS_WIFI_DATA_SIZE

From coreboot/BIOS:
Name ("WGDS", Package() {
 Revision,
 Package() {
     DomainType,                         // 0x7:WiFi ==> We miss this one.
     WgdsWiFiSarDeltaGroup1PowerMax1,    // Group 1 FCC 2400 Max
     WgdsWiFiSarDeltaGroup1PowerChainA1, // Group 1 FCC 2400 A Offset
     WgdsWiFiSarDeltaGroup1PowerChainB1, // Group 1 FCC 2400 B Offset
     WgdsWiFiSarDeltaGroup1PowerMax2,    // Group 1 FCC 5200 Max
     WgdsWiFiSarDeltaGroup1PowerChainA2, // Group 1 FCC 5200 A Offset
     WgdsWiFiSarDeltaGroup1PowerChainB2, // Group 1 FCC 5200 B Offset
     WgdsWiFiSarDeltaGroup2PowerMax1,    // Group 2 EC Jap 2400 Max
     WgdsWiFiSarDeltaGroup2PowerChainA1, // Group 2 EC Jap 2400 A Offset
     WgdsWiFiSarDeltaGroup2PowerChainB1, // Group 2 EC Jap 2400 B Offset
     WgdsWiFiSarDeltaGroup2PowerMax2,    // Group 2 EC Jap 5200 Max
     WgdsWiFiSarDeltaGroup2PowerChainA2, // Group 2 EC Jap 5200 A Offset
     WgdsWiFiSarDeltaGroup2PowerChainB2, // Group 2 EC Jap 5200 B Offset
     WgdsWiFiSarDeltaGroup3PowerMax1,    // Group 3 ROW 2400 Max
     WgdsWiFiSarDeltaGroup3PowerChainA1, // Group 3 ROW 2400 A Offset
     WgdsWiFiSarDeltaGroup3PowerChainB1, // Group 3 ROW 2400 B Offset
     WgdsWiFiSarDeltaGroup3PowerMax2,    // Group 3 ROW 5200 Max
     WgdsWiFiSarDeltaGroup3PowerChainA2, // Group 3 ROW 5200 A Offset
     WgdsWiFiSarDeltaGroup3PowerChainB2, // Group 3 ROW 5200 B Offset
 }
})

When read the ACPI data to find out the WGDS, the DATA_SIZE is never
matched.
From the above format, it gives 19 numbers, but our driver is hardcode
as 18.
Fix it to pass then can parse the data into our wgds table.
Then we will see:
iwlwifi 0000:01:00.0: U iwl_mvm_sar_geo_init Sending GEO_TX_POWER_LIMIT
iwlwifi 0000:01:00.0: U iwl_mvm_sar_geo_init SAR geographic profile[0]
Band[0]: chain A = 68 chain B = 69 max_tx_power = 54
iwlwifi 0000:01:00.0: U iwl_mvm_sar_geo_init SAR geographic profile[0]
Band[1]: chain A = 48 chain B = 49 max_tx_power = 70
iwlwifi 0000:01:00.0: U iwl_mvm_sar_geo_init SAR geographic profile[1]
Band[0]: chain A = 51 chain B = 67 max_tx_power = 50
iwlwifi 0000:01:00.0: U iwl_mvm_sar_geo_init SAR geographic profile[1]
Band[1]: chain A = 69 chain B = 70 max_tx_power = 68
iwlwifi 0000:01:00.0: U iwl_mvm_sar_geo_init SAR geographic profile[2]
Band[0]: chain A = 49 chain B = 50 max_tx_power = 48
iwlwifi 0000:01:00.0: U iwl_mvm_sar_geo_init SAR geographic profile[2]
Band[1]: chain A = 52 chain B = 53 max_tx_power = 51

Cc: stable@vger.kernel.org # 4.12+
Fixes: a6bff3cb19b7 ("iwlwifi: mvm: add GEO_TX_POWER_LIMIT cmd for geographic tx power table")
Signed-off-by: Matt Chen <matt.chen@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/fw/acpi.h | 4 +++-
 drivers/net/wireless/intel/iwlwifi/mvm/fw.c  | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h
index 2439e98431eef..7492dfb6729b8 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h
@@ -6,6 +6,7 @@
  * GPL LICENSE SUMMARY
  *
  * Copyright(c) 2017        Intel Deutschland GmbH
+ * Copyright(c) 2018        Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -26,6 +27,7 @@
  * BSD LICENSE
  *
  * Copyright(c) 2017        Intel Deutschland GmbH
+ * Copyright(c) 2018        Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -81,7 +83,7 @@
 #define ACPI_WRDS_WIFI_DATA_SIZE	(ACPI_SAR_TABLE_SIZE + 2)
 #define ACPI_EWRD_WIFI_DATA_SIZE	((ACPI_SAR_PROFILE_NUM - 1) * \
 					 ACPI_SAR_TABLE_SIZE + 3)
-#define ACPI_WGDS_WIFI_DATA_SIZE	18
+#define ACPI_WGDS_WIFI_DATA_SIZE	19
 #define ACPI_WRDD_WIFI_DATA_SIZE	2
 #define ACPI_SPLC_WIFI_DATA_SIZE	2
 
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
index dade206d55115..899f4a6432fb2 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
@@ -893,7 +893,7 @@ static int iwl_mvm_sar_geo_init(struct iwl_mvm *mvm)
 	IWL_DEBUG_RADIO(mvm, "Sending GEO_TX_POWER_LIMIT\n");
 
 	BUILD_BUG_ON(ACPI_NUM_GEO_PROFILES * ACPI_WGDS_NUM_BANDS *
-		     ACPI_WGDS_TABLE_SIZE !=  ACPI_WGDS_WIFI_DATA_SIZE);
+		     ACPI_WGDS_TABLE_SIZE + 1 !=  ACPI_WGDS_WIFI_DATA_SIZE);
 
 	BUILD_BUG_ON(ACPI_NUM_GEO_PROFILES > IWL_NUM_GEO_PROFILES);
 
-- 
GitLab


From ec484d03ef0df8d34086b95710e355a259cbe1f2 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Thu, 16 Aug 2018 13:25:48 +0300
Subject: [PATCH 1121/1890] iwlwifi: mvm: support sta_statistics() even on
 older firmware

The oldest firmware supported by iwlmvm do support getting
the average beacon RSSI. Enable the sta_statistics() call
from mac80211 even on older firmware versions.

Fixes: 33cef9256342 ("iwlwifi: mvm: support beacon statistics for BSS client")
Cc: stable@vger.kernel.org # 4.2+
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index 505b0385d8000..7c09ce20e8b1d 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -4444,10 +4444,6 @@ static void iwl_mvm_mac_sta_statistics(struct ieee80211_hw *hw,
 		sinfo->filled |= BIT_ULL(NL80211_STA_INFO_SIGNAL_AVG);
 	}
 
-	if (!fw_has_capa(&mvm->fw->ucode_capa,
-			 IWL_UCODE_TLV_CAPA_RADIO_BEACON_STATS))
-		return;
-
 	/* if beacon filtering isn't on mac80211 does it anyway */
 	if (!(vif->driver_flags & IEEE80211_VIF_BEACON_FILTER))
 		return;
-- 
GitLab


From 82715ac71e6b94a2c2136e31f3a8e6748e33aa8c Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Wed, 3 Oct 2018 11:16:54 +0300
Subject: [PATCH 1122/1890] iwlwifi: mvm: fix regulatory domain update when the
 firmware starts

When the firmware starts, it doesn't have any regulatory
information, hence it uses the world wide limitations. The
driver can feed the firmware with previous knowledge that
was kept in the driver, but the firmware may still not
update its internal tables.

This happens when we start a BSS interface, and then the
firmware can change the regulatory tables based on our
location and it'll use more lenient, location specific
rules. Then, if the firmware is shut down (when the
interface is brought down), and then an AP interface is
created, the firmware will forget the country specific
rules.

The host will think that we are in a certain country that
may allow channels and will try to teach the firmware about
our location, but the firmware may still not allow to drop
the world wide limitations and apply country specific rules
because it was just re-started.

In this case, the firmware will reply with MCC_RESP_ILLEGAL
to the MCC_UPDATE_CMD. In that case, iwlwifi needs to let
the upper layers (cfg80211 / hostapd) know that the channel
list they know about has been updated.

This fixes https://bugzilla.kernel.org/show_bug.cgi?id=201105

Cc: stable@vger.kernel.org
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 8 ++++++--
 drivers/net/wireless/intel/iwlwifi/mvm/nvm.c      | 5 ++---
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index 7c09ce20e8b1d..00f831d88366d 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -301,8 +301,12 @@ struct ieee80211_regdomain *iwl_mvm_get_regdomain(struct wiphy *wiphy,
 		goto out;
 	}
 
-	if (changed)
-		*changed = (resp->status == MCC_RESP_NEW_CHAN_PROFILE);
+	if (changed) {
+		u32 status = le32_to_cpu(resp->status);
+
+		*changed = (status == MCC_RESP_NEW_CHAN_PROFILE ||
+			    status == MCC_RESP_ILLEGAL);
+	}
 
 	regd = iwl_parse_nvm_mcc_info(mvm->trans->dev, mvm->cfg,
 				      __le32_to_cpu(resp->n_channels),
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c
index 3633f27d048ab..6fc5cc1f2b5b3 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c
@@ -539,9 +539,8 @@ iwl_mvm_update_mcc(struct iwl_mvm *mvm, const char *alpha2,
 	}
 
 	IWL_DEBUG_LAR(mvm,
-		      "MCC response status: 0x%x. new MCC: 0x%x ('%c%c') change: %d n_chans: %d\n",
-		      status, mcc, mcc >> 8, mcc & 0xff,
-		      !!(status == MCC_RESP_NEW_CHAN_PROFILE), n_channels);
+		      "MCC response status: 0x%x. new MCC: 0x%x ('%c%c') n_chans: %d\n",
+		      status, mcc, mcc >> 8, mcc & 0xff, n_channels);
 
 exit:
 	iwl_free_resp(&cmd);
-- 
GitLab


From 54f3f994e9964466855e11068c0f7d952d58b260 Mon Sep 17 00:00:00 2001
From: Shahar S Matityahu <shahar.s.matityahu@intel.com>
Date: Mon, 20 Aug 2018 17:16:21 +0300
Subject: [PATCH 1123/1890] iwlwifi: fix D3 debug data buffer memory leak

If the driver is unloaded when D3 debug data pulling is enabled
but not triggered, it doesn't release the data buffer.

Fix this by adding iwl_fw_runtime_free and calling it from the
relevant places.

Fixes: 2d8c261511ab ("iwlwifi: add d3 debug data support")
Signed-off-by: Shahar S Matityahu <shahar.s.matityahu@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/fw/runtime.h | 6 +++++-
 drivers/net/wireless/intel/iwlwifi/mvm/ops.c    | 2 ++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/runtime.h b/drivers/net/wireless/intel/iwlwifi/fw/runtime.h
index 6b95d0e758897..2b8b50a77990c 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/runtime.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/runtime.h
@@ -154,7 +154,11 @@ void iwl_fw_runtime_init(struct iwl_fw_runtime *fwrt, struct iwl_trans *trans,
 			const struct iwl_fw_runtime_ops *ops, void *ops_ctx,
 			struct dentry *dbgfs_dir);
 
-void iwl_fw_runtime_exit(struct iwl_fw_runtime *fwrt);
+static inline void iwl_fw_runtime_free(struct iwl_fw_runtime *fwrt)
+{
+	kfree(fwrt->dump.d3_debug_data);
+	fwrt->dump.d3_debug_data = NULL;
+}
 
 void iwl_fw_runtime_suspend(struct iwl_fw_runtime *fwrt);
 
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
index 0e2092526fae1..af3fba10abc19 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
@@ -858,6 +858,7 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg,
 	iwl_mvm_thermal_exit(mvm);
  out_free:
 	iwl_fw_flush_dump(&mvm->fwrt);
+	iwl_fw_runtime_free(&mvm->fwrt);
 
 	if (iwlmvm_mod_params.init_dbg)
 		return op_mode;
@@ -910,6 +911,7 @@ static void iwl_op_mode_mvm_stop(struct iwl_op_mode *op_mode)
 
 	iwl_mvm_tof_clean(mvm);
 
+	iwl_fw_runtime_free(&mvm->fwrt);
 	mutex_destroy(&mvm->mutex);
 	mutex_destroy(&mvm->d0i3_suspend_mutex);
 
-- 
GitLab


From 5d041c46ccb9b48acc110e214beff5e2789311df Mon Sep 17 00:00:00 2001
From: Luca Coelho <luciano.coelho@intel.com>
Date: Wed, 17 Oct 2018 08:35:15 +0300
Subject: [PATCH 1124/1890] iwlwifi: mvm: don't use SAR Geo if basic SAR is not
 used

We can't use SAR Geo if basic SAR is not enabled, since the SAR Geo
tables define offsets in relation to the basic SAR table in use.

To fix this, make iwl_mvm_sar_init() return one in case WRDS is not
available, so we can skip reading WGDS entirely.

Fixes: a6bff3cb19b7 ("iwlwifi: mvm: add GEO_TX_POWER_LIMIT cmd for geographic tx power table")
Cc: stable@vger.kernel.org # 4.12+
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 36 ++++++++++++++++-----
 1 file changed, 28 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
index 899f4a6432fb2..2ba890445c356 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
@@ -928,6 +928,11 @@ static int iwl_mvm_sar_get_ewrd_table(struct iwl_mvm *mvm)
 	return -ENOENT;
 }
 
+static int iwl_mvm_sar_get_wgds_table(struct iwl_mvm *mvm)
+{
+	return -ENOENT;
+}
+
 static int iwl_mvm_sar_geo_init(struct iwl_mvm *mvm)
 {
 	return 0;
@@ -954,8 +959,11 @@ static int iwl_mvm_sar_init(struct iwl_mvm *mvm)
 		IWL_DEBUG_RADIO(mvm,
 				"WRDS SAR BIOS table invalid or unavailable. (%d)\n",
 				ret);
-		/* if not available, don't fail and don't bother with EWRD */
-		return 0;
+		/*
+		 * If not available, don't fail and don't bother with EWRD.
+		 * Return 1 to tell that we can't use WGDS either.
+		 */
+		return 1;
 	}
 
 	ret = iwl_mvm_sar_get_ewrd_table(mvm);
@@ -968,9 +976,13 @@ static int iwl_mvm_sar_init(struct iwl_mvm *mvm)
 	/* choose profile 1 (WRDS) as default for both chains */
 	ret = iwl_mvm_sar_select_profile(mvm, 1, 1);
 
-	/* if we don't have profile 0 from BIOS, just skip it */
+	/*
+	 * If we don't have profile 0 from BIOS, just skip it.  This
+	 * means that SAR Geo will not be enabled either, even if we
+	 * have other valid profiles.
+	 */
 	if (ret == -ENOENT)
-		return 0;
+		return 1;
 
 	return ret;
 }
@@ -1168,11 +1180,19 @@ int iwl_mvm_up(struct iwl_mvm *mvm)
 		iwl_mvm_unref(mvm, IWL_MVM_REF_UCODE_DOWN);
 
 	ret = iwl_mvm_sar_init(mvm);
-	if (ret)
-		goto error;
+	if (ret == 0) {
+		ret = iwl_mvm_sar_geo_init(mvm);
+	} else if (ret > 0 && !iwl_mvm_sar_get_wgds_table(mvm)) {
+		/*
+		 * If basic SAR is not available, we check for WGDS,
+		 * which should *not* be available either.  If it is
+		 * available, issue an error, because we can't use SAR
+		 * Geo without basic SAR.
+		 */
+		IWL_ERR(mvm, "BIOS contains WGDS but no WRDS\n");
+	}
 
-	ret = iwl_mvm_sar_geo_init(mvm);
-	if (ret)
+	if (ret < 0)
 		goto error;
 
 	iwl_mvm_leds_sync(mvm);
-- 
GitLab


From 69756c6ff0de478c10100481f16c966dde3b5339 Mon Sep 17 00:00:00 2001
From: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Date: Thu, 15 Nov 2018 17:19:12 -0500
Subject: [PATCH 1125/1890] drm/amdgpu: Add amdgpu "max bpc" connector property
 (v2)

[Why]
Many panels support more than 8bpc but some modes are unavailable while
running at greater than 8bpc due to DP/HDMI bandwidth constraints.

Support for more than 8bpc was added recently in the driver but it
defaults to the maximum supported bpc - locking out these modes.

This should be a user configurable option such that the user can select
what bpc configuration they would like.

[How]
This patch introduces the "max bpc" amdgpu driver specific connector
property so the user can limit the maximum bpc. It ranges from 8 to 16.

This doesn't directly set the preferred bpc for the panel since it
follows Intel's existing driver conventions.

This proprety should be removed once common drm support for max bpc
lands.

v2: rebase on upstream (Alex)

Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 7 +++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h    | 2 ++
 2 files changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 6748cd7fc129b..686a26de50f91 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -626,6 +626,13 @@ int amdgpu_display_modeset_create_props(struct amdgpu_device *adev)
 					 "dither",
 					 amdgpu_dither_enum_list, sz);
 
+	if (amdgpu_device_has_dc_support(adev)) {
+		adev->mode_info.max_bpc_property =
+			drm_property_create_range(adev->ddev, 0, "max bpc", 8, 16);
+		if (!adev->mode_info.max_bpc_property)
+			return -ENOMEM;
+	}
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index b9e9e8b02fb75..d1b4d9b6aae0d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -339,6 +339,8 @@ struct amdgpu_mode_info {
 	struct drm_property *audio_property;
 	/* FMT dithering */
 	struct drm_property *dither_property;
+	/* maximum number of bits per channel for monitor color */
+	struct drm_property *max_bpc_property;
 	/* hardcoded DFP edid from BIOS */
 	struct edid *bios_hardcoded_edid;
 	int bios_hardcoded_edid_size;
-- 
GitLab


From 07e3a1cfb0568b6d8d7862077029af96af6690ea Mon Sep 17 00:00:00 2001
From: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Date: Thu, 15 Nov 2018 17:21:34 -0500
Subject: [PATCH 1126/1890] drm/amd/display: Support amdgpu "max bpc" connector
 property (v2)

[Why]
Many panels support more than 8bpc but some modes are unavailable while
running at greater than 8bpc due to DP/HDMI bandwidth constraints.

Support for more than 8bpc was added recently in the driver but it
defaults to the maximum supported bpc - locking out these modes.

This should be a user configurable option such that the user can select
what bpc configuration they would like.

[How]
This patch adds support for getting and setting the amdgpu driver
specific "max bpc" property on the connector.

It also adds support for limiting the output bpc based on the property
value. The default limitation is the lowest value in the range, 8bpc.
This was the old value before the range was uncapped.

This patch should be updated/replaced later once common drm support
for max bpc lands.

Bugzilla: https://bugs.freedesktop.org/108542
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=201585
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=200645
Fixes: e03fd3f300f6 ("drm/amd/display: Do not limit color depth to 8bpc")

v2: rebase on upstream (Alex)

Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c    | 16 ++++++++++++++++
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h    |  1 +
 2 files changed, 17 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index c1262f62cd9f2..ca925200fe092 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -2358,8 +2358,15 @@ static void update_stream_scaling_settings(const struct drm_display_mode *mode,
 static enum dc_color_depth
 convert_color_depth_from_display_info(const struct drm_connector *connector)
 {
+	struct dm_connector_state *dm_conn_state =
+		to_dm_connector_state(connector->state);
 	uint32_t bpc = connector->display_info.bpc;
 
+	/* TODO: Remove this when there's support for max_bpc in drm */
+	if (dm_conn_state && bpc > dm_conn_state->max_bpc)
+		/* Round down to nearest even number. */
+		bpc = dm_conn_state->max_bpc - (dm_conn_state->max_bpc & 1);
+
 	switch (bpc) {
 	case 0:
 		/*
@@ -2943,6 +2950,9 @@ int amdgpu_dm_connector_atomic_set_property(struct drm_connector *connector,
 	} else if (property == adev->mode_info.underscan_property) {
 		dm_new_state->underscan_enable = val;
 		ret = 0;
+	} else if (property == adev->mode_info.max_bpc_property) {
+		dm_new_state->max_bpc = val;
+		ret = 0;
 	}
 
 	return ret;
@@ -2985,6 +2995,9 @@ int amdgpu_dm_connector_atomic_get_property(struct drm_connector *connector,
 	} else if (property == adev->mode_info.underscan_property) {
 		*val = dm_state->underscan_enable;
 		ret = 0;
+	} else if (property == adev->mode_info.max_bpc_property) {
+		*val = dm_state->max_bpc;
+		ret = 0;
 	}
 	return ret;
 }
@@ -3795,6 +3808,9 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm,
 	drm_object_attach_property(&aconnector->base.base,
 				adev->mode_info.underscan_vborder_property,
 				0);
+	drm_object_attach_property(&aconnector->base.base,
+				adev->mode_info.max_bpc_property,
+				0);
 
 }
 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 924a38a1fc446..6e069d777ab22 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -204,6 +204,7 @@ struct dm_connector_state {
 	enum amdgpu_rmx_type scaling;
 	uint8_t underscan_vborder;
 	uint8_t underscan_hborder;
+	uint8_t max_bpc;
 	bool underscan_enable;
 	bool freesync_enable;
 	bool freesync_capable;
-- 
GitLab


From 1229ace4a4a2e2c982a32fb075dc1bf95423924f Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@mips.com>
Date: Mon, 12 Nov 2018 22:18:01 +0000
Subject: [PATCH 1127/1890] MIPS: Loongson3,SGI-IP27: Simplify max_low_pfn
 calculation

Both the Loongson3 & SGI-IP27 platforms set max_low_pfn to the last
available PFN describing memory. They both do it in paging_init() which
is later than ideal since max_low_pfn is used before that function is
called. Simplify both platforms to trivially initialize max_low_pfn
using the end address of DRAM, and do it earlier in prom_meminit().

Signed-off-by: Paul Burton <paul.burton@mips.com>
Suggested-by: Mike Rapoport <rppt@linux.ibm.com>
Tested-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
Patchwork: https://patchwork.linux-mips.org/patch/21104/
References: https://patchwork.linux-mips.org/patch/21031/
Cc: Huacai Chen <chenhc@lemote.com>
Cc: Mike Rapoport <rppt@linux.ibm.com>
Cc: Thomas Bogendoerfer <tbogendoerfer@suse.de>
Cc: linux-mips@linux-mips.org
---
 arch/mips/loongson64/loongson-3/numa.c | 12 ++----------
 arch/mips/sgi-ip27/ip27-memory.c       | 11 +----------
 2 files changed, 3 insertions(+), 20 deletions(-)

diff --git a/arch/mips/loongson64/loongson-3/numa.c b/arch/mips/loongson64/loongson-3/numa.c
index 622761878cd11..60bf0a1cb7571 100644
--- a/arch/mips/loongson64/loongson-3/numa.c
+++ b/arch/mips/loongson64/loongson-3/numa.c
@@ -231,6 +231,8 @@ static __init void prom_meminit(void)
 			cpumask_clear(&__node_data[(node)]->cpumask);
 		}
 	}
+	max_low_pfn = PHYS_PFN(memblock_end_of_DRAM());
+
 	for (cpu = 0; cpu < loongson_sysconf.nr_cpus; cpu++) {
 		node = cpu / loongson_sysconf.cores_per_node;
 		if (node >= num_online_nodes())
@@ -248,19 +250,9 @@ static __init void prom_meminit(void)
 
 void __init paging_init(void)
 {
-	unsigned node;
 	unsigned long zones_size[MAX_NR_ZONES] = {0, };
 
 	pagetable_init();
-
-	for_each_online_node(node) {
-		unsigned long  start_pfn, end_pfn;
-
-		get_pfn_range_for_nid(node, &start_pfn, &end_pfn);
-
-		if (end_pfn > max_low_pfn)
-			max_low_pfn = end_pfn;
-	}
 #ifdef CONFIG_ZONE_DMA32
 	zones_size[ZONE_DMA32] = MAX_DMA32_PFN;
 #endif
diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c
index d8b8444d67952..813d13f92957e 100644
--- a/arch/mips/sgi-ip27/ip27-memory.c
+++ b/arch/mips/sgi-ip27/ip27-memory.c
@@ -435,6 +435,7 @@ void __init prom_meminit(void)
 
 	mlreset();
 	szmem();
+	max_low_pfn = PHYS_PFN(memblock_end_of_DRAM());
 
 	for (node = 0; node < MAX_COMPACT_NODES; node++) {
 		if (node_online(node)) {
@@ -455,18 +456,8 @@ extern void setup_zero_pages(void);
 void __init paging_init(void)
 {
 	unsigned long zones_size[MAX_NR_ZONES] = {0, };
-	unsigned node;
 
 	pagetable_init();
-
-	for_each_online_node(node) {
-		unsigned long start_pfn, end_pfn;
-
-		get_pfn_range_for_nid(node, &start_pfn, &end_pfn);
-
-		if (end_pfn > max_low_pfn)
-			max_low_pfn = end_pfn;
-	}
 	zones_size[ZONE_NORMAL] = max_low_pfn;
 	free_area_init_nodes(zones_size);
 }
-- 
GitLab


From bd85fbc2038a1bbe84990b23ff69b6fc81a32b2c Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Thu, 15 Nov 2018 18:05:13 +0200
Subject: [PATCH 1128/1890] net/mlx4_core: Zero out lkey field in SW2HW_MPT fw
 command

When re-registering a user mr, the mpt information for the
existing mr when running SRIOV is obtained via the QUERY_MPT
fw command. The returned information includes the mpt's lkey.

This retrieved mpt information is used to move the mpt back
to hardware ownership in the rereg flow (via the SW2HW_MPT
fw command when running SRIOV).

The fw API spec states that for SW2HW_MPT, the lkey field
must be zero. Any ConnectX-3 PF driver which checks for strict spec
adherence will return failure for SW2HW_MPT if the lkey field is not
zero (although the fw in practice ignores this field for SW2HW_MPT).

Thus, in order to conform to the fw API spec, set the lkey field to zero
before invoking SW2HW_MPT when running SRIOV.

Fixes: e630664c8383 ("mlx4_core: Add helper functions to support MR re-registration")
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/mr.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c
index 2e84f10f59ba9..1a11bc0e16123 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mr.c
@@ -363,6 +363,7 @@ int mlx4_mr_hw_write_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr,
 			container_of((void *)mpt_entry, struct mlx4_cmd_mailbox,
 				     buf);
 
+		(*mpt_entry)->lkey = 0;
 		err = mlx4_SW2HW_MPT(dev, mailbox, key);
 	}
 
-- 
GitLab


From 3ea7e7ea53c9f6ee41cb69a29c375fe9dd9a56a7 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Thu, 15 Nov 2018 18:05:14 +0200
Subject: [PATCH 1129/1890] net/mlx4_core: Fix uninitialized variable
 compilation warning

Initialize the uid variable to zero to avoid the compilation warning.

Fixes: 7a89399ffad7 ("net/mlx4: Add mlx4_bitmap zone allocator")
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/alloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c
index deef5a998985a..9af34e03892c1 100644
--- a/drivers/net/ethernet/mellanox/mlx4/alloc.c
+++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c
@@ -337,7 +337,7 @@ void mlx4_zone_allocator_destroy(struct mlx4_zone_allocator *zone_alloc)
 static u32 __mlx4_alloc_from_zone(struct mlx4_zone_entry *zone, int count,
 				  int align, u32 skip_mask, u32 *puid)
 {
-	u32 uid;
+	u32 uid = 0;
 	u32 res;
 	struct mlx4_zone_allocator *zone_alloc = zone->allocator;
 	struct mlx4_zone_entry *curr_node;
-- 
GitLab


From a463146e67c848cbab5ce706d6528281b7cded08 Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@mellanox.com>
Date: Thu, 15 Nov 2018 18:05:15 +0200
Subject: [PATCH 1130/1890] net/mlx4: Fix UBSAN warning of signed integer
 overflow

UBSAN: Undefined behavior in
drivers/net/ethernet/mellanox/mlx4/resource_tracker.c:626:29
signed integer overflow: 1802201963 + 1802201963 cannot be represented
in type 'int'

The union of res_reserved and res_port_rsvd[MLX4_MAX_PORTS] monitors
granting of reserved resources. The grant operation is calculated and
protected, thus both members of the union cannot be negative.  Changed
type of res_reserved and of res_port_rsvd[MLX4_MAX_PORTS] from signed
int to unsigned int, allowing large value.

Fixes: 5a0d0a6161ae ("mlx4: Structures and init/teardown for VF resource quotas")
Signed-off-by: Aya Levin <ayal@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/mlx4.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index ebcd2778eeb3e..23f1b5b512c21 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -540,8 +540,8 @@ struct slave_list {
 struct resource_allocator {
 	spinlock_t alloc_lock; /* protect quotas */
 	union {
-		int res_reserved;
-		int res_port_rsvd[MLX4_MAX_PORTS];
+		unsigned int res_reserved;
+		unsigned int res_port_rsvd[MLX4_MAX_PORTS];
 	};
 	union {
 		int res_free;
-- 
GitLab


From 91b9deefedf4c35a01027ce38bed7299605026a3 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Thu, 15 Nov 2018 15:59:39 -0800
Subject: [PATCH 1131/1890] spi: omap2-mcspi: Add missing suspend and resume
 calls

I've been wondering still about omap2-mcspi related suspend and resume
flakeyness and looks like we're missing calls to spi_master_suspend()
and spi_master_resume(). Adding those and using pm_runtime_force_suspend()
and pm_runtime_force_resume() makes things work for suspend and resume
and allows us to stop using noirq suspend and resume.

And while at it, let's use SET_SYSTEM_SLEEP_PM_OPS to simplify things
further.

Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-omap2-mcspi.c | 37 +++++++++++++++++++++++------------
 1 file changed, 25 insertions(+), 12 deletions(-)

diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c
index f024c3fc3679d..2fd8881fcd65c 100644
--- a/drivers/spi/spi-omap2-mcspi.c
+++ b/drivers/spi/spi-omap2-mcspi.c
@@ -1540,13 +1540,26 @@ static int omap2_mcspi_remove(struct platform_device *pdev)
 /* work with hotplug and coldplug */
 MODULE_ALIAS("platform:omap2_mcspi");
 
-#ifdef	CONFIG_SUSPEND
-static int omap2_mcspi_suspend_noirq(struct device *dev)
+static int __maybe_unused omap2_mcspi_suspend(struct device *dev)
 {
-	return pinctrl_pm_select_sleep_state(dev);
+	struct spi_master *master = dev_get_drvdata(dev);
+	struct omap2_mcspi *mcspi = spi_master_get_devdata(master);
+	int error;
+
+	error = pinctrl_pm_select_sleep_state(dev);
+	if (error)
+		dev_warn(mcspi->dev, "%s: failed to set pins: %i\n",
+			 __func__, error);
+
+	error = spi_master_suspend(master);
+	if (error)
+		dev_warn(mcspi->dev, "%s: master suspend failed: %i\n",
+			 __func__, error);
+
+	return pm_runtime_force_suspend(dev);
 }
 
-static int omap2_mcspi_resume_noirq(struct device *dev)
+static int __maybe_unused omap2_mcspi_resume(struct device *dev)
 {
 	struct spi_master *master = dev_get_drvdata(dev);
 	struct omap2_mcspi *mcspi = spi_master_get_devdata(master);
@@ -1557,17 +1570,17 @@ static int omap2_mcspi_resume_noirq(struct device *dev)
 		dev_warn(mcspi->dev, "%s: failed to set pins: %i\n",
 			 __func__, error);
 
-	return 0;
-}
+	error = spi_master_resume(master);
+	if (error)
+		dev_warn(mcspi->dev, "%s: master resume failed: %i\n",
+			 __func__, error);
 
-#else
-#define omap2_mcspi_suspend_noirq	NULL
-#define omap2_mcspi_resume_noirq	NULL
-#endif
+	return pm_runtime_force_resume(dev);
+}
 
 static const struct dev_pm_ops omap2_mcspi_pm_ops = {
-	.suspend_noirq = omap2_mcspi_suspend_noirq,
-	.resume_noirq = omap2_mcspi_resume_noirq,
+	SET_SYSTEM_SLEEP_PM_OPS(omap2_mcspi_suspend,
+				omap2_mcspi_resume)
 	.runtime_resume	= omap_mcspi_runtime_resume,
 };
 
-- 
GitLab


From a97b9565338350d70d8d971c4ee6f0d4fa967418 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Thu, 15 Nov 2018 16:15:20 -0800
Subject: [PATCH 1132/1890] drivers/net/ethernet/qlogic/qed/qed_rdma.h: fix
 typo

Add missing semicolon.

Fixes: 291d57f67d244973 ("qed: Fix rdma_info structure allocation")
Cc: Michal Kalderon <michal.kalderon@cavium.com>
Cc: Denis Bolotin <denis.bolotin@cavium.com>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_rdma.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.h b/drivers/net/ethernet/qlogic/qed/qed_rdma.h
index 50d609c0e108d..3689fe3e59354 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_rdma.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.h
@@ -183,7 +183,7 @@ void qed_rdma_info_free(struct qed_hwfn *p_hwfn);
 static inline void qed_rdma_dpm_conf(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) {}
 static inline void qed_rdma_dpm_bar(struct qed_hwfn *p_hwfn,
 				    struct qed_ptt *p_ptt) {}
-static inline int qed_rdma_info_alloc(struct qed_hwfn *p_hwfn) {return -EINVAL}
+static inline int qed_rdma_info_alloc(struct qed_hwfn *p_hwfn) {return -EINVAL;}
 static inline void qed_rdma_info_free(struct qed_hwfn *p_hwfn) {}
 #endif
 
-- 
GitLab


From 5c074eeabbd332b11559f7fc1e89d456f94801fb Mon Sep 17 00:00:00 2001
From: Gerd Hoffmann <kraxel@redhat.com>
Date: Wed, 14 Nov 2018 13:20:29 +0100
Subject: [PATCH 1133/1890] udmabuf: set read/write flag when exporting

Otherwise, mmap fails when done with PROT_WRITE.

Suggested-by: Gurchetan Singh <gurchetansingh@chromium.org>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Gurchetan Singh <gurchetansingh@chromium.org>
Tested-by: Gurchetan Singh <gurchetansingh@chromium.org>
Link: http://patchwork.freedesktop.org/patch/msgid/20181114122029.16766-1-kraxel@redhat.com
---
 drivers/dma-buf/udmabuf.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
index 5b44ef226904f..fc359ca4503d1 100644
--- a/drivers/dma-buf/udmabuf.c
+++ b/drivers/dma-buf/udmabuf.c
@@ -184,6 +184,7 @@ static long udmabuf_create(const struct udmabuf_create_list *head,
 	exp_info.ops  = &udmabuf_ops;
 	exp_info.size = ubuf->pagecount << PAGE_SHIFT;
 	exp_info.priv = ubuf;
+	exp_info.flags = O_RDWR;
 
 	buf = dma_buf_export(&exp_info);
 	if (IS_ERR(buf)) {
-- 
GitLab


From 0145b50566e7de5637e80ecba96c7f0e6fff1aad Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 31 Oct 2018 15:20:05 +0100
Subject: [PATCH 1134/1890] iio/hid-sensors: Fix IIO_CHAN_INFO_RAW returning
 wrong values for signed numbers

Before this commit sensor_hub_input_attr_get_raw_value() failed to take
the signedness of 16 and 8 bit values into account, returning e.g.
65436 instead of -100 for the z-axis reading of an accelerometer.

This commit adds a new is_signed parameter to the function and makes all
callers pass the appropriate value for this.

While at it, this commit also fixes up some neighboring lines where
statements were needlessly split over 2 lines to improve readability.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Acked-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/hid/hid-sensor-custom.c                  |  2 +-
 drivers/hid/hid-sensor-hub.c                     | 13 ++++++++++---
 drivers/iio/accel/hid-sensor-accel-3d.c          |  5 ++++-
 drivers/iio/gyro/hid-sensor-gyro-3d.c            |  5 ++++-
 drivers/iio/humidity/hid-sensor-humidity.c       |  3 ++-
 drivers/iio/light/hid-sensor-als.c               |  8 +++++---
 drivers/iio/light/hid-sensor-prox.c              |  8 +++++---
 drivers/iio/magnetometer/hid-sensor-magn-3d.c    |  8 +++++---
 drivers/iio/orientation/hid-sensor-incl-3d.c     |  8 +++++---
 drivers/iio/pressure/hid-sensor-press.c          |  8 +++++---
 drivers/iio/temperature/hid-sensor-temperature.c |  3 ++-
 drivers/rtc/rtc-hid-sensor-time.c                |  2 +-
 include/linux/hid-sensor-hub.h                   |  4 +++-
 13 files changed, 52 insertions(+), 25 deletions(-)

diff --git a/drivers/hid/hid-sensor-custom.c b/drivers/hid/hid-sensor-custom.c
index e8a114157f87b..bb012bc032e02 100644
--- a/drivers/hid/hid-sensor-custom.c
+++ b/drivers/hid/hid-sensor-custom.c
@@ -358,7 +358,7 @@ static ssize_t show_value(struct device *dev, struct device_attribute *attr,
 						sensor_inst->hsdev,
 						sensor_inst->hsdev->usage,
 						usage, report_id,
-						SENSOR_HUB_SYNC);
+						SENSOR_HUB_SYNC, false);
 	} else if (!strncmp(name, "units", strlen("units")))
 		value = sensor_inst->fields[field_index].attribute.units;
 	else if (!strncmp(name, "unit-expo", strlen("unit-expo")))
diff --git a/drivers/hid/hid-sensor-hub.c b/drivers/hid/hid-sensor-hub.c
index 2b63487057c25..4256fdc5cd6d5 100644
--- a/drivers/hid/hid-sensor-hub.c
+++ b/drivers/hid/hid-sensor-hub.c
@@ -299,7 +299,8 @@ EXPORT_SYMBOL_GPL(sensor_hub_get_feature);
 int sensor_hub_input_attr_get_raw_value(struct hid_sensor_hub_device *hsdev,
 					u32 usage_id,
 					u32 attr_usage_id, u32 report_id,
-					enum sensor_hub_read_flags flag)
+					enum sensor_hub_read_flags flag,
+					bool is_signed)
 {
 	struct sensor_hub_data *data = hid_get_drvdata(hsdev->hdev);
 	unsigned long flags;
@@ -331,10 +332,16 @@ int sensor_hub_input_attr_get_raw_value(struct hid_sensor_hub_device *hsdev,
 						&hsdev->pending.ready, HZ*5);
 		switch (hsdev->pending.raw_size) {
 		case 1:
-			ret_val = *(u8 *)hsdev->pending.raw_data;
+			if (is_signed)
+				ret_val = *(s8 *)hsdev->pending.raw_data;
+			else
+				ret_val = *(u8 *)hsdev->pending.raw_data;
 			break;
 		case 2:
-			ret_val = *(u16 *)hsdev->pending.raw_data;
+			if (is_signed)
+				ret_val = *(s16 *)hsdev->pending.raw_data;
+			else
+				ret_val = *(u16 *)hsdev->pending.raw_data;
 			break;
 		case 4:
 			ret_val = *(u32 *)hsdev->pending.raw_data;
diff --git a/drivers/iio/accel/hid-sensor-accel-3d.c b/drivers/iio/accel/hid-sensor-accel-3d.c
index 41d97faf50138..38ff374a3ca45 100644
--- a/drivers/iio/accel/hid-sensor-accel-3d.c
+++ b/drivers/iio/accel/hid-sensor-accel-3d.c
@@ -149,6 +149,7 @@ static int accel_3d_read_raw(struct iio_dev *indio_dev,
 	int report_id = -1;
 	u32 address;
 	int ret_type;
+	s32 min;
 	struct hid_sensor_hub_device *hsdev =
 					accel_state->common_attributes.hsdev;
 
@@ -158,12 +159,14 @@ static int accel_3d_read_raw(struct iio_dev *indio_dev,
 	case IIO_CHAN_INFO_RAW:
 		hid_sensor_power_state(&accel_state->common_attributes, true);
 		report_id = accel_state->accel[chan->scan_index].report_id;
+		min = accel_state->accel[chan->scan_index].logical_minimum;
 		address = accel_3d_addresses[chan->scan_index];
 		if (report_id >= 0)
 			*val = sensor_hub_input_attr_get_raw_value(
 					accel_state->common_attributes.hsdev,
 					hsdev->usage, address, report_id,
-					SENSOR_HUB_SYNC);
+					SENSOR_HUB_SYNC,
+					min < 0);
 		else {
 			*val = 0;
 			hid_sensor_power_state(&accel_state->common_attributes,
diff --git a/drivers/iio/gyro/hid-sensor-gyro-3d.c b/drivers/iio/gyro/hid-sensor-gyro-3d.c
index 36941e69f9595..88e857c4baf45 100644
--- a/drivers/iio/gyro/hid-sensor-gyro-3d.c
+++ b/drivers/iio/gyro/hid-sensor-gyro-3d.c
@@ -111,6 +111,7 @@ static int gyro_3d_read_raw(struct iio_dev *indio_dev,
 	int report_id = -1;
 	u32 address;
 	int ret_type;
+	s32 min;
 
 	*val = 0;
 	*val2 = 0;
@@ -118,13 +119,15 @@ static int gyro_3d_read_raw(struct iio_dev *indio_dev,
 	case IIO_CHAN_INFO_RAW:
 		hid_sensor_power_state(&gyro_state->common_attributes, true);
 		report_id = gyro_state->gyro[chan->scan_index].report_id;
+		min = gyro_state->gyro[chan->scan_index].logical_minimum;
 		address = gyro_3d_addresses[chan->scan_index];
 		if (report_id >= 0)
 			*val = sensor_hub_input_attr_get_raw_value(
 					gyro_state->common_attributes.hsdev,
 					HID_USAGE_SENSOR_GYRO_3D, address,
 					report_id,
-					SENSOR_HUB_SYNC);
+					SENSOR_HUB_SYNC,
+					min < 0);
 		else {
 			*val = 0;
 			hid_sensor_power_state(&gyro_state->common_attributes,
diff --git a/drivers/iio/humidity/hid-sensor-humidity.c b/drivers/iio/humidity/hid-sensor-humidity.c
index beab6d6fd6e18..4bc95f31c730e 100644
--- a/drivers/iio/humidity/hid-sensor-humidity.c
+++ b/drivers/iio/humidity/hid-sensor-humidity.c
@@ -75,7 +75,8 @@ static int humidity_read_raw(struct iio_dev *indio_dev,
 				HID_USAGE_SENSOR_HUMIDITY,
 				HID_USAGE_SENSOR_ATMOSPHERIC_HUMIDITY,
 				humid_st->humidity_attr.report_id,
-				SENSOR_HUB_SYNC);
+				SENSOR_HUB_SYNC,
+				humid_st->humidity_attr.logical_minimum < 0);
 		hid_sensor_power_state(&humid_st->common_attributes, false);
 
 		return IIO_VAL_INT;
diff --git a/drivers/iio/light/hid-sensor-als.c b/drivers/iio/light/hid-sensor-als.c
index 406caaee9a3c5..94f33250ba5a6 100644
--- a/drivers/iio/light/hid-sensor-als.c
+++ b/drivers/iio/light/hid-sensor-als.c
@@ -93,6 +93,7 @@ static int als_read_raw(struct iio_dev *indio_dev,
 	int report_id = -1;
 	u32 address;
 	int ret_type;
+	s32 min;
 
 	*val = 0;
 	*val2 = 0;
@@ -102,8 +103,8 @@ static int als_read_raw(struct iio_dev *indio_dev,
 		case  CHANNEL_SCAN_INDEX_INTENSITY:
 		case  CHANNEL_SCAN_INDEX_ILLUM:
 			report_id = als_state->als_illum.report_id;
-			address =
-			HID_USAGE_SENSOR_LIGHT_ILLUM;
+			min = als_state->als_illum.logical_minimum;
+			address = HID_USAGE_SENSOR_LIGHT_ILLUM;
 			break;
 		default:
 			report_id = -1;
@@ -116,7 +117,8 @@ static int als_read_raw(struct iio_dev *indio_dev,
 					als_state->common_attributes.hsdev,
 					HID_USAGE_SENSOR_ALS, address,
 					report_id,
-					SENSOR_HUB_SYNC);
+					SENSOR_HUB_SYNC,
+					min < 0);
 			hid_sensor_power_state(&als_state->common_attributes,
 						false);
 		} else {
diff --git a/drivers/iio/light/hid-sensor-prox.c b/drivers/iio/light/hid-sensor-prox.c
index 45107f7537b5d..cf5a0c242609d 100644
--- a/drivers/iio/light/hid-sensor-prox.c
+++ b/drivers/iio/light/hid-sensor-prox.c
@@ -73,6 +73,7 @@ static int prox_read_raw(struct iio_dev *indio_dev,
 	int report_id = -1;
 	u32 address;
 	int ret_type;
+	s32 min;
 
 	*val = 0;
 	*val2 = 0;
@@ -81,8 +82,8 @@ static int prox_read_raw(struct iio_dev *indio_dev,
 		switch (chan->scan_index) {
 		case  CHANNEL_SCAN_INDEX_PRESENCE:
 			report_id = prox_state->prox_attr.report_id;
-			address =
-			HID_USAGE_SENSOR_HUMAN_PRESENCE;
+			min = prox_state->prox_attr.logical_minimum;
+			address = HID_USAGE_SENSOR_HUMAN_PRESENCE;
 			break;
 		default:
 			report_id = -1;
@@ -95,7 +96,8 @@ static int prox_read_raw(struct iio_dev *indio_dev,
 				prox_state->common_attributes.hsdev,
 				HID_USAGE_SENSOR_PROX, address,
 				report_id,
-				SENSOR_HUB_SYNC);
+				SENSOR_HUB_SYNC,
+				min < 0);
 			hid_sensor_power_state(&prox_state->common_attributes,
 						false);
 		} else {
diff --git a/drivers/iio/magnetometer/hid-sensor-magn-3d.c b/drivers/iio/magnetometer/hid-sensor-magn-3d.c
index d55c4885211ad..f3c0d41e5a8c2 100644
--- a/drivers/iio/magnetometer/hid-sensor-magn-3d.c
+++ b/drivers/iio/magnetometer/hid-sensor-magn-3d.c
@@ -163,21 +163,23 @@ static int magn_3d_read_raw(struct iio_dev *indio_dev,
 	int report_id = -1;
 	u32 address;
 	int ret_type;
+	s32 min;
 
 	*val = 0;
 	*val2 = 0;
 	switch (mask) {
 	case IIO_CHAN_INFO_RAW:
 		hid_sensor_power_state(&magn_state->magn_flux_attributes, true);
-		report_id =
-			magn_state->magn[chan->address].report_id;
+		report_id = magn_state->magn[chan->address].report_id;
+		min = magn_state->magn[chan->address].logical_minimum;
 		address = magn_3d_addresses[chan->address];
 		if (report_id >= 0)
 			*val = sensor_hub_input_attr_get_raw_value(
 				magn_state->magn_flux_attributes.hsdev,
 				HID_USAGE_SENSOR_COMPASS_3D, address,
 				report_id,
-				SENSOR_HUB_SYNC);
+				SENSOR_HUB_SYNC,
+				min < 0);
 		else {
 			*val = 0;
 			hid_sensor_power_state(
diff --git a/drivers/iio/orientation/hid-sensor-incl-3d.c b/drivers/iio/orientation/hid-sensor-incl-3d.c
index 1e5451d1ff884..bdc5e4554ee48 100644
--- a/drivers/iio/orientation/hid-sensor-incl-3d.c
+++ b/drivers/iio/orientation/hid-sensor-incl-3d.c
@@ -111,21 +111,23 @@ static int incl_3d_read_raw(struct iio_dev *indio_dev,
 	int report_id = -1;
 	u32 address;
 	int ret_type;
+	s32 min;
 
 	*val = 0;
 	*val2 = 0;
 	switch (mask) {
 	case IIO_CHAN_INFO_RAW:
 		hid_sensor_power_state(&incl_state->common_attributes, true);
-		report_id =
-			incl_state->incl[chan->scan_index].report_id;
+		report_id = incl_state->incl[chan->scan_index].report_id;
+		min = incl_state->incl[chan->scan_index].logical_minimum;
 		address = incl_3d_addresses[chan->scan_index];
 		if (report_id >= 0)
 			*val = sensor_hub_input_attr_get_raw_value(
 				incl_state->common_attributes.hsdev,
 				HID_USAGE_SENSOR_INCLINOMETER_3D, address,
 				report_id,
-				SENSOR_HUB_SYNC);
+				SENSOR_HUB_SYNC,
+				min < 0);
 		else {
 			hid_sensor_power_state(&incl_state->common_attributes,
 						false);
diff --git a/drivers/iio/pressure/hid-sensor-press.c b/drivers/iio/pressure/hid-sensor-press.c
index 4c437918f1d28..d7b1c00ceb4da 100644
--- a/drivers/iio/pressure/hid-sensor-press.c
+++ b/drivers/iio/pressure/hid-sensor-press.c
@@ -77,6 +77,7 @@ static int press_read_raw(struct iio_dev *indio_dev,
 	int report_id = -1;
 	u32 address;
 	int ret_type;
+	s32 min;
 
 	*val = 0;
 	*val2 = 0;
@@ -85,8 +86,8 @@ static int press_read_raw(struct iio_dev *indio_dev,
 		switch (chan->scan_index) {
 		case  CHANNEL_SCAN_INDEX_PRESSURE:
 			report_id = press_state->press_attr.report_id;
-			address =
-			HID_USAGE_SENSOR_ATMOSPHERIC_PRESSURE;
+			min = press_state->press_attr.logical_minimum;
+			address = HID_USAGE_SENSOR_ATMOSPHERIC_PRESSURE;
 			break;
 		default:
 			report_id = -1;
@@ -99,7 +100,8 @@ static int press_read_raw(struct iio_dev *indio_dev,
 				press_state->common_attributes.hsdev,
 				HID_USAGE_SENSOR_PRESSURE, address,
 				report_id,
-				SENSOR_HUB_SYNC);
+				SENSOR_HUB_SYNC,
+				min < 0);
 			hid_sensor_power_state(&press_state->common_attributes,
 						false);
 		} else {
diff --git a/drivers/iio/temperature/hid-sensor-temperature.c b/drivers/iio/temperature/hid-sensor-temperature.c
index beaf6fd3e337c..b592fc4f007e4 100644
--- a/drivers/iio/temperature/hid-sensor-temperature.c
+++ b/drivers/iio/temperature/hid-sensor-temperature.c
@@ -76,7 +76,8 @@ static int temperature_read_raw(struct iio_dev *indio_dev,
 			HID_USAGE_SENSOR_TEMPERATURE,
 			HID_USAGE_SENSOR_DATA_ENVIRONMENTAL_TEMPERATURE,
 			temp_st->temperature_attr.report_id,
-			SENSOR_HUB_SYNC);
+			SENSOR_HUB_SYNC,
+			temp_st->temperature_attr.logical_minimum < 0);
 		hid_sensor_power_state(
 				&temp_st->common_attributes,
 				false);
diff --git a/drivers/rtc/rtc-hid-sensor-time.c b/drivers/rtc/rtc-hid-sensor-time.c
index 2751dba850c61..3e1abb4554721 100644
--- a/drivers/rtc/rtc-hid-sensor-time.c
+++ b/drivers/rtc/rtc-hid-sensor-time.c
@@ -213,7 +213,7 @@ static int hid_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	/* get a report with all values through requesting one value */
 	sensor_hub_input_attr_get_raw_value(time_state->common_attributes.hsdev,
 			HID_USAGE_SENSOR_TIME, hid_time_addresses[0],
-			time_state->info[0].report_id, SENSOR_HUB_SYNC);
+			time_state->info[0].report_id, SENSOR_HUB_SYNC, false);
 	/* wait for all values (event) */
 	ret = wait_for_completion_killable_timeout(
 			&time_state->comp_last_time, HZ*6);
diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h
index 331dc377c2758..dc12f5c4b076c 100644
--- a/include/linux/hid-sensor-hub.h
+++ b/include/linux/hid-sensor-hub.h
@@ -177,6 +177,7 @@ int sensor_hub_input_get_attribute_info(struct hid_sensor_hub_device *hsdev,
 * @attr_usage_id:	Attribute usage id as per spec
 * @report_id:	Report id to look for
 * @flag:      Synchronous or asynchronous read
+* @is_signed:   If true then fields < 32 bits will be sign-extended
 *
 * Issues a synchronous or asynchronous read request for an input attribute.
 * Returns data upto 32 bits.
@@ -190,7 +191,8 @@ enum sensor_hub_read_flags {
 int sensor_hub_input_attr_get_raw_value(struct hid_sensor_hub_device *hsdev,
  					u32 usage_id,
  					u32 attr_usage_id, u32 report_id,
- 					enum sensor_hub_read_flags flag
+					enum sensor_hub_read_flags flag,
+					bool is_signed
 );
 
 /**
-- 
GitLab


From d987f783a4808ce0636f58e2412addd453cba88c Mon Sep 17 00:00:00 2001
From: Brian Norris <briannorris@chromium.org>
Date: Wed, 7 Nov 2018 16:40:35 -0800
Subject: [PATCH 1135/1890] ath10k: don't assume 'vif' is non-NULL in flush()

mac80211 may call us with vif == NULL, if the station is not currently
active (e.g., not associated). It is trivially easy to reproduce a crash
by suspending the system when not connected to an AP:

[   65.533934] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000
...
[   65.574521] pc : ath10k_flush+0x30/0xd0 [ath10k_core]
[   65.574538] lr : __ieee80211_flush_queues+0x180/0x244 [mac80211]
[   65.599680] Process kworker/u12:1 (pid: 57, stack limit = 0x(____ptrval____))
[   65.599682] Call trace:
[   65.599695]  ath10k_flush+0x30/0xd0 [ath10k_core]
[   65.642064]  __ieee80211_flush_queues+0x180/0x244 [mac80211]
[   65.642079]  ieee80211_flush_queues+0x34/0x40 [mac80211]
[   65.642095]  __ieee80211_suspend+0xfc/0x47c [mac80211]
[   65.658611]  ieee80211_suspend+0x30/0x3c [mac80211]
[   65.658627]  wiphy_suspend+0x15c/0x3a8 [cfg80211]
[   65.672810]  dpm_run_callback+0xf0/0x1f0
[   65.672814]  __device_suspend+0x3ac/0x4f8
[   65.672819]  async_suspend+0x34/0xbc
[   65.684096]  async_run_entry_fn+0x54/0x104
[   65.684099]  worker_thread+0x4cc/0x72c
[   65.684102]  kthread+0x134/0x13c
[   65.684105]  ret_from_fork+0x10/0x18

Fixes: 9de4162f0999 ("ath10k: add peer flush in ath10k_flush for STATION")
Signed-off-by: Brian Norris <briannorris@chromium.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/ath/ath10k/mac.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index a1c2801ded101..7e49342bae384 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -6867,7 +6867,7 @@ static void ath10k_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 	u32 bitmap;
 
 	if (drop) {
-		if (vif->type == NL80211_IFTYPE_STATION) {
+		if (vif && vif->type == NL80211_IFTYPE_STATION) {
 			bitmap = ~(1 << WMI_MGMT_TID);
 			list_for_each_entry(arvif, &ar->arvifs, list) {
 				if (arvif->vdev_type == WMI_VDEV_TYPE_STA)
-- 
GitLab


From d1fe6ad6f6bd61c84788d3a7b11e459a439c6169 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
Date: Thu, 8 Nov 2018 16:08:29 +0100
Subject: [PATCH 1136/1890] brcmfmac: fix reporting support for 160 MHz
 channels
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Driver can report IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ so it's
important to provide valid & complete info about supported bands for
each channel. By default no support for 160 MHz should be assumed unless
firmware reports it for a given channel later.

This fixes info passed to the userspace. Without that change userspace
could try to use invalid channel and fail to start an interface.

Signed-off-by: RafaÅ‚ MiÅ‚ecki <rafal@milecki.pl>
Cc: stable@vger.kernel.org
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
index 230a378c26fcf..7f0a5bade70a6 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
@@ -6005,7 +6005,8 @@ static int brcmf_construct_chaninfo(struct brcmf_cfg80211_info *cfg,
 			 * for subsequent chanspecs.
 			 */
 			channel->flags = IEEE80211_CHAN_NO_HT40 |
-					 IEEE80211_CHAN_NO_80MHZ;
+					 IEEE80211_CHAN_NO_80MHZ |
+					 IEEE80211_CHAN_NO_160MHZ;
 			ch.bw = BRCMU_CHAN_BW_20;
 			cfg->d11inf.encchspec(&ch);
 			chaninfo = ch.chspec;
-- 
GitLab


From 1770f0fa978ed691a454fa9d3ba903d5a70630cc Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Date: Sat, 10 Nov 2018 12:03:09 +0100
Subject: [PATCH 1137/1890] mt76: fix uninitialized mutex access setting rts
 threshold

Fix following crash due to a leftover uninitialized mutex access
in mt76x2_set_rts_threshold routine.

[   31.018059] Call Trace:
[   31.018341]  register_lock_class+0x51f/0x530
[   31.018828]  __lock_acquire+0x6c/0x1580
[   31.019247]  lock_acquire+0x88/0x120
[   31.021089]  __mutex_lock+0x4a/0x4f0
[   31.023343]  mt76x2_set_rts_threshold+0x28/0x50
[   31.023831]  ieee80211_set_wiphy_params+0x16d/0x4e0
[   31.024344]  nl80211_set_wiphy+0x72b/0xbc0
[   31.024781]  genl_family_rcv_msg+0x192/0x3a0
[   31.025233]  genl_rcv_msg+0x42/0x89
[   31.026079]  netlink_rcv_skb+0x38/0x100
[   31.026475]  genl_rcv+0x1f/0x30
[   31.026804]  netlink_unicast+0x19c/0x250
[   31.027212]  netlink_sendmsg+0x1ed/0x390
[   31.027615]  sock_sendmsg+0x31/0x40
[   31.027973]  ___sys_sendmsg+0x23c/0x280
[   31.030414]  __sys_sendmsg+0x42/0x80
[   31.030783]  do_syscall_64+0x4a/0x170
[   31.031160]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
[   31.031677] RIP: 0033:0x7f3498b39ba7
[   31.033953] RSP: 002b:00007fffe19675b8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
[   31.034883] RAX: ffffffffffffffda RBX: 00000000012d5350 RCX: 00007f3498b39ba7
[   31.035756] RDX: 0000000000000000 RSI: 00007fffe19675f0 RDI: 0000000000000003
[   31.036587] RBP: 00000000012da740 R08: 0000000000000002 R09: 0000000000000000
[   31.037422] R10: 0000000000000006 R11: 0000000000000246 R12: 00000000012da880
[   31.038252] R13: 00007fffe19675f0 R14: 00007fffe19678c0 R15: 00000000012da880

Fixes: 108a4861ef19 ("mt76: create new mt76x02-lib module for common mt76x{0,2} code")
Reported-by: lorenzo.trisolini@fluidmesh.com
Reported-by: luca.bisti@fluidmesh.com
Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/net/wireless/mediatek/mt76/mt76x02.h         | 1 -
 drivers/net/wireless/mediatek/mt76/mt76x2/pci_main.c | 4 ++--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02.h b/drivers/net/wireless/mediatek/mt76/mt76x02.h
index 47c42c6079643..7806963b19052 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02.h
@@ -71,7 +71,6 @@ struct mt76x02_dev {
 	struct mac_address macaddr_list[8];
 
 	struct mutex phy_mutex;
-	struct mutex mutex;
 
 	u8 txdone_seq;
 	DECLARE_KFIFO_PTR(txstatus_fifo, struct mt76x02_tx_status);
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci_main.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci_main.c
index 034a062956681..3f001bd6806ce 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci_main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci_main.c
@@ -272,9 +272,9 @@ mt76x2_set_rts_threshold(struct ieee80211_hw *hw, u32 val)
 	if (val != ~0 && val > 0xffff)
 		return -EINVAL;
 
-	mutex_lock(&dev->mutex);
+	mutex_lock(&dev->mt76.mutex);
 	mt76x2_mac_set_tx_protection(dev, val);
-	mutex_unlock(&dev->mutex);
+	mutex_unlock(&dev->mt76.mutex);
 
 	return 0;
 }
-- 
GitLab


From 243cfe3fb8978c7eec24511aba7dac98819ed896 Mon Sep 17 00:00:00 2001
From: Vadim Pasternak <vadimp@mellanox.com>
Date: Fri, 16 Nov 2018 13:47:11 +0000
Subject: [PATCH 1138/1890] hwmon: (mlxreg-fan) Fix macros for tacho fault
 reading

Fix macros for tacometer fault reading.
This fix is relevant for three Mellanox systems MQMB7, MSN37, MSN34,
which are about to be released to the customers.
At the moment, none of them is at customers sites.

Fixes: 65afb4c8e7e4 ("hwmon: (mlxreg-fan) Add support for Mellanox FAN driver")
Signed-off-by: Vadim Pasternak <vadimp@mellanox.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/mlxreg-fan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/mlxreg-fan.c b/drivers/hwmon/mlxreg-fan.c
index de46577c7d5a1..d8fa4bea4bc84 100644
--- a/drivers/hwmon/mlxreg-fan.c
+++ b/drivers/hwmon/mlxreg-fan.c
@@ -51,7 +51,7 @@
  */
 #define MLXREG_FAN_GET_RPM(rval, d, s)	(DIV_ROUND_CLOSEST(15000000 * 100, \
 					 ((rval) + (s)) * (d)))
-#define MLXREG_FAN_GET_FAULT(val, mask) (!!((val) ^ (mask)))
+#define MLXREG_FAN_GET_FAULT(val, mask) (!((val) ^ (mask)))
 #define MLXREG_FAN_PWM_DUTY2STATE(duty)	(DIV_ROUND_CLOSEST((duty) *	\
 					 MLXREG_FAN_MAX_STATE,		\
 					 MLXREG_FAN_MAX_DUTY))
-- 
GitLab


From c26b5aa8ef0d46035060fded475e6ab957b9f69f Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Sun, 11 Nov 2018 11:15:21 +0000
Subject: [PATCH 1139/1890] gfs2: Fix iomap buffer head reference counting bug

GFS2 passes the inode buffer head (dibh) from gfs2_iomap_begin to
gfs2_iomap_end in iomap->private.  It sets that private pointer in
gfs2_iomap_get.  Users of gfs2_iomap_get other than gfs2_iomap_begin
would have to release iomap->private, but this isn't done correctly,
leading to a leak of buffer head references.

To fix this, move the code for setting iomap->private from
gfs2_iomap_get to gfs2_iomap_begin.

Fixes: 64bc06bb32 ("gfs2: iomap buffered write support")
Cc: stable@vger.kernel.org # v4.19+
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/gfs2/bmap.c | 40 +++++++++++++++++-----------------------
 1 file changed, 17 insertions(+), 23 deletions(-)

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 38d88fcb69883..0d643306c255b 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -826,7 +826,7 @@ static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
 	ret = gfs2_meta_inode_buffer(ip, &dibh);
 	if (ret)
 		goto unlock;
-	iomap->private = dibh;
+	mp->mp_bh[0] = dibh;
 
 	if (gfs2_is_stuffed(ip)) {
 		if (flags & IOMAP_WRITE) {
@@ -863,9 +863,6 @@ static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
 	len = lblock_stop - lblock + 1;
 	iomap->length = len << inode->i_blkbits;
 
-	get_bh(dibh);
-	mp->mp_bh[0] = dibh;
-
 	height = ip->i_height;
 	while ((lblock + 1) * sdp->sd_sb.sb_bsize > sdp->sd_heightsize[height])
 		height++;
@@ -898,8 +895,6 @@ static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
 	iomap->bdev = inode->i_sb->s_bdev;
 unlock:
 	up_read(&ip->i_rw_mutex);
-	if (ret && dibh)
-		brelse(dibh);
 	return ret;
 
 do_alloc:
@@ -980,9 +975,9 @@ static void gfs2_iomap_journaled_page_done(struct inode *inode, loff_t pos,
 
 static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
 				  loff_t length, unsigned flags,
-				  struct iomap *iomap)
+				  struct iomap *iomap,
+				  struct metapath *mp)
 {
-	struct metapath mp = { .mp_aheight = 1, };
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
 	unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
@@ -996,9 +991,9 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
 	unstuff = gfs2_is_stuffed(ip) &&
 		  pos + length > gfs2_max_stuffed_size(ip);
 
-	ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
+	ret = gfs2_iomap_get(inode, pos, length, flags, iomap, mp);
 	if (ret)
-		goto out_release;
+		goto out_unlock;
 
 	alloc_required = unstuff || iomap->type == IOMAP_HOLE;
 
@@ -1013,7 +1008,7 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
 
 		ret = gfs2_quota_lock_check(ip, &ap);
 		if (ret)
-			goto out_release;
+			goto out_unlock;
 
 		ret = gfs2_inplace_reserve(ip, &ap);
 		if (ret)
@@ -1038,17 +1033,15 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
 		ret = gfs2_unstuff_dinode(ip, NULL);
 		if (ret)
 			goto out_trans_end;
-		release_metapath(&mp);
-		brelse(iomap->private);
-		iomap->private = NULL;
+		release_metapath(mp);
 		ret = gfs2_iomap_get(inode, iomap->offset, iomap->length,
-				     flags, iomap, &mp);
+				     flags, iomap, mp);
 		if (ret)
 			goto out_trans_end;
 	}
 
 	if (iomap->type == IOMAP_HOLE) {
-		ret = gfs2_iomap_alloc(inode, iomap, flags, &mp);
+		ret = gfs2_iomap_alloc(inode, iomap, flags, mp);
 		if (ret) {
 			gfs2_trans_end(sdp);
 			gfs2_inplace_release(ip);
@@ -1056,7 +1049,6 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
 			goto out_qunlock;
 		}
 	}
-	release_metapath(&mp);
 	if (gfs2_is_jdata(ip))
 		iomap->page_done = gfs2_iomap_journaled_page_done;
 	return 0;
@@ -1069,10 +1061,7 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
 out_qunlock:
 	if (alloc_required)
 		gfs2_quota_unlock(ip);
-out_release:
-	if (iomap->private)
-		brelse(iomap->private);
-	release_metapath(&mp);
+out_unlock:
 	gfs2_write_unlock(inode);
 	return ret;
 }
@@ -1088,10 +1077,10 @@ static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
 
 	trace_gfs2_iomap_start(ip, pos, length, flags);
 	if ((flags & IOMAP_WRITE) && !(flags & IOMAP_DIRECT)) {
-		ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap);
+		ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap, &mp);
 	} else {
 		ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
-		release_metapath(&mp);
+
 		/*
 		 * Silently fall back to buffered I/O for stuffed files or if
 		 * we've hot a hole (see gfs2_file_direct_write).
@@ -1100,6 +1089,11 @@ static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
 		    iomap->type != IOMAP_MAPPED)
 			ret = -ENOTBLK;
 	}
+	if (!ret) {
+		get_bh(mp.mp_bh[0]);
+		iomap->private = mp.mp_bh[0];
+	}
+	release_metapath(&mp);
 	trace_gfs2_iomap_end(ip, iomap, ret);
 	return ret;
 }
-- 
GitLab


From 44a4a66b619a0a83a52e707ebcd80182207bd50e Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Mon, 5 Nov 2018 10:53:09 -0500
Subject: [PATCH 1140/1890] XArray: Correct xa_store_range

The explicit '64' should have been BITS_PER_LONG, but while looking at
this code I realised I meant to use __ffs(), not ilog2().

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 lib/xarray.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/lib/xarray.c b/lib/xarray.c
index 7946380cd6c93..bbacca5765936 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -1565,8 +1565,9 @@ void *xa_store_range(struct xarray *xa, unsigned long first,
 	do {
 		xas_lock(&xas);
 		if (entry) {
-			unsigned int order = (last == ~0UL) ? 64 :
-						ilog2(last + 1);
+			unsigned int order = BITS_PER_LONG;
+			if (last + 1)
+				order = __ffs(last + 1);
 			xas_set_order(&xas, last, order);
 			xas_create(&xas);
 			if (xas_error(&xas))
-- 
GitLab


From 5404a7f1c21cfda061712bedf2d06cc0f6c755e9 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Mon, 5 Nov 2018 09:34:04 -0500
Subject: [PATCH 1141/1890] XArray tests: Correct some 64-bit assumptions

The test-suite caught these two mistakes when compiled for 32-bit.
I had only been running the test-suite in 64-bit mode.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 lib/test_xarray.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index e5294b20b52fd..5f9c14e975a40 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -504,7 +504,7 @@ static noinline void check_multi_store(struct xarray *xa)
 	rcu_read_unlock();
 
 	/* We can erase multiple values with a single store */
-	xa_store_order(xa, 0, 63, NULL, GFP_KERNEL);
+	xa_store_order(xa, 0, BITS_PER_LONG - 1, NULL, GFP_KERNEL);
 	XA_BUG_ON(xa, !xa_empty(xa));
 
 	/* Even when the first slot is empty but the others aren't */
@@ -1101,7 +1101,7 @@ static noinline void check_store_range(struct xarray *xa)
 			__check_store_range(xa, 4095 + i, 4095 + j);
 			__check_store_range(xa, 4096 + i, 4096 + j);
 			__check_store_range(xa, 123456 + i, 123456 + j);
-			__check_store_range(xa, UINT_MAX + i, UINT_MAX + j);
+			__check_store_range(xa, (1 << 24) + i, (1 << 24) + j);
 		}
 	}
 }
-- 
GitLab


From 6d7cd8c1373746a93dc868ee9d38a82df78b38aa Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Tue, 6 Nov 2018 13:11:57 -0500
Subject: [PATCH 1142/1890] dax: Remove optimisation from
 dax_lock_mapping_entry

Skipping some of the revalidation after we sleep can lead to returning
a mapping which has already been freed.  Just drop this optimisation.

Reported-by: Dan Williams <dan.j.williams@intel.com>
Fixes: 9f32d221301c ("dax: Convert dax_lock_mapping_entry to XArray")
Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 fs/dax.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 616e36ea6aaab..529ac9d7c10a0 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -383,11 +383,8 @@ bool dax_lock_mapping_entry(struct page *page)
 		entry = xas_load(&xas);
 		if (dax_is_locked(entry)) {
 			entry = get_unlocked_entry(&xas);
-			/* Did the page move while we slept? */
-			if (dax_to_pfn(entry) != page_to_pfn(page)) {
-				xas_unlock_irq(&xas);
-				continue;
-			}
+			xas_unlock_irq(&xas);
+			continue;
 		}
 		dax_lock_entry(&xas, entry);
 		xas_unlock_irq(&xas);
-- 
GitLab


From 7ae2ea7dc45e8250a74cfaaecdce578427669ae5 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Fri, 9 Nov 2018 20:09:37 -0500
Subject: [PATCH 1143/1890] dax: Make sure the unlocking entry isn't locked

I wrote the semantics in the commit message, but didn't document it in
the source code.  Use a BUG_ON instead (if any code does do this, it's
really buggy; we can't recover and it's worth taking the machine down).

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 fs/dax.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/dax.c b/fs/dax.c
index 529ac9d7c10a0..7944417f5a718 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -255,6 +255,7 @@ static void dax_unlock_entry(struct xa_state *xas, void *entry)
 {
 	void *old;
 
+	BUG_ON(dax_is_locked(entry));
 	xas_reset(xas);
 	xas_lock_irq(xas);
 	old = xas_store(xas, entry);
-- 
GitLab


From c5bbd4515a05f8acb7e6ab6297044a529762cbf5 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Fri, 16 Nov 2018 14:37:06 -0500
Subject: [PATCH 1144/1890] dax: Reinstate RCU protection of inode

For the device-dax case, it is possible that the inode can go away
underneath us.  The rcu_read_lock() was there to prevent it from
being freed, and not (as I thought) to protect the tree.  Bring back
the rcu_read_lock() protection.  Also add a little kernel-doc; while
this function is not exported to modules, it is used from outside dax.c

Reported-by: Dan Williams <dan.j.williams@intel.com>
Fixes: 9f32d221301c ("dax: Convert dax_lock_mapping_entry to XArray")
Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 fs/dax.c | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 7944417f5a718..ce87d21b38059 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -353,16 +353,27 @@ static struct page *dax_busy_page(void *entry)
 	return NULL;
 }
 
+/*
+ * dax_lock_mapping_entry - Lock the DAX entry corresponding to a page
+ * @page: The page whose entry we want to lock
+ *
+ * Context: Process context.
+ * Return: %true if the entry was locked or does not need to be locked.
+ */
 bool dax_lock_mapping_entry(struct page *page)
 {
 	XA_STATE(xas, NULL, 0);
 	void *entry;
+	bool locked;
 
+	/* Ensure page->mapping isn't freed while we look at it */
+	rcu_read_lock();
 	for (;;) {
 		struct address_space *mapping = READ_ONCE(page->mapping);
 
+		locked = false;
 		if (!dax_mapping(mapping))
-			return false;
+			break;
 
 		/*
 		 * In the device-dax case there's no need to lock, a
@@ -371,8 +382,9 @@ bool dax_lock_mapping_entry(struct page *page)
 		 * otherwise we would not have a valid pfn_to_page()
 		 * translation.
 		 */
+		locked = true;
 		if (S_ISCHR(mapping->host->i_mode))
-			return true;
+			break;
 
 		xas.xa = &mapping->i_pages;
 		xas_lock_irq(&xas);
@@ -383,14 +395,18 @@ bool dax_lock_mapping_entry(struct page *page)
 		xas_set(&xas, page->index);
 		entry = xas_load(&xas);
 		if (dax_is_locked(entry)) {
+			rcu_read_unlock();
 			entry = get_unlocked_entry(&xas);
 			xas_unlock_irq(&xas);
+			rcu_read_lock();
 			continue;
 		}
 		dax_lock_entry(&xas, entry);
 		xas_unlock_irq(&xas);
-		return true;
+		break;
 	}
+	rcu_read_unlock();
+	return locked;
 }
 
 void dax_unlock_mapping_entry(struct page *page)
-- 
GitLab


From 70cdb6ad6dc342d9643a00c9092e88f0075f2b9a Mon Sep 17 00:00:00 2001
From: Robert Jarzmik <robert.jarzmik@free.fr>
Date: Thu, 15 Nov 2018 18:16:38 +0100
Subject: [PATCH 1145/1890] gpio: pxa: fix legacy non pinctrl aware builds
 again

As pointed out by Gregor, spitz keyboard matrix is broken, with or
without CONFIG_PINCTRL set, quoting :
"The gpio matrix keypard on the Zaurus C3x00 (see spitz.c) does not work
properly. Noticeable are that rshift+c does nothing where as lshift+c
creates C.  Opposite it is for rshift+a vs lshift+a, here only rshift
works. This affects a few other combinations using the rshift or lshift
buttons."

As a matter of fact, as for platform_data based builds CONFIG_PINCTRL=n
is required for now (as opposed for devicetree builds where it should be
set), this means gpio driver should change the direction, which is what
was attempted by commit c4e5ffb6f224 ("gpio: pxa: fix legacy non pinctrl
aware builds").

Unfortunately, the input case was inverted, and the direction change was
never done. This wasn't seen up until now because the initial platform
setup (MFP) was setting this direction. Yet in Gregory's case, the
matrix-keypad driver changes back and forth the direction dynamically,
and this is why he's the first to report it.

Fixes: c4e5ffb6f224 ("gpio: pxa: fix legacy non pinctrl aware builds")
Tested-by: Greg <greguu@null.net>
Signed-off-by: Robert Jarzmik <robert.jarzmik@free.fr>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpio-pxa.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpio/gpio-pxa.c b/drivers/gpio/gpio-pxa.c
index bfe4c5c9f41ce..e9600b556f397 100644
--- a/drivers/gpio/gpio-pxa.c
+++ b/drivers/gpio/gpio-pxa.c
@@ -268,8 +268,8 @@ static int pxa_gpio_direction_input(struct gpio_chip *chip, unsigned offset)
 
 	if (pxa_gpio_has_pinctrl()) {
 		ret = pinctrl_gpio_direction_input(chip->base + offset);
-		if (!ret)
-			return 0;
+		if (ret)
+			return ret;
 	}
 
 	spin_lock_irqsave(&gpio_lock, flags);
-- 
GitLab


From bff466bac59994cfcceabe4d0be5fdc1c20cd5b8 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <brgl@bgdev.pl>
Date: Thu, 8 Nov 2018 17:52:53 +0100
Subject: [PATCH 1146/1890] gpio: mockup: fix indicated direction

Commit 3edfb7bd76bd ("gpiolib: Show correct direction from the
beginning") fixed an existing issue but broke libgpiod tests by
changing the default direction of dummy lines to output.

We don't break user-space so make gpio-mockup behave as before.

Signed-off-by: Bartosz Golaszewski <brgl@bgdev.pl>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpio-mockup.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpio/gpio-mockup.c b/drivers/gpio/gpio-mockup.c
index 8269cffc2967f..6a50f9f59c901 100644
--- a/drivers/gpio/gpio-mockup.c
+++ b/drivers/gpio/gpio-mockup.c
@@ -35,8 +35,8 @@
 #define gpio_mockup_err(...)	pr_err(GPIO_MOCKUP_NAME ": " __VA_ARGS__)
 
 enum {
-	GPIO_MOCKUP_DIR_OUT = 0,
-	GPIO_MOCKUP_DIR_IN = 1,
+	GPIO_MOCKUP_DIR_IN = 0,
+	GPIO_MOCKUP_DIR_OUT = 1,
 };
 
 /*
@@ -131,7 +131,7 @@ static int gpio_mockup_get_direction(struct gpio_chip *gc, unsigned int offset)
 {
 	struct gpio_mockup_chip *chip = gpiochip_get_data(gc);
 
-	return chip->lines[offset].dir;
+	return !chip->lines[offset].dir;
 }
 
 static int gpio_mockup_to_irq(struct gpio_chip *gc, unsigned int offset)
-- 
GitLab


From 10547d956d590fbb846a7bd053a1dfc30e8f9a68 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 9 Nov 2018 17:40:16 +0200
Subject: [PATCH 1147/1890] MAINTAINERS: Do maintain Intel GPIO drivers via
 separate tree

We would like to consolidate Intel pure GPIO drivers, including PMICs and
some old x86 platforms, in one tree which is maintained by Intel.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 MAINTAINERS | 33 +++++++++++++++++++++------------
 1 file changed, 21 insertions(+), 12 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index e1bacf0f8b085..1602421a5fd6f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7430,6 +7430,20 @@ S:	Maintained
 F:	Documentation/fb/intelfb.txt
 F:	drivers/video/fbdev/intelfb/
 
+INTEL GPIO DRIVERS
+M:	Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+L:	linux-gpio@vger.kernel.org
+S:	Maintained
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/andy/linux-gpio-intel.git
+F:	drivers/gpio/gpio-ich.c
+F:	drivers/gpio/gpio-intel-mid.c
+F:	drivers/gpio/gpio-lynxpoint.c
+F:	drivers/gpio/gpio-merrifield.c
+F:	drivers/gpio/gpio-ml-ioh.c
+F:	drivers/gpio/gpio-pch.c
+F:	drivers/gpio/gpio-sch.c
+F:	drivers/gpio/gpio-sodaville.c
+
 INTEL GVT-g DRIVERS (Intel GPU Virtualization)
 M:	Zhenyu Wang <zhenyuw@linux.intel.com>
 M:	Zhi Wang <zhi.a.wang@intel.com>
@@ -7440,12 +7454,6 @@ T:	git https://github.com/intel/gvt-linux.git
 S:	Supported
 F:	drivers/gpu/drm/i915/gvt/
 
-INTEL PMIC GPIO DRIVER
-R:	Andy Shevchenko <andriy.shevchenko@linux.intel.com>
-S:	Maintained
-F:	drivers/gpio/gpio-*cove.c
-F:	drivers/gpio/gpio-msic.c
-
 INTEL HID EVENT DRIVER
 M:	Alex Hung <alex.hung@canonical.com>
 L:	platform-driver-x86@vger.kernel.org
@@ -7533,12 +7541,6 @@ W:	https://01.org/linux-acpi
 S:	Supported
 F:	drivers/platform/x86/intel_menlow.c
 
-INTEL MERRIFIELD GPIO DRIVER
-M:	Andy Shevchenko <andriy.shevchenko@linux.intel.com>
-L:	linux-gpio@vger.kernel.org
-S:	Maintained
-F:	drivers/gpio/gpio-merrifield.c
-
 INTEL MIC DRIVERS (mic)
 M:	Sudeep Dutt <sudeep.dutt@intel.com>
 M:	Ashutosh Dixit <ashutosh.dixit@intel.com>
@@ -7571,6 +7573,13 @@ F:	drivers/platform/x86/intel_punit_ipc.c
 F:	arch/x86/include/asm/intel_pmc_ipc.h
 F:	arch/x86/include/asm/intel_punit_ipc.h
 
+INTEL PMIC GPIO DRIVERS
+M:	Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+S:	Maintained
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/andy/linux-gpio-intel.git
+F:	drivers/gpio/gpio-*cove.c
+F:	drivers/gpio/gpio-msic.c
+
 INTEL MULTIFUNCTION PMIC DEVICE DRIVERS
 R:	Andy Shevchenko <andriy.shevchenko@linux.intel.com>
 S:	Maintained
-- 
GitLab


From 83e65df6dfece9eb588735459428f221eb930c0c Mon Sep 17 00:00:00 2001
From: Maxime Chevallier <maxime.chevallier@bootlin.com>
Date: Fri, 9 Nov 2018 09:17:33 +0100
Subject: [PATCH 1148/1890] net: mvneta: Don't advertise 2.5G modes

Using 2.5G speed relies on the SerDes lanes being configured
accordingly. The lanes have to be reconfigured to switch between
1G and 2.5G, and for now only the bootloader does this configuration.

In the case we add a Comphy driver to handle switching the lanes
dynamically, it's better for now to stick with supporting only 1G and
add advertisement for 2.5G once we really are capable of handling both
speeds without problem.

Since the interface mode is initialy taken from the DT, we want to make
sure that adding comphy support won't break boards that don't update
their dtb.

Fixes: da58a931f248 ("net: mvneta: Add support for 2500Mbps SGMII")
Reported-by: Andrew Lunn <andrew@lunn.ch>
Reported-by: Russell King <linux@armlinux.org.uk>
Signed-off-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvneta.c | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 3ba672e9e353d..e5397c8197b9c 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -3343,7 +3343,6 @@ static void mvneta_validate(struct net_device *ndev, unsigned long *supported,
 	if (state->interface != PHY_INTERFACE_MODE_NA &&
 	    state->interface != PHY_INTERFACE_MODE_QSGMII &&
 	    state->interface != PHY_INTERFACE_MODE_SGMII &&
-	    state->interface != PHY_INTERFACE_MODE_2500BASEX &&
 	    !phy_interface_mode_is_8023z(state->interface) &&
 	    !phy_interface_mode_is_rgmii(state->interface)) {
 		bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS);
@@ -3357,14 +3356,9 @@ static void mvneta_validate(struct net_device *ndev, unsigned long *supported,
 	/* Asymmetric pause is unsupported */
 	phylink_set(mask, Pause);
 
-	/* We cannot use 1Gbps when using the 2.5G interface. */
-	if (state->interface == PHY_INTERFACE_MODE_2500BASEX) {
-		phylink_set(mask, 2500baseT_Full);
-		phylink_set(mask, 2500baseX_Full);
-	} else {
-		phylink_set(mask, 1000baseT_Full);
-		phylink_set(mask, 1000baseX_Full);
-	}
+	/* Half-duplex at speeds higher than 100Mbit is unsupported */
+	phylink_set(mask, 1000baseT_Full);
+	phylink_set(mask, 1000baseX_Full);
 
 	if (!phy_interface_mode_is_8023z(state->interface)) {
 		/* 10M and 100M are only supported in non-802.3z mode */
-- 
GitLab


From 761f60261b4401aa368d71d431b4c218af0efcee Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Wed, 14 Nov 2018 00:48:28 +0800
Subject: [PATCH 1149/1890] ipv6: fix a dst leak when removing its exception

These is no need to hold dst before calling rt6_remove_exception_rt().
The call to dst_hold_safe() in ip6_link_failure() was for ip6_del_rt(),
which has been removed in Commit 93531c674315 ("net/ipv6: separate
handling of FIB entries from dst based routes"). Otherwise, it will
cause a dst leak.

This patch is to simply remove the dst_hold_safe() call before calling
rt6_remove_exception_rt() and also do the same in ip6_del_cached_rt().
It's safe, because the removal of the exception that holds its dst's
refcnt is protected by rt6_exception_lock.

Fixes: 93531c674315 ("net/ipv6: separate handling of FIB entries from dst based routes")
Fixes: 23fb93a4d3f1 ("net/ipv6: Cleanup exception and cache route handling")
Reported-by: Li Shuang <shuali@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 2a7423c394560..14b422f35504f 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2232,8 +2232,7 @@ static void ip6_link_failure(struct sk_buff *skb)
 	if (rt) {
 		rcu_read_lock();
 		if (rt->rt6i_flags & RTF_CACHE) {
-			if (dst_hold_safe(&rt->dst))
-				rt6_remove_exception_rt(rt);
+			rt6_remove_exception_rt(rt);
 		} else {
 			struct fib6_info *from;
 			struct fib6_node *fn;
@@ -3214,8 +3213,8 @@ static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
 	if (cfg->fc_flags & RTF_GATEWAY &&
 	    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
 		goto out;
-	if (dst_hold_safe(&rt->dst))
-		rc = rt6_remove_exception_rt(rt);
+
+	rc = rt6_remove_exception_rt(rt);
 out:
 	return rc;
 }
-- 
GitLab


From 06bc4d0079ab6a2de86f56703ce1bd13e90b9d9d Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Tue, 13 Nov 2018 18:42:24 +0100
Subject: [PATCH 1150/1890] net: lantiq: Fix returned value in case of error in
 'xrx200_probe()'

Return 'err' in the error handling path instead of 0.
Return explicitly 0 in the normal path, instead of 'err', which is known
to be 0 at this point.

Fixes: fe1a56420cf2 ("net: lantiq: Add Lantiq / Intel VRX200 Ethernet driver")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/lantiq_xrx200.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c
index 8c5ba4b81fb73..2d4d10a017e59 100644
--- a/drivers/net/ethernet/lantiq_xrx200.c
+++ b/drivers/net/ethernet/lantiq_xrx200.c
@@ -512,7 +512,8 @@ static int xrx200_probe(struct platform_device *pdev)
 	err = register_netdev(net_dev);
 	if (err)
 		goto err_unprepare_clk;
-	return err;
+
+	return 0;
 
 err_unprepare_clk:
 	clk_disable_unprepare(priv->clk);
@@ -520,7 +521,7 @@ static int xrx200_probe(struct platform_device *pdev)
 err_uninit_dma:
 	xrx200_hw_cleanup(priv);
 
-	return 0;
+	return err;
 }
 
 static int xrx200_remove(struct platform_device *pdev)
-- 
GitLab


From 19ab69107d3ecfb7cd3e38ad262a881be40c01a3 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Wed, 14 Nov 2018 12:17:25 +0100
Subject: [PATCH 1151/1890] net/sched: act_pedit: fix memory leak when IDR
 allocation fails

tcf_idr_check_alloc() can return a negative value, on allocation failures
(-ENOMEM) or IDR exhaustion (-ENOSPC): don't leak keys_ex in these cases.

Fixes: 0190c1d452a9 ("net: sched: atomically check-allocate action")
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_pedit.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index da3dd0f68cc24..2b372a06b432a 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -201,7 +201,8 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
 			goto out_release;
 		}
 	} else {
-		return err;
+		ret = err;
+		goto out_free;
 	}
 
 	p = to_pedit(*a);
-- 
GitLab


From dfa0d55ff6be64e7b6881212a291cb95f8da3b08 Mon Sep 17 00:00:00 2001
From: Martin Schiller <ms@dev.tdt.de>
Date: Wed, 14 Nov 2018 12:54:49 +0100
Subject: [PATCH 1152/1890] net: phy: mdio-gpio: Fix working over slow
 can_sleep GPIOs

This commit re-enables support for slow GPIO pins. It was initially
introduced by commit 2d6c9091ab76 ("net: mdio-gpio: support access that
may sleep") and got lost by commit 7e5fbd1e0700 ("net: mdio-gpio:
Convert to use gpiod functions where possible").

Also add a warning about slow GPIO pins like it is done in i2c-gpio.

Signed-off-by: Martin Schiller <ms@dev.tdt.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mdio-gpio.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/net/phy/mdio-gpio.c b/drivers/net/phy/mdio-gpio.c
index 33265747bf399..3a5a24daf3843 100644
--- a/drivers/net/phy/mdio-gpio.c
+++ b/drivers/net/phy/mdio-gpio.c
@@ -63,7 +63,7 @@ static void mdio_dir(struct mdiobb_ctrl *ctrl, int dir)
 		 * assume the pin serves as pull-up. If direction is
 		 * output, the default value is high.
 		 */
-		gpiod_set_value(bitbang->mdo, 1);
+		gpiod_set_value_cansleep(bitbang->mdo, 1);
 		return;
 	}
 
@@ -78,7 +78,7 @@ static int mdio_get(struct mdiobb_ctrl *ctrl)
 	struct mdio_gpio_info *bitbang =
 		container_of(ctrl, struct mdio_gpio_info, ctrl);
 
-	return gpiod_get_value(bitbang->mdio);
+	return gpiod_get_value_cansleep(bitbang->mdio);
 }
 
 static void mdio_set(struct mdiobb_ctrl *ctrl, int what)
@@ -87,9 +87,9 @@ static void mdio_set(struct mdiobb_ctrl *ctrl, int what)
 		container_of(ctrl, struct mdio_gpio_info, ctrl);
 
 	if (bitbang->mdo)
-		gpiod_set_value(bitbang->mdo, what);
+		gpiod_set_value_cansleep(bitbang->mdo, what);
 	else
-		gpiod_set_value(bitbang->mdio, what);
+		gpiod_set_value_cansleep(bitbang->mdio, what);
 }
 
 static void mdc_set(struct mdiobb_ctrl *ctrl, int what)
@@ -97,7 +97,7 @@ static void mdc_set(struct mdiobb_ctrl *ctrl, int what)
 	struct mdio_gpio_info *bitbang =
 		container_of(ctrl, struct mdio_gpio_info, ctrl);
 
-	gpiod_set_value(bitbang->mdc, what);
+	gpiod_set_value_cansleep(bitbang->mdc, what);
 }
 
 static const struct mdiobb_ops mdio_gpio_ops = {
@@ -162,6 +162,10 @@ static int mdio_gpio_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
+	if (gpiod_cansleep(bitbang->mdc) || gpiod_cansleep(bitbang->mdio) ||
+	    gpiod_cansleep(bitbang->mdo))
+		dev_warn(&pdev->dev, "Slow GPIO pins might wreak havoc into MDIO bus timing");
+
 	if (pdev->dev.of_node) {
 		bus_id = of_alias_get_id(pdev->dev.of_node, "mdio-gpio");
 		if (bus_id < 0) {
-- 
GitLab


From afd594240806acc138cf696c09f2f4829d55d02f Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Fri, 16 Nov 2018 12:00:07 +0000
Subject: [PATCH 1153/1890] bpf: fix off-by-one error in adjust_subprog_starts

When patching in a new sequence for the first insn of a subprog, the start
 of that subprog does not change (it's the first insn of the sequence), so
 adjust_subprog_starts should check start <= off (rather than < off).
Also added a test to test_verifier.c (it's essentially the syz reproducer).

Fixes: cc8b0b92a169 ("bpf: introduce function calls (function boundaries)")
Reported-by: syzbot+4fc427c7af994b0948be@syzkaller.appspotmail.com
Signed-off-by: Edward Cree <ecree@solarflare.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/verifier.c                       |  2 +-
 tools/testing/selftests/bpf/test_verifier.c | 19 +++++++++++++++++++
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 1971ca325fb4e..6dd419550aba4 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5650,7 +5650,7 @@ static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len
 		return;
 	/* NOTE: fake 'exit' subprog should be updated as well. */
 	for (i = 0; i <= env->subprog_cnt; i++) {
-		if (env->subprog_info[i].start < off)
+		if (env->subprog_info[i].start <= off)
 			continue;
 		env->subprog_info[i].start += len - 1;
 	}
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 6f61df62f690c..550b7e46bf4a4 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -13896,6 +13896,25 @@ static struct bpf_test tests[] = {
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.result = ACCEPT,
 	},
+	{
+		"calls: ctx read at start of subprog",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5),
+			BPF_JMP_REG(BPF_JSGT, BPF_REG_0, BPF_REG_0, 0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_B, BPF_REG_9, BPF_REG_1, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
+		.errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+	},
 };
 
 static int probe_filter_length(const struct bpf_insn *fp)
-- 
GitLab


From 569a933b03f3c48b392fe67c0086b3a6b9306b5a Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guroan@gmail.com>
Date: Wed, 14 Nov 2018 10:00:34 -0800
Subject: [PATCH 1154/1890] bpf: allocate local storage buffers using
 GFP_ATOMIC

Naresh reported an issue with the non-atomic memory allocation of
cgroup local storage buffers:

[   73.047526] BUG: sleeping function called from invalid context at
/srv/oe/build/tmp-rpb-glibc/work-shared/intel-corei7-64/kernel-source/mm/slab.h:421
[   73.060915] in_atomic(): 1, irqs_disabled(): 0, pid: 3157, name: test_cgroup_sto
[   73.068342] INFO: lockdep is turned off.
[   73.072293] CPU: 2 PID: 3157 Comm: test_cgroup_sto Not tainted
4.20.0-rc2-next-20181113 #1
[   73.080548] Hardware name: Supermicro SYS-5019S-ML/X11SSH-F, BIOS
2.0b 07/27/2017
[   73.088018] Call Trace:
[   73.090463]  dump_stack+0x70/0xa5
[   73.093783]  ___might_sleep+0x152/0x240
[   73.097619]  __might_sleep+0x4a/0x80
[   73.101191]  __kmalloc_node+0x1cf/0x2f0
[   73.105031]  ? cgroup_storage_update_elem+0x46/0x90
[   73.109909]  cgroup_storage_update_elem+0x46/0x90

cgroup_storage_update_elem() (as well as other update map update
callbacks) is called with disabled preemption, so GFP_ATOMIC
allocation should be used: e.g. alloc_htab_elem() in hashtab.c.

Reported-by: Naresh Kamboju <naresh.kamboju@linaro.org>
Tested-by: Naresh Kamboju <naresh.kamboju@linaro.org>
Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/local_storage.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index c97a8f968638c..bed9d48a7ae95 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -139,7 +139,8 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
 		return -ENOENT;
 
 	new = kmalloc_node(sizeof(struct bpf_storage_buffer) +
-			   map->value_size, __GFP_ZERO | GFP_USER,
+			   map->value_size,
+			   __GFP_ZERO | GFP_ATOMIC | __GFP_NOWARN,
 			   map->numa_node);
 	if (!new)
 		return -ENOMEM;
-- 
GitLab


From 160396a722e0c4dfd462f3eec779251bf944f438 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 16 Nov 2018 23:04:37 -0800
Subject: [PATCH 1155/1890] Revert "net: phy: mdio-gpio: Fix working over slow
 can_sleep GPIOs"

This reverts commit dfa0d55ff6be64e7b6881212a291cb95f8da3b08.

Discussion still ongoing, I shouldn't have applied this.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mdio-gpio.c | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/drivers/net/phy/mdio-gpio.c b/drivers/net/phy/mdio-gpio.c
index 3a5a24daf3843..33265747bf399 100644
--- a/drivers/net/phy/mdio-gpio.c
+++ b/drivers/net/phy/mdio-gpio.c
@@ -63,7 +63,7 @@ static void mdio_dir(struct mdiobb_ctrl *ctrl, int dir)
 		 * assume the pin serves as pull-up. If direction is
 		 * output, the default value is high.
 		 */
-		gpiod_set_value_cansleep(bitbang->mdo, 1);
+		gpiod_set_value(bitbang->mdo, 1);
 		return;
 	}
 
@@ -78,7 +78,7 @@ static int mdio_get(struct mdiobb_ctrl *ctrl)
 	struct mdio_gpio_info *bitbang =
 		container_of(ctrl, struct mdio_gpio_info, ctrl);
 
-	return gpiod_get_value_cansleep(bitbang->mdio);
+	return gpiod_get_value(bitbang->mdio);
 }
 
 static void mdio_set(struct mdiobb_ctrl *ctrl, int what)
@@ -87,9 +87,9 @@ static void mdio_set(struct mdiobb_ctrl *ctrl, int what)
 		container_of(ctrl, struct mdio_gpio_info, ctrl);
 
 	if (bitbang->mdo)
-		gpiod_set_value_cansleep(bitbang->mdo, what);
+		gpiod_set_value(bitbang->mdo, what);
 	else
-		gpiod_set_value_cansleep(bitbang->mdio, what);
+		gpiod_set_value(bitbang->mdio, what);
 }
 
 static void mdc_set(struct mdiobb_ctrl *ctrl, int what)
@@ -97,7 +97,7 @@ static void mdc_set(struct mdiobb_ctrl *ctrl, int what)
 	struct mdio_gpio_info *bitbang =
 		container_of(ctrl, struct mdio_gpio_info, ctrl);
 
-	gpiod_set_value_cansleep(bitbang->mdc, what);
+	gpiod_set_value(bitbang->mdc, what);
 }
 
 static const struct mdiobb_ops mdio_gpio_ops = {
@@ -162,10 +162,6 @@ static int mdio_gpio_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	if (gpiod_cansleep(bitbang->mdc) || gpiod_cansleep(bitbang->mdio) ||
-	    gpiod_cansleep(bitbang->mdo))
-		dev_warn(&pdev->dev, "Slow GPIO pins might wreak havoc into MDIO bus timing");
-
 	if (pdev->dev.of_node) {
 		bus_id = of_alias_get_id(pdev->dev.of_node, "mdio-gpio");
 		if (bus_id < 0) {
-- 
GitLab


From 53fffe29a9e664a999dd3787e4428da8c30533e0 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Sat, 17 Nov 2018 07:43:42 -0700
Subject: [PATCH 1156/1890] aio: fix failure to put the file pointer

If the ioprio capability check fails, we return without putting
the file pointer.

Fixes: d9a08a9e616b ("fs: Add aio iopriority support")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/aio.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/aio.c b/fs/aio.c
index 301e6314183b6..97f9835929256 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1436,6 +1436,7 @@ static int aio_prep_rw(struct kiocb *req, struct iocb *iocb)
 		ret = ioprio_check_cap(iocb->aio_reqprio);
 		if (ret) {
 			pr_debug("aio ioprio check cap error: %d\n", ret);
+			fput(req->ki_filp);
 			return ret;
 		}
 
-- 
GitLab


From fda490d39fc0668d92e170d95c11e35a010019aa Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Fri, 16 Nov 2018 15:07:31 -0500
Subject: [PATCH 1157/1890] dax: Fix dax_unlock_mapping_entry for PMD pages

Device DAX PMD pages do not set the PageHead bit for compound pages.
Fix for now by retrieving the PMD bit from the entry, but eventually we
will be passed the page size by the caller.

Reported-by: Dan Williams <dan.j.williams@intel.com>
Fixes: 9f32d221301c ("dax: Convert dax_lock_mapping_entry to XArray")
Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 fs/dax.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index ce87d21b38059..5426252375f63 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -98,12 +98,6 @@ static void *dax_make_entry(pfn_t pfn, unsigned long flags)
 	return xa_mk_value(flags | (pfn_t_to_pfn(pfn) << DAX_SHIFT));
 }
 
-static void *dax_make_page_entry(struct page *page)
-{
-	pfn_t pfn = page_to_pfn_t(page);
-	return dax_make_entry(pfn, PageHead(page) ? DAX_PMD : 0);
-}
-
 static bool dax_is_locked(void *entry)
 {
 	return xa_to_value(entry) & DAX_LOCKED;
@@ -116,12 +110,12 @@ static unsigned int dax_entry_order(void *entry)
 	return 0;
 }
 
-static int dax_is_pmd_entry(void *entry)
+static unsigned long dax_is_pmd_entry(void *entry)
 {
 	return xa_to_value(entry) & DAX_PMD;
 }
 
-static int dax_is_pte_entry(void *entry)
+static bool dax_is_pte_entry(void *entry)
 {
 	return !(xa_to_value(entry) & DAX_PMD);
 }
@@ -413,11 +407,16 @@ void dax_unlock_mapping_entry(struct page *page)
 {
 	struct address_space *mapping = page->mapping;
 	XA_STATE(xas, &mapping->i_pages, page->index);
+	void *entry;
 
 	if (S_ISCHR(mapping->host->i_mode))
 		return;
 
-	dax_unlock_entry(&xas, dax_make_page_entry(page));
+	rcu_read_lock();
+	entry = xas_load(&xas);
+	rcu_read_unlock();
+	entry = dax_make_entry(page_to_pfn_t(page), dax_is_pmd_entry(entry));
+	dax_unlock_entry(&xas, entry);
 }
 
 /*
-- 
GitLab


From 0e40de0338d005f73d46898a21544cd26f01b4ce Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Fri, 16 Nov 2018 15:19:13 -0500
Subject: [PATCH 1158/1890] dax: Fix huge page faults

Using xas_load() with a PMD-sized xa_state would work if either a
PMD-sized entry was present or a PTE sized entry was present in the
first 64 entries (of the 512 PTEs in a PMD on x86).  If there was no
PTE in the first 64 entries, grab_mapping_entry() would believe there
were no entries present, allocate a PMD-sized entry and overwrite the
PTE in the page cache.

Use xas_find_conflict() instead which turns out to simplify
both get_unlocked_entry() and grab_mapping_entry().  Also remove a
WARN_ON_ONCE from grab_mapping_entry() as it will have already triggered
in get_unlocked_entry().

Fixes: cfc93c6c6c96 ("dax: Convert dax_insert_pfn_mkwrite to XArray")
Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 fs/dax.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 5426252375f63..cf2394e2bf4bf 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -216,9 +216,8 @@ static void *get_unlocked_entry(struct xa_state *xas)
 	ewait.wait.func = wake_exceptional_entry_func;
 
 	for (;;) {
-		entry = xas_load(xas);
-		if (!entry || xa_is_internal(entry) ||
-				WARN_ON_ONCE(!xa_is_value(entry)) ||
+		entry = xas_find_conflict(xas);
+		if (!entry || WARN_ON_ONCE(!xa_is_value(entry)) ||
 				!dax_is_locked(entry))
 			return entry;
 
@@ -458,11 +457,9 @@ static void *grab_mapping_entry(struct xa_state *xas,
 retry:
 	xas_lock_irq(xas);
 	entry = get_unlocked_entry(xas);
-	if (xa_is_internal(entry))
-		goto fallback;
 
 	if (entry) {
-		if (WARN_ON_ONCE(!xa_is_value(entry))) {
+		if (!xa_is_value(entry)) {
 			xas_set_err(xas, EIO);
 			goto out_unlock;
 		}
@@ -1641,8 +1638,7 @@ dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order)
 	/* Did we race with someone splitting entry or so? */
 	if (!entry ||
 	    (order == 0 && !dax_is_pte_entry(entry)) ||
-	    (order == PMD_ORDER && (xa_is_internal(entry) ||
-				    !dax_is_pmd_entry(entry)))) {
+	    (order == PMD_ORDER && !dax_is_pmd_entry(entry))) {
 		put_unlocked_entry(&xas, entry);
 		xas_unlock_irq(&xas);
 		trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf,
-- 
GitLab


From b4e955e9f372035361fbc6f07b21fe2cc6a5be4a Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Fri, 16 Nov 2018 21:32:35 +0900
Subject: [PATCH 1159/1890] netfilter: xt_hashlimit: fix a possible memory leak
 in htable_create()

In the htable_create(), hinfo is allocated by vmalloc()
So that if error occurred, hinfo should be freed.

Fixes: 11d5f15723c9 ("netfilter: xt_hashlimit: Create revision 2 to support higher pps rates")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/xt_hashlimit.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 3e7d259e5d8de..1ad4017f9b734 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -295,9 +295,10 @@ static int htable_create(struct net *net, struct hashlimit_cfg3 *cfg,
 
 	/* copy match config into hashtable config */
 	ret = cfg_copy(&hinfo->cfg, (void *)cfg, 3);
-
-	if (ret)
+	if (ret) {
+		vfree(hinfo);
 		return ret;
+	}
 
 	hinfo->cfg.size = size;
 	if (hinfo->cfg.max == 0)
@@ -814,7 +815,6 @@ hashlimit_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
 	int ret;
 
 	ret = cfg_copy(&cfg, (void *)&info->cfg, 1);
-
 	if (ret)
 		return ret;
 
@@ -830,7 +830,6 @@ hashlimit_mt_v2(const struct sk_buff *skb, struct xt_action_param *par)
 	int ret;
 
 	ret = cfg_copy(&cfg, (void *)&info->cfg, 2);
-
 	if (ret)
 		return ret;
 
@@ -921,7 +920,6 @@ static int hashlimit_mt_check_v1(const struct xt_mtchk_param *par)
 		return ret;
 
 	ret = cfg_copy(&cfg, (void *)&info->cfg, 1);
-
 	if (ret)
 		return ret;
 
@@ -940,7 +938,6 @@ static int hashlimit_mt_check_v2(const struct xt_mtchk_param *par)
 		return ret;
 
 	ret = cfg_copy(&cfg, (void *)&info->cfg, 2);
-
 	if (ret)
 		return ret;
 
-- 
GitLab


From df5a8ec64eed7fe45b556cfff503acd6429ab817 Mon Sep 17 00:00:00 2001
From: Martin Schiller <ms@dev.tdt.de>
Date: Fri, 16 Nov 2018 08:38:36 +0100
Subject: [PATCH 1160/1890] net: phy: mdio-gpio: Fix working over slow
 can_sleep GPIOs

Up until commit 7e5fbd1e0700 ("net: mdio-gpio: Convert to use gpiod
functions where possible"), the _cansleep variants of the gpio_ API was
used. After that commit and the change to gpiod_ API, the _cansleep()
was dropped. This then results in WARN_ON() when used with GPIO
devices which do sleep. Add back the _cansleep() to avoid this.

Fixes: 7e5fbd1e0700 ("net: mdio-gpio: Convert to use gpiod functions where possible")
Signed-off-by: Martin Schiller <ms@dev.tdt.de>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mdio-gpio.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/phy/mdio-gpio.c b/drivers/net/phy/mdio-gpio.c
index 33265747bf399..0fbcedcdf6e2a 100644
--- a/drivers/net/phy/mdio-gpio.c
+++ b/drivers/net/phy/mdio-gpio.c
@@ -63,7 +63,7 @@ static void mdio_dir(struct mdiobb_ctrl *ctrl, int dir)
 		 * assume the pin serves as pull-up. If direction is
 		 * output, the default value is high.
 		 */
-		gpiod_set_value(bitbang->mdo, 1);
+		gpiod_set_value_cansleep(bitbang->mdo, 1);
 		return;
 	}
 
@@ -78,7 +78,7 @@ static int mdio_get(struct mdiobb_ctrl *ctrl)
 	struct mdio_gpio_info *bitbang =
 		container_of(ctrl, struct mdio_gpio_info, ctrl);
 
-	return gpiod_get_value(bitbang->mdio);
+	return gpiod_get_value_cansleep(bitbang->mdio);
 }
 
 static void mdio_set(struct mdiobb_ctrl *ctrl, int what)
@@ -87,9 +87,9 @@ static void mdio_set(struct mdiobb_ctrl *ctrl, int what)
 		container_of(ctrl, struct mdio_gpio_info, ctrl);
 
 	if (bitbang->mdo)
-		gpiod_set_value(bitbang->mdo, what);
+		gpiod_set_value_cansleep(bitbang->mdo, what);
 	else
-		gpiod_set_value(bitbang->mdio, what);
+		gpiod_set_value_cansleep(bitbang->mdio, what);
 }
 
 static void mdc_set(struct mdiobb_ctrl *ctrl, int what)
@@ -97,7 +97,7 @@ static void mdc_set(struct mdiobb_ctrl *ctrl, int what)
 	struct mdio_gpio_info *bitbang =
 		container_of(ctrl, struct mdio_gpio_info, ctrl);
 
-	gpiod_set_value(bitbang->mdc, what);
+	gpiod_set_value_cansleep(bitbang->mdc, what);
 }
 
 static const struct mdiobb_ops mdio_gpio_ops = {
-- 
GitLab


From 95506588d2c1d72ca29adef8ae9bf771bcfb4ced Mon Sep 17 00:00:00 2001
From: Slavomir Kaslev <kaslevs@vmware.com>
Date: Fri, 16 Nov 2018 11:27:53 +0200
Subject: [PATCH 1161/1890] socket: do a generic_file_splice_read when
 proto_ops has no splice_read

splice(2) fails with -EINVAL when called reading on a socket with no splice_read
set in its proto_ops (such as vsock sockets). Switch this to fallbacks to a
generic_file_splice_read instead.

Signed-off-by: Slavomir Kaslev <kaslevs@vmware.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/socket.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/socket.c b/net/socket.c
index 593826e11a537..334fcc617ef27 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -853,7 +853,7 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
 	struct socket *sock = file->private_data;
 
 	if (unlikely(!sock->ops->splice_read))
-		return -EINVAL;
+		return generic_file_splice_read(file, ppos, pipe, len, flags);
 
 	return sock->ops->splice_read(sock, ppos, pipe, len, flags);
 }
-- 
GitLab


From 9d332e69c1dc74dcd748de7cbd2dac5c61bda265 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Date: Fri, 16 Nov 2018 18:50:01 +0200
Subject: [PATCH 1162/1890] net: bridge: fix vlan stats use-after-free on
 destruction

Syzbot reported a use-after-free of the global vlan context on port vlan
destruction. When I added per-port vlan stats I missed the fact that the
global vlan context can be freed before the per-port vlan rcu callback.
There're a few different ways to deal with this, I've chosen to add a
new private flag that is set only when per-port stats are allocated so
we can directly check it on destruction without dereferencing the global
context at all. The new field in net_bridge_vlan uses a hole.

v2: cosmetic change, move the check to br_process_vlan_info where the
    other checks are done
v3: add change log in the patch, add private (in-kernel only) flags in a
    hole in net_bridge_vlan struct and use that instead of mixing
    user-space flags with private flags

Fixes: 9163a0fc1f0c ("net: bridge: add support for per-port vlan stats")
Reported-by: syzbot+04681da557a0e49a52e5@syzkaller.appspotmail.com
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_private.h | 7 +++++++
 net/bridge/br_vlan.c    | 3 ++-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 2920e06a54032..04c19a37e500e 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -102,12 +102,18 @@ struct br_tunnel_info {
 	struct metadata_dst	*tunnel_dst;
 };
 
+/* private vlan flags */
+enum {
+	BR_VLFLAG_PER_PORT_STATS = BIT(0),
+};
+
 /**
  * struct net_bridge_vlan - per-vlan entry
  *
  * @vnode: rhashtable member
  * @vid: VLAN id
  * @flags: bridge vlan flags
+ * @priv_flags: private (in-kernel) bridge vlan flags
  * @stats: per-cpu VLAN statistics
  * @br: if MASTER flag set, this points to a bridge struct
  * @port: if MASTER flag unset, this points to a port struct
@@ -127,6 +133,7 @@ struct net_bridge_vlan {
 	struct rhash_head		tnode;
 	u16				vid;
 	u16				flags;
+	u16				priv_flags;
 	struct br_vlan_stats __percpu	*stats;
 	union {
 		struct net_bridge	*br;
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 8c9297a019475..e84be08b82854 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -197,7 +197,7 @@ static void nbp_vlan_rcu_free(struct rcu_head *rcu)
 	v = container_of(rcu, struct net_bridge_vlan, rcu);
 	WARN_ON(br_vlan_is_master(v));
 	/* if we had per-port stats configured then free them here */
-	if (v->brvlan->stats != v->stats)
+	if (v->priv_flags & BR_VLFLAG_PER_PORT_STATS)
 		free_percpu(v->stats);
 	v->stats = NULL;
 	kfree(v);
@@ -264,6 +264,7 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags)
 				err = -ENOMEM;
 				goto out_filt;
 			}
+			v->priv_flags |= BR_VLFLAG_PER_PORT_STATS;
 		} else {
 			v->stats = masterv->stats;
 		}
-- 
GitLab


From 8840c3e2344a456267d7989b97d097e798b28b0b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@toke.dk>
Date: Fri, 16 Nov 2018 12:13:59 -0800
Subject: [PATCH 1163/1890] MAINTAINERS: Add entry for CAKE qdisc
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We would like the existing community to be kept in the loop for any new
developments on CAKE; and I certainly plan to keep maintaining it. Reflect
this in MAINTAINERS.

Signed-off-by: Toke HÃ¸iland-JÃ¸rgensen <toke@toke.dk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 5a4bd37d9d02a..99f2956be87b8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3276,6 +3276,12 @@ F:	include/uapi/linux/caif/
 F:	include/net/caif/
 F:	net/caif/
 
+CAKE QDISC
+M:	Toke HÃ¸iland-JÃ¸rgensen <toke@toke.dk>
+L:	cake@lists.bufferbloat.net (moderated for non-subscribers)
+S:	Maintained
+F:	net/sched/sch_cake.c
+
 CALGARY x86-64 IOMMU
 M:	Muli Ben-Yehuda <mulix@mulix.org>
 M:	Jon Mason <jdmason@kudzu.us>
-- 
GitLab


From 16f7eb2b77b55da816c4e207f3f9440a8cafc00a Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Fri, 16 Nov 2018 16:58:19 +0100
Subject: [PATCH 1164/1890] ip_tunnel: don't force DF when MTU is locked

The various types of tunnels running over IPv4 can ask to set the DF
bit to do PMTU discovery. However, PMTU discovery is subject to the
threshold set by the net.ipv4.route.min_pmtu sysctl, and is also
disabled on routes with "mtu lock". In those cases, we shouldn't set
the DF bit.

This patch makes setting the DF bit conditional on the route's MTU
locking state.

This issue seems to be older than git history.

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_tunnel_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index dde671e978298..c248e0dccbe17 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -80,7 +80,7 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
 
 	iph->version	=	4;
 	iph->ihl	=	sizeof(struct iphdr) >> 2;
-	iph->frag_off	=	df;
+	iph->frag_off	=	ip_mtu_locked(&rt->dst) ? 0 : df;
 	iph->protocol	=	proto;
 	iph->tos	=	tos;
 	iph->daddr	=	dst;
-- 
GitLab


From 5aaf6428526bcad98d6f51f2f679c919bb75d7e9 Mon Sep 17 00:00:00 2001
From: Lucas Bates <lucasb@mojatatu.com>
Date: Fri, 16 Nov 2018 17:37:55 -0500
Subject: [PATCH 1165/1890] tc-testing: tdc.py: ignore errors when decoding
 stdout/stderr

Prevent exceptions from being raised while decoding output
from an executed command. There is no impact on tdc's
execution and the verify command phase would fail the pattern
match.

Signed-off-by: Lucas Bates <lucasb@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/tc-testing/tdc.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py
index 87a04a8a5945c..9b3f414ff1e9a 100755
--- a/tools/testing/selftests/tc-testing/tdc.py
+++ b/tools/testing/selftests/tc-testing/tdc.py
@@ -134,9 +134,9 @@ def exec_cmd(args, pm, stage, command):
     (rawout, serr) = proc.communicate()
 
     if proc.returncode != 0 and len(serr) > 0:
-        foutput = serr.decode("utf-8")
+        foutput = serr.decode("utf-8", errors="ignore")
     else:
-        foutput = rawout.decode("utf-8")
+        foutput = rawout.decode("utf-8", errors="ignore")
 
     proc.stdout.close()
     proc.stderr.close()
-- 
GitLab


From c6cecf4ae44e4ce9158ef8806358142c3512cd33 Mon Sep 17 00:00:00 2001
From: "Brenda J. Butler" <bjb@mojatatu.com>
Date: Fri, 16 Nov 2018 17:37:56 -0500
Subject: [PATCH 1166/1890] tc-testing: tdc.py: Guard against lack of
 returncode in executed command

Add some defensive coding in case one of the subprocesses created by tdc
returns nothing. If no object is returned from exec_cmd, then tdc will
halt with an unhandled exception.

Signed-off-by: Brenda J. Butler <bjb@mojatatu.com>
Signed-off-by: Lucas Bates <lucasb@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/tc-testing/tdc.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py
index 9b3f414ff1e9a..7607ba3e3cbe4 100755
--- a/tools/testing/selftests/tc-testing/tdc.py
+++ b/tools/testing/selftests/tc-testing/tdc.py
@@ -169,6 +169,8 @@ def prepare_env(args, pm, stage, prefix, cmdlist, output = None):
                   file=sys.stderr)
             print("\n{} *** Error message: \"{}\"".format(prefix, foutput),
                   file=sys.stderr)
+            print("returncode {}; expected {}".format(proc.returncode,
+                                                      exit_codes))
             print("\n{} *** Aborting test run.".format(prefix), file=sys.stderr)
             print("\n\n{} *** stdout ***".format(proc.stdout), file=sys.stderr)
             print("\n\n{} *** stderr ***".format(proc.stderr), file=sys.stderr)
@@ -195,12 +197,18 @@ def run_one_test(pm, args, index, tidx):
         print('-----> execute stage')
     pm.call_pre_execute()
     (p, procout) = exec_cmd(args, pm, 'execute', tidx["cmdUnderTest"])
-    exit_code = p.returncode
+    if p:
+        exit_code = p.returncode
+    else:
+        exit_code = None
+
     pm.call_post_execute()
 
-    if (exit_code != int(tidx["expExitCode"])):
+    if (exit_code is None or exit_code != int(tidx["expExitCode"])):
         result = False
-        print("exit:", exit_code, int(tidx["expExitCode"]))
+        print("exit: {!r}".format(exit_code))
+        print("exit: {}".format(int(tidx["expExitCode"])))
+        #print("exit: {!r} {}".format(exit_code, int(tidx["expExitCode"])))
         print(procout)
     else:
         if args.verbose > 0:
-- 
GitLab


From 33d9a2c72f086cbf1087b2fd2d1a15aa9df14a7f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 17 Nov 2018 21:57:02 -0800
Subject: [PATCH 1167/1890] net-gro: reset skb->pkt_type in napi_reuse_skb()

eth_type_trans() assumes initial value for skb->pkt_type
is PACKET_HOST.

This is indeed the value right after a fresh skb allocation.

However, it is possible that GRO merged a packet with a different
value (like PACKET_OTHERHOST in case macvlan is used), so
we need to make sure napi->skb will have pkt_type set back to
PACKET_HOST.

Otherwise, valid packets might be dropped by the stack because
their pkt_type is not PACKET_HOST.

napi_reuse_skb() was added in commit 96e93eab2033 ("gro: Add
internal interfaces for VLAN"), but this bug always has
been there.

Fixes: 96e93eab2033 ("gro: Add internal interfaces for VLAN")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/core/dev.c b/net/core/dev.c
index 0ffcbdd55fa9e..066aa902d85c3 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5655,6 +5655,10 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
 	skb->vlan_tci = 0;
 	skb->dev = napi->dev;
 	skb->skb_iif = 0;
+
+	/* eth_type_trans() assumes pkt_type is PACKET_HOST */
+	skb->pkt_type = PACKET_HOST;
+
 	skb->encapsulation = 0;
 	skb_shinfo(skb)->gso_type = 0;
 	skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
-- 
GitLab


From adba75be0d23cca92a028749d92c60c8909bbdb3 Mon Sep 17 00:00:00 2001
From: Jon Maloy <donmalo99@gmail.com>
Date: Fri, 16 Nov 2018 16:55:04 -0500
Subject: [PATCH 1168/1890] tipc: fix lockdep warning when reinitilaizing
 sockets

We get the following warning:

[   47.926140] 32-bit node address hash set to 2010a0a
[   47.927202]
[   47.927433] ================================
[   47.928050] WARNING: inconsistent lock state
[   47.928661] 4.19.0+ #37 Tainted: G            E
[   47.929346] --------------------------------
[   47.929954] inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage.
[   47.930116] swapper/3/0 [HC0[0]:SC1[3]:HE1:SE0] takes:
[   47.930116] 00000000af8bc31e (&(&ht->lock)->rlock){+.?.}, at: rhashtable_walk_enter+0x36/0xb0
[   47.930116] {SOFTIRQ-ON-W} state was registered at:
[   47.930116]   _raw_spin_lock+0x29/0x60
[   47.930116]   rht_deferred_worker+0x556/0x810
[   47.930116]   process_one_work+0x1f5/0x540
[   47.930116]   worker_thread+0x64/0x3e0
[   47.930116]   kthread+0x112/0x150
[   47.930116]   ret_from_fork+0x3a/0x50
[   47.930116] irq event stamp: 14044
[   47.930116] hardirqs last  enabled at (14044): [<ffffffff9a07fbba>] __local_bh_enable_ip+0x7a/0xf0
[   47.938117] hardirqs last disabled at (14043): [<ffffffff9a07fb81>] __local_bh_enable_ip+0x41/0xf0
[   47.938117] softirqs last  enabled at (14028): [<ffffffff9a0803ee>] irq_enter+0x5e/0x60
[   47.938117] softirqs last disabled at (14029): [<ffffffff9a0804a5>] irq_exit+0xb5/0xc0
[   47.938117]
[   47.938117] other info that might help us debug this:
[   47.938117]  Possible unsafe locking scenario:
[   47.938117]
[   47.938117]        CPU0
[   47.938117]        ----
[   47.938117]   lock(&(&ht->lock)->rlock);
[   47.938117]   <Interrupt>
[   47.938117]     lock(&(&ht->lock)->rlock);
[   47.938117]
[   47.938117]  *** DEADLOCK ***
[   47.938117]
[   47.938117] 2 locks held by swapper/3/0:
[   47.938117]  #0: 0000000062c64f90 ((&d->timer)){+.-.}, at: call_timer_fn+0x5/0x280
[   47.938117]  #1: 00000000ee39619c (&(&d->lock)->rlock){+.-.}, at: tipc_disc_timeout+0xc8/0x540 [tipc]
[   47.938117]
[   47.938117] stack backtrace:
[   47.938117] CPU: 3 PID: 0 Comm: swapper/3 Tainted: G            E     4.19.0+ #37
[   47.938117] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
[   47.938117] Call Trace:
[   47.938117]  <IRQ>
[   47.938117]  dump_stack+0x5e/0x8b
[   47.938117]  print_usage_bug+0x1ed/0x1ff
[   47.938117]  mark_lock+0x5b5/0x630
[   47.938117]  __lock_acquire+0x4c0/0x18f0
[   47.938117]  ? lock_acquire+0xa6/0x180
[   47.938117]  lock_acquire+0xa6/0x180
[   47.938117]  ? rhashtable_walk_enter+0x36/0xb0
[   47.938117]  _raw_spin_lock+0x29/0x60
[   47.938117]  ? rhashtable_walk_enter+0x36/0xb0
[   47.938117]  rhashtable_walk_enter+0x36/0xb0
[   47.938117]  tipc_sk_reinit+0xb0/0x410 [tipc]
[   47.938117]  ? mark_held_locks+0x6f/0x90
[   47.938117]  ? __local_bh_enable_ip+0x7a/0xf0
[   47.938117]  ? lockdep_hardirqs_on+0x20/0x1a0
[   47.938117]  tipc_net_finalize+0xbf/0x180 [tipc]
[   47.938117]  tipc_disc_timeout+0x509/0x540 [tipc]
[   47.938117]  ? call_timer_fn+0x5/0x280
[   47.938117]  ? tipc_disc_msg_xmit.isra.19+0xa0/0xa0 [tipc]
[   47.938117]  ? tipc_disc_msg_xmit.isra.19+0xa0/0xa0 [tipc]
[   47.938117]  call_timer_fn+0xa1/0x280
[   47.938117]  ? tipc_disc_msg_xmit.isra.19+0xa0/0xa0 [tipc]
[   47.938117]  run_timer_softirq+0x1f2/0x4d0
[   47.938117]  __do_softirq+0xfc/0x413
[   47.938117]  irq_exit+0xb5/0xc0
[   47.938117]  smp_apic_timer_interrupt+0xac/0x210
[   47.938117]  apic_timer_interrupt+0xf/0x20
[   47.938117]  </IRQ>
[   47.938117] RIP: 0010:default_idle+0x1c/0x140
[   47.938117] Code: 90 90 90 90 90 90 90 90 90 90 90 90 90 90 0f 1f 44 00 00 41 54 55 53 65 8b 2d d8 2b 74 65 0f 1f 44 00 00 e8 c6 2c 8b ff fb f4 <65> 8b 2d c5 2b 74 65 0f 1f 44 00 00 5b 5d 41 5c c3 65 8b 05 b4 2b
[   47.938117] RSP: 0018:ffffaf6ac0207ec8 EFLAGS: 00000206 ORIG_RAX: ffffffffffffff13
[   47.938117] RAX: ffff8f5b3735e200 RBX: 0000000000000003 RCX: 0000000000000001
[   47.938117] RDX: 0000000000000001 RSI: 0000000000000001 RDI: ffff8f5b3735e200
[   47.938117] RBP: 0000000000000003 R08: 0000000000000001 R09: 0000000000000000
[   47.938117] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
[   47.938117] R13: 0000000000000000 R14: ffff8f5b3735e200 R15: ffff8f5b3735e200
[   47.938117]  ? default_idle+0x1a/0x140
[   47.938117]  do_idle+0x1bc/0x280
[   47.938117]  cpu_startup_entry+0x19/0x20
[   47.938117]  start_secondary+0x187/0x1c0
[   47.938117]  secondary_startup_64+0xa4/0xb0

The reason seems to be that tipc_net_finalize()->tipc_sk_reinit() is
calling the function rhashtable_walk_enter() within a timer interrupt.
We fix this by executing tipc_net_finalize() in work queue context.

Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/discover.c | 19 ++++++++++---------
 net/tipc/net.c      | 45 +++++++++++++++++++++++++++++++++++++--------
 net/tipc/net.h      |  2 +-
 3 files changed, 48 insertions(+), 18 deletions(-)

diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 2830709957bdd..c138d68e8a695 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -166,7 +166,8 @@ static bool tipc_disc_addr_trial_msg(struct tipc_discoverer *d,
 
 	/* Apply trial address if we just left trial period */
 	if (!trial && !self) {
-		tipc_net_finalize(net, tn->trial_addr);
+		tipc_sched_net_finalize(net, tn->trial_addr);
+		msg_set_prevnode(buf_msg(d->skb), tn->trial_addr);
 		msg_set_type(buf_msg(d->skb), DSC_REQ_MSG);
 	}
 
@@ -300,14 +301,12 @@ static void tipc_disc_timeout(struct timer_list *t)
 		goto exit;
 	}
 
-	/* Trial period over ? */
-	if (!time_before(jiffies, tn->addr_trial_end)) {
-		/* Did we just leave it ? */
-		if (!tipc_own_addr(net))
-			tipc_net_finalize(net, tn->trial_addr);
-
-		msg_set_type(buf_msg(d->skb), DSC_REQ_MSG);
-		msg_set_prevnode(buf_msg(d->skb), tipc_own_addr(net));
+	/* Did we just leave trial period ? */
+	if (!time_before(jiffies, tn->addr_trial_end) && !tipc_own_addr(net)) {
+		mod_timer(&d->timer, jiffies + TIPC_DISC_INIT);
+		spin_unlock_bh(&d->lock);
+		tipc_sched_net_finalize(net, tn->trial_addr);
+		return;
 	}
 
 	/* Adjust timeout interval according to discovery phase */
@@ -319,6 +318,8 @@ static void tipc_disc_timeout(struct timer_list *t)
 			d->timer_intv = TIPC_DISC_SLOW;
 		else if (!d->num_nodes && d->timer_intv > TIPC_DISC_FAST)
 			d->timer_intv = TIPC_DISC_FAST;
+		msg_set_type(buf_msg(d->skb), DSC_REQ_MSG);
+		msg_set_prevnode(buf_msg(d->skb), tn->trial_addr);
 	}
 
 	mod_timer(&d->timer, jiffies + d->timer_intv);
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 62199cf5a56c0..f076edb743382 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -104,6 +104,14 @@
  *     - A local spin_lock protecting the queue of subscriber events.
 */
 
+struct tipc_net_work {
+	struct work_struct work;
+	struct net *net;
+	u32 addr;
+};
+
+static void tipc_net_finalize(struct net *net, u32 addr);
+
 int tipc_net_init(struct net *net, u8 *node_id, u32 addr)
 {
 	if (tipc_own_id(net)) {
@@ -119,17 +127,38 @@ int tipc_net_init(struct net *net, u8 *node_id, u32 addr)
 	return 0;
 }
 
-void tipc_net_finalize(struct net *net, u32 addr)
+static void tipc_net_finalize(struct net *net, u32 addr)
 {
 	struct tipc_net *tn = tipc_net(net);
 
-	if (!cmpxchg(&tn->node_addr, 0, addr)) {
-		tipc_set_node_addr(net, addr);
-		tipc_named_reinit(net);
-		tipc_sk_reinit(net);
-		tipc_nametbl_publish(net, TIPC_CFG_SRV, addr, addr,
-				     TIPC_CLUSTER_SCOPE, 0, addr);
-	}
+	if (cmpxchg(&tn->node_addr, 0, addr))
+		return;
+	tipc_set_node_addr(net, addr);
+	tipc_named_reinit(net);
+	tipc_sk_reinit(net);
+	tipc_nametbl_publish(net, TIPC_CFG_SRV, addr, addr,
+			     TIPC_CLUSTER_SCOPE, 0, addr);
+}
+
+static void tipc_net_finalize_work(struct work_struct *work)
+{
+	struct tipc_net_work *fwork;
+
+	fwork = container_of(work, struct tipc_net_work, work);
+	tipc_net_finalize(fwork->net, fwork->addr);
+	kfree(fwork);
+}
+
+void tipc_sched_net_finalize(struct net *net, u32 addr)
+{
+	struct tipc_net_work *fwork = kzalloc(sizeof(*fwork), GFP_ATOMIC);
+
+	if (!fwork)
+		return;
+	INIT_WORK(&fwork->work, tipc_net_finalize_work);
+	fwork->net = net;
+	fwork->addr = addr;
+	schedule_work(&fwork->work);
 }
 
 void tipc_net_stop(struct net *net)
diff --git a/net/tipc/net.h b/net/tipc/net.h
index 09ad02b50bb1b..b7f2e364eb99e 100644
--- a/net/tipc/net.h
+++ b/net/tipc/net.h
@@ -42,7 +42,7 @@
 extern const struct nla_policy tipc_nl_net_policy[];
 
 int tipc_net_init(struct net *net, u8 *node_id, u32 addr);
-void tipc_net_finalize(struct net *net, u32 addr);
+void tipc_sched_net_finalize(struct net *net, u32 addr);
 void tipc_net_stop(struct net *net);
 int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb);
 int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info);
-- 
GitLab


From 1c1274a56999fbdf9cf84e332b28448bb2d55221 Mon Sep 17 00:00:00 2001
From: Jon Maloy <donmalo99@gmail.com>
Date: Sat, 17 Nov 2018 12:17:06 -0500
Subject: [PATCH 1169/1890] tipc: don't assume linear buffer when reading
 ancillary data

The code for reading ancillary data from a received buffer is assuming
the buffer is linear. To make this assumption true we have to linearize
the buffer before message data is read.

Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/socket.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 636e6131769d8..b57b1be7252ba 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1555,16 +1555,17 @@ static void tipc_sk_set_orig_addr(struct msghdr *m, struct sk_buff *skb)
 /**
  * tipc_sk_anc_data_recv - optionally capture ancillary data for received message
  * @m: descriptor for message info
- * @msg: received message header
+ * @skb: received message buffer
  * @tsk: TIPC port associated with message
  *
  * Note: Ancillary data is not captured if not requested by receiver.
  *
  * Returns 0 if successful, otherwise errno
  */
-static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
+static int tipc_sk_anc_data_recv(struct msghdr *m, struct sk_buff *skb,
 				 struct tipc_sock *tsk)
 {
+	struct tipc_msg *msg;
 	u32 anc_data[3];
 	u32 err;
 	u32 dest_type;
@@ -1573,6 +1574,7 @@ static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
 
 	if (likely(m->msg_controllen == 0))
 		return 0;
+	msg = buf_msg(skb);
 
 	/* Optionally capture errored message object(s) */
 	err = msg ? msg_errcode(msg) : 0;
@@ -1583,6 +1585,9 @@ static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
 		if (res)
 			return res;
 		if (anc_data[1]) {
+			if (skb_linearize(skb))
+				return -ENOMEM;
+			msg = buf_msg(skb);
 			res = put_cmsg(m, SOL_TIPC, TIPC_RETDATA, anc_data[1],
 				       msg_data(msg));
 			if (res)
@@ -1744,9 +1749,10 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
 
 	/* Collect msg meta data, including error code and rejected data */
 	tipc_sk_set_orig_addr(m, skb);
-	rc = tipc_sk_anc_data_recv(m, hdr, tsk);
+	rc = tipc_sk_anc_data_recv(m, skb, tsk);
 	if (unlikely(rc))
 		goto exit;
+	hdr = buf_msg(skb);
 
 	/* Capture data if non-error msg, otherwise just set return value */
 	if (likely(!err)) {
@@ -1856,9 +1862,10 @@ static int tipc_recvstream(struct socket *sock, struct msghdr *m,
 		/* Collect msg meta data, incl. error code and rejected data */
 		if (!copied) {
 			tipc_sk_set_orig_addr(m, skb);
-			rc = tipc_sk_anc_data_recv(m, hdr, tsk);
+			rc = tipc_sk_anc_data_recv(m, skb, tsk);
 			if (rc)
 				break;
+			hdr = buf_msg(skb);
 		}
 
 		/* Copy data if msg ok, otherwise return error/partial data */
-- 
GitLab


From 5d1e9c2212ea6b4dd735e4fc3dd6279a365d5d10 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 27 Aug 2018 10:21:49 +0200
Subject: [PATCH 1170/1890] mtd: rawnand: atmel: fix OF child-node lookup

Use the new of_get_compatible_child() helper to lookup the nfc child
node instead of using of_find_compatible_node(), which searches the
entire tree from a given start node and thus can return an unrelated
(i.e. non-child) node.

This also addresses a potential use-after-free (e.g. after probe
deferral) as the tree-wide helper drops a reference to its first
argument (i.e. the node of the device being probed).

While at it, also fix a related nfc-node reference leak.

Fixes: f88fc122cc34 ("mtd: nand: Cleanup/rework the atmel_nand driver")
Cc: stable <stable@vger.kernel.org>     # 4.11
Cc: Nicolas Ferre <nicolas.ferre@microchip.com>
Cc: Josh Wu <rainyfeeling@outlook.com>
Cc: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
---
 drivers/mtd/nand/raw/atmel/nand-controller.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/mtd/nand/raw/atmel/nand-controller.c b/drivers/mtd/nand/raw/atmel/nand-controller.c
index fb33f6be7c4ff..ad720494e8f78 100644
--- a/drivers/mtd/nand/raw/atmel/nand-controller.c
+++ b/drivers/mtd/nand/raw/atmel/nand-controller.c
@@ -2032,8 +2032,7 @@ atmel_hsmc_nand_controller_legacy_init(struct atmel_hsmc_nand_controller *nc)
 	int ret;
 
 	nand_np = dev->of_node;
-	nfc_np = of_find_compatible_node(dev->of_node, NULL,
-					 "atmel,sama5d3-nfc");
+	nfc_np = of_get_compatible_child(dev->of_node, "atmel,sama5d3-nfc");
 	if (!nfc_np) {
 		dev_err(dev, "Could not find device node for sama5d3-nfc\n");
 		return -ENODEV;
@@ -2447,15 +2446,19 @@ static int atmel_nand_controller_probe(struct platform_device *pdev)
 	}
 
 	if (caps->legacy_of_bindings) {
+		struct device_node *nfc_node;
 		u32 ale_offs = 21;
 
 		/*
 		 * If we are parsing legacy DT props and the DT contains a
 		 * valid NFC node, forward the request to the sama5 logic.
 		 */
-		if (of_find_compatible_node(pdev->dev.of_node, NULL,
-					    "atmel,sama5d3-nfc"))
+		nfc_node = of_get_compatible_child(pdev->dev.of_node,
+						   "atmel,sama5d3-nfc");
+		if (nfc_node) {
 			caps = &atmel_sama5_nand_caps;
+			of_node_put(nfc_node);
+		}
 
 		/*
 		 * Even if the compatible says we are dealing with an
-- 
GitLab


From 33bf5519ae5dd356b182a94e3622f42860274a38 Mon Sep 17 00:00:00 2001
From: Olof Johansson <olof@lixom.net>
Date: Fri, 16 Nov 2018 19:43:27 -0800
Subject: [PATCH 1171/1890] mtd: rawnand: qcom: Namespace prefix some commands

PAGE_READ is used by RISC-V arch code included through mm headers,
and it makes sense to bring in a prefix on these in the driver.

drivers/mtd/nand/raw/qcom_nandc.c:153: warning: "PAGE_READ" redefined
 #define PAGE_READ   0x2
In file included from include/linux/memremap.h:7,
                 from include/linux/mm.h:27,
                 from include/linux/scatterlist.h:8,
                 from include/linux/dma-mapping.h:11,
                 from drivers/mtd/nand/raw/qcom_nandc.c:17:
arch/riscv/include/asm/pgtable.h:48: note: this is the location of the previous definition

Caught by riscv allmodconfig.

Signed-off-by: Olof Johansson <olof@lixom.net>
Reviewed-by: Miquel Raynal <miquel.raynal@bootlin.com>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
---
 drivers/mtd/nand/raw/qcom_nandc.c | 32 +++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/mtd/nand/raw/qcom_nandc.c b/drivers/mtd/nand/raw/qcom_nandc.c
index ef75dfa62a4f8..699d3cf49c6da 100644
--- a/drivers/mtd/nand/raw/qcom_nandc.c
+++ b/drivers/mtd/nand/raw/qcom_nandc.c
@@ -150,15 +150,15 @@
 #define	NAND_VERSION_MINOR_SHIFT	16
 
 /* NAND OP_CMDs */
-#define	PAGE_READ			0x2
-#define	PAGE_READ_WITH_ECC		0x3
-#define	PAGE_READ_WITH_ECC_SPARE	0x4
-#define	PROGRAM_PAGE			0x6
-#define	PAGE_PROGRAM_WITH_ECC		0x7
-#define	PROGRAM_PAGE_SPARE		0x9
-#define	BLOCK_ERASE			0xa
-#define	FETCH_ID			0xb
-#define	RESET_DEVICE			0xd
+#define	OP_PAGE_READ			0x2
+#define	OP_PAGE_READ_WITH_ECC		0x3
+#define	OP_PAGE_READ_WITH_ECC_SPARE	0x4
+#define	OP_PROGRAM_PAGE			0x6
+#define	OP_PAGE_PROGRAM_WITH_ECC	0x7
+#define	OP_PROGRAM_PAGE_SPARE		0x9
+#define	OP_BLOCK_ERASE			0xa
+#define	OP_FETCH_ID			0xb
+#define	OP_RESET_DEVICE			0xd
 
 /* Default Value for NAND_DEV_CMD_VLD */
 #define NAND_DEV_CMD_VLD_VAL		(READ_START_VLD | WRITE_START_VLD | \
@@ -692,11 +692,11 @@ static void update_rw_regs(struct qcom_nand_host *host, int num_cw, bool read)
 
 	if (read) {
 		if (host->use_ecc)
-			cmd = PAGE_READ_WITH_ECC | PAGE_ACC | LAST_PAGE;
+			cmd = OP_PAGE_READ_WITH_ECC | PAGE_ACC | LAST_PAGE;
 		else
-			cmd = PAGE_READ | PAGE_ACC | LAST_PAGE;
+			cmd = OP_PAGE_READ | PAGE_ACC | LAST_PAGE;
 	} else {
-			cmd = PROGRAM_PAGE | PAGE_ACC | LAST_PAGE;
+		cmd = OP_PROGRAM_PAGE | PAGE_ACC | LAST_PAGE;
 	}
 
 	if (host->use_ecc) {
@@ -1170,7 +1170,7 @@ static int nandc_param(struct qcom_nand_host *host)
 	 * in use. we configure the controller to perform a raw read of 512
 	 * bytes to read onfi params
 	 */
-	nandc_set_reg(nandc, NAND_FLASH_CMD, PAGE_READ | PAGE_ACC | LAST_PAGE);
+	nandc_set_reg(nandc, NAND_FLASH_CMD, OP_PAGE_READ | PAGE_ACC | LAST_PAGE);
 	nandc_set_reg(nandc, NAND_ADDR0, 0);
 	nandc_set_reg(nandc, NAND_ADDR1, 0);
 	nandc_set_reg(nandc, NAND_DEV0_CFG0, 0 << CW_PER_PAGE
@@ -1224,7 +1224,7 @@ static int erase_block(struct qcom_nand_host *host, int page_addr)
 	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
 
 	nandc_set_reg(nandc, NAND_FLASH_CMD,
-		      BLOCK_ERASE | PAGE_ACC | LAST_PAGE);
+		      OP_BLOCK_ERASE | PAGE_ACC | LAST_PAGE);
 	nandc_set_reg(nandc, NAND_ADDR0, page_addr);
 	nandc_set_reg(nandc, NAND_ADDR1, 0);
 	nandc_set_reg(nandc, NAND_DEV0_CFG0,
@@ -1255,7 +1255,7 @@ static int read_id(struct qcom_nand_host *host, int column)
 	if (column == -1)
 		return 0;
 
-	nandc_set_reg(nandc, NAND_FLASH_CMD, FETCH_ID);
+	nandc_set_reg(nandc, NAND_FLASH_CMD, OP_FETCH_ID);
 	nandc_set_reg(nandc, NAND_ADDR0, column);
 	nandc_set_reg(nandc, NAND_ADDR1, 0);
 	nandc_set_reg(nandc, NAND_FLASH_CHIP_SELECT,
@@ -1276,7 +1276,7 @@ static int reset(struct qcom_nand_host *host)
 	struct nand_chip *chip = &host->chip;
 	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
 
-	nandc_set_reg(nandc, NAND_FLASH_CMD, RESET_DEVICE);
+	nandc_set_reg(nandc, NAND_FLASH_CMD, OP_RESET_DEVICE);
 	nandc_set_reg(nandc, NAND_EXEC_CMD, 1);
 
 	write_reg_dma(nandc, NAND_FLASH_CMD, 1, NAND_BAM_NEXT_SGL);
-- 
GitLab


From ca0246bb97c23da9d267c2107c07fb77e38205c9 Mon Sep 17 00:00:00 2001
From: Vitaly Wool <vitalywool@gmail.com>
Date: Fri, 16 Nov 2018 15:07:56 -0800
Subject: [PATCH 1172/1890] z3fold: fix possible reclaim races

Reclaim and free can race on an object which is basically fine but in
order for reclaim to be able to map "freed" object we need to encode
object length in the handle.  handle_to_chunks() is then introduced to
extract object length from a handle and use it during mapping.

Moreover, to avoid racing on a z3fold "headless" page release, we should
not try to free that page in z3fold_free() if the reclaim bit is set.
Also, in the unlikely case of trying to reclaim a page being freed, we
should not proceed with that page.

While at it, fix the page accounting in reclaim function.

This patch supersedes "[PATCH] z3fold: fix reclaim lock-ups".

Link: http://lkml.kernel.org/r/20181105162225.74e8837d03583a9b707cf559@gmail.com
Signed-off-by: Vitaly Wool <vitaly.vul@sony.com>
Signed-off-by: Jongseok Kim <ks77sj@gmail.com>
Reported-by-by: Jongseok Kim <ks77sj@gmail.com>
Reviewed-by: Snild Dolkow <snild@sony.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/z3fold.c | 101 ++++++++++++++++++++++++++++++++--------------------
 1 file changed, 62 insertions(+), 39 deletions(-)

diff --git a/mm/z3fold.c b/mm/z3fold.c
index 4b366d181f35d..aee9b0b8d9078 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -99,6 +99,7 @@ struct z3fold_header {
 #define NCHUNKS		((PAGE_SIZE - ZHDR_SIZE_ALIGNED) >> CHUNK_SHIFT)
 
 #define BUDDY_MASK	(0x3)
+#define BUDDY_SHIFT	2
 
 /**
  * struct z3fold_pool - stores metadata for each z3fold pool
@@ -145,7 +146,7 @@ enum z3fold_page_flags {
 	MIDDLE_CHUNK_MAPPED,
 	NEEDS_COMPACTING,
 	PAGE_STALE,
-	UNDER_RECLAIM
+	PAGE_CLAIMED, /* by either reclaim or free */
 };
 
 /*****************
@@ -174,7 +175,7 @@ static struct z3fold_header *init_z3fold_page(struct page *page,
 	clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
 	clear_bit(NEEDS_COMPACTING, &page->private);
 	clear_bit(PAGE_STALE, &page->private);
-	clear_bit(UNDER_RECLAIM, &page->private);
+	clear_bit(PAGE_CLAIMED, &page->private);
 
 	spin_lock_init(&zhdr->page_lock);
 	kref_init(&zhdr->refcount);
@@ -223,8 +224,11 @@ static unsigned long encode_handle(struct z3fold_header *zhdr, enum buddy bud)
 	unsigned long handle;
 
 	handle = (unsigned long)zhdr;
-	if (bud != HEADLESS)
-		handle += (bud + zhdr->first_num) & BUDDY_MASK;
+	if (bud != HEADLESS) {
+		handle |= (bud + zhdr->first_num) & BUDDY_MASK;
+		if (bud == LAST)
+			handle |= (zhdr->last_chunks << BUDDY_SHIFT);
+	}
 	return handle;
 }
 
@@ -234,6 +238,12 @@ static struct z3fold_header *handle_to_z3fold_header(unsigned long handle)
 	return (struct z3fold_header *)(handle & PAGE_MASK);
 }
 
+/* only for LAST bud, returns zero otherwise */
+static unsigned short handle_to_chunks(unsigned long handle)
+{
+	return (handle & ~PAGE_MASK) >> BUDDY_SHIFT;
+}
+
 /*
  * (handle & BUDDY_MASK) < zhdr->first_num is possible in encode_handle
  *  but that doesn't matter. because the masking will result in the
@@ -720,37 +730,39 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
 	page = virt_to_page(zhdr);
 
 	if (test_bit(PAGE_HEADLESS, &page->private)) {
-		/* HEADLESS page stored */
-		bud = HEADLESS;
-	} else {
-		z3fold_page_lock(zhdr);
-		bud = handle_to_buddy(handle);
-
-		switch (bud) {
-		case FIRST:
-			zhdr->first_chunks = 0;
-			break;
-		case MIDDLE:
-			zhdr->middle_chunks = 0;
-			zhdr->start_middle = 0;
-			break;
-		case LAST:
-			zhdr->last_chunks = 0;
-			break;
-		default:
-			pr_err("%s: unknown bud %d\n", __func__, bud);
-			WARN_ON(1);
-			z3fold_page_unlock(zhdr);
-			return;
+		/* if a headless page is under reclaim, just leave.
+		 * NB: we use test_and_set_bit for a reason: if the bit
+		 * has not been set before, we release this page
+		 * immediately so we don't care about its value any more.
+		 */
+		if (!test_and_set_bit(PAGE_CLAIMED, &page->private)) {
+			spin_lock(&pool->lock);
+			list_del(&page->lru);
+			spin_unlock(&pool->lock);
+			free_z3fold_page(page);
+			atomic64_dec(&pool->pages_nr);
 		}
+		return;
 	}
 
-	if (bud == HEADLESS) {
-		spin_lock(&pool->lock);
-		list_del(&page->lru);
-		spin_unlock(&pool->lock);
-		free_z3fold_page(page);
-		atomic64_dec(&pool->pages_nr);
+	/* Non-headless case */
+	z3fold_page_lock(zhdr);
+	bud = handle_to_buddy(handle);
+
+	switch (bud) {
+	case FIRST:
+		zhdr->first_chunks = 0;
+		break;
+	case MIDDLE:
+		zhdr->middle_chunks = 0;
+		break;
+	case LAST:
+		zhdr->last_chunks = 0;
+		break;
+	default:
+		pr_err("%s: unknown bud %d\n", __func__, bud);
+		WARN_ON(1);
+		z3fold_page_unlock(zhdr);
 		return;
 	}
 
@@ -758,7 +770,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
 		atomic64_dec(&pool->pages_nr);
 		return;
 	}
-	if (test_bit(UNDER_RECLAIM, &page->private)) {
+	if (test_bit(PAGE_CLAIMED, &page->private)) {
 		z3fold_page_unlock(zhdr);
 		return;
 	}
@@ -836,20 +848,30 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 		}
 		list_for_each_prev(pos, &pool->lru) {
 			page = list_entry(pos, struct page, lru);
+
+			/* this bit could have been set by free, in which case
+			 * we pass over to the next page in the pool.
+			 */
+			if (test_and_set_bit(PAGE_CLAIMED, &page->private))
+				continue;
+
+			zhdr = page_address(page);
 			if (test_bit(PAGE_HEADLESS, &page->private))
-				/* candidate found */
 				break;
 
-			zhdr = page_address(page);
-			if (!z3fold_page_trylock(zhdr))
+			if (!z3fold_page_trylock(zhdr)) {
+				zhdr = NULL;
 				continue; /* can't evict at this point */
+			}
 			kref_get(&zhdr->refcount);
 			list_del_init(&zhdr->buddy);
 			zhdr->cpu = -1;
-			set_bit(UNDER_RECLAIM, &page->private);
 			break;
 		}
 
+		if (!zhdr)
+			break;
+
 		list_del_init(&page->lru);
 		spin_unlock(&pool->lock);
 
@@ -898,6 +920,7 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 		if (test_bit(PAGE_HEADLESS, &page->private)) {
 			if (ret == 0) {
 				free_z3fold_page(page);
+				atomic64_dec(&pool->pages_nr);
 				return 0;
 			}
 			spin_lock(&pool->lock);
@@ -905,7 +928,7 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 			spin_unlock(&pool->lock);
 		} else {
 			z3fold_page_lock(zhdr);
-			clear_bit(UNDER_RECLAIM, &page->private);
+			clear_bit(PAGE_CLAIMED, &page->private);
 			if (kref_put(&zhdr->refcount,
 					release_z3fold_page_locked)) {
 				atomic64_dec(&pool->pages_nr);
@@ -964,7 +987,7 @@ static void *z3fold_map(struct z3fold_pool *pool, unsigned long handle)
 		set_bit(MIDDLE_CHUNK_MAPPED, &page->private);
 		break;
 	case LAST:
-		addr += PAGE_SIZE - (zhdr->last_chunks << CHUNK_SHIFT);
+		addr += PAGE_SIZE - (handle_to_chunks(handle) << CHUNK_SHIFT);
 		break;
 	default:
 		pr_err("unknown buddy id %d\n", buddy);
-- 
GitLab


From 8fcb2312d1e3300e81aa871aad00d4c038cfc184 Mon Sep 17 00:00:00 2001
From: Olof Johansson <olof@lixom.net>
Date: Fri, 16 Nov 2018 15:08:00 -0800
Subject: [PATCH 1173/1890] kernel/sched/psi.c: simplify cgroup_move_task()

The existing code triggered an invalid warning about 'rq' possibly being
used uninitialized.  Instead of doing the silly warning suppression by
initializa it to NULL, refactor the code to bail out early instead.

Warning was:

  kernel/sched/psi.c: In function `cgroup_move_task':
  kernel/sched/psi.c:639:13: warning: `rq' may be used uninitialized in this function [-Wmaybe-uninitialized]

Link: http://lkml.kernel.org/r/20181103183339.8669-1-olof@lixom.net
Fixes: 2ce7135adc9ad ("psi: cgroup support")
Signed-off-by: Olof Johansson <olof@lixom.net>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/sched/psi.c | 43 ++++++++++++++++++++++---------------------
 1 file changed, 22 insertions(+), 21 deletions(-)

diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index 7cdecfc010af8..3d7355d7c3e38 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -633,38 +633,39 @@ void psi_cgroup_free(struct cgroup *cgroup)
  */
 void cgroup_move_task(struct task_struct *task, struct css_set *to)
 {
-	bool move_psi = !psi_disabled;
 	unsigned int task_flags = 0;
 	struct rq_flags rf;
 	struct rq *rq;
 
-	if (move_psi) {
-		rq = task_rq_lock(task, &rf);
+	if (psi_disabled) {
+		/*
+		 * Lame to do this here, but the scheduler cannot be locked
+		 * from the outside, so we move cgroups from inside sched/.
+		 */
+		rcu_assign_pointer(task->cgroups, to);
+		return;
+	}
 
-		if (task_on_rq_queued(task))
-			task_flags = TSK_RUNNING;
-		else if (task->in_iowait)
-			task_flags = TSK_IOWAIT;
+	rq = task_rq_lock(task, &rf);
 
-		if (task->flags & PF_MEMSTALL)
-			task_flags |= TSK_MEMSTALL;
+	if (task_on_rq_queued(task))
+		task_flags = TSK_RUNNING;
+	else if (task->in_iowait)
+		task_flags = TSK_IOWAIT;
 
-		if (task_flags)
-			psi_task_change(task, task_flags, 0);
-	}
+	if (task->flags & PF_MEMSTALL)
+		task_flags |= TSK_MEMSTALL;
 
-	/*
-	 * Lame to do this here, but the scheduler cannot be locked
-	 * from the outside, so we move cgroups from inside sched/.
-	 */
+	if (task_flags)
+		psi_task_change(task, task_flags, 0);
+
+	/* See comment above */
 	rcu_assign_pointer(task->cgroups, to);
 
-	if (move_psi) {
-		if (task_flags)
-			psi_task_change(task, 0, task_flags);
+	if (task_flags)
+		psi_task_change(task, 0, task_flags);
 
-		task_rq_unlock(rq, task, &rf);
-	}
+	task_rq_unlock(rq, task, &rf);
 }
 #endif /* CONFIG_CGROUPS */
 
-- 
GitLab


From 5e41540c8a0f0e98c337dda8b391e5dda0cde7cf Mon Sep 17 00:00:00 2001
From: Mike Kravetz <mike.kravetz@oracle.com>
Date: Fri, 16 Nov 2018 15:08:04 -0800
Subject: [PATCH 1174/1890] hugetlbfs: fix kernel BUG at
 fs/hugetlbfs/inode.c:444!

This bug has been experienced several times by the Oracle DB team.  The
BUG is in remove_inode_hugepages() as follows:

	/*
	 * If page is mapped, it was faulted in after being
	 * unmapped in caller.  Unmap (again) now after taking
	 * the fault mutex.  The mutex will prevent faults
	 * until we finish removing the page.
	 *
	 * This race can only happen in the hole punch case.
	 * Getting here in a truncate operation is a bug.
	 */
	if (unlikely(page_mapped(page))) {
		BUG_ON(truncate_op);

In this case, the elevated map count is not the result of a race.
Rather it was incorrectly incremented as the result of a bug in the huge
pmd sharing code.  Consider the following:

 - Process A maps a hugetlbfs file of sufficient size and alignment
   (PUD_SIZE) that a pmd page could be shared.

 - Process B maps the same hugetlbfs file with the same size and
   alignment such that a pmd page is shared.

 - Process B then calls mprotect() to change protections for the mapping
   with the shared pmd. As a result, the pmd is 'unshared'.

 - Process B then calls mprotect() again to chage protections for the
   mapping back to their original value. pmd remains unshared.

 - Process B then forks and process C is created. During the fork
   process, we do dup_mm -> dup_mmap -> copy_page_range to copy page
   tables. Copying page tables for hugetlb mappings is done in the
   routine copy_hugetlb_page_range.

In copy_hugetlb_page_range(), the destination pte is obtained by:

	dst_pte = huge_pte_alloc(dst, addr, sz);

If pmd sharing is possible, the returned pointer will be to a pte in an
existing page table.  In the situation above, process C could share with
either process A or process B.  Since process A is first in the list,
the returned pte is a pointer to a pte in process A's page table.

However, the check for pmd sharing in copy_hugetlb_page_range is:

	/* If the pagetables are shared don't copy or take references */
	if (dst_pte == src_pte)
		continue;

Since process C is sharing with process A instead of process B, the
above test fails.  The code in copy_hugetlb_page_range which follows
assumes dst_pte points to a huge_pte_none pte.  It copies the pte entry
from src_pte to dst_pte and increments this map count of the associated
page.  This is how we end up with an elevated map count.

To solve, check the dst_pte entry for huge_pte_none.  If !none, this
implies PMD sharing so do not copy.

Link: http://lkml.kernel.org/r/20181105212315.14125-1-mike.kravetz@oracle.com
Fixes: c5c99429fa57 ("fix hugepages leak due to pagetable page sharing")
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Prakash Sangappa <prakash.sangappa@oracle.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/hugetlb.c | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index c007fb5fb8d5f..7f2a28ab46d53 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3233,7 +3233,7 @@ static int is_hugetlb_entry_hwpoisoned(pte_t pte)
 int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 			    struct vm_area_struct *vma)
 {
-	pte_t *src_pte, *dst_pte, entry;
+	pte_t *src_pte, *dst_pte, entry, dst_entry;
 	struct page *ptepage;
 	unsigned long addr;
 	int cow;
@@ -3261,15 +3261,30 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 			break;
 		}
 
-		/* If the pagetables are shared don't copy or take references */
-		if (dst_pte == src_pte)
+		/*
+		 * If the pagetables are shared don't copy or take references.
+		 * dst_pte == src_pte is the common case of src/dest sharing.
+		 *
+		 * However, src could have 'unshared' and dst shares with
+		 * another vma.  If dst_pte !none, this implies sharing.
+		 * Check here before taking page table lock, and once again
+		 * after taking the lock below.
+		 */
+		dst_entry = huge_ptep_get(dst_pte);
+		if ((dst_pte == src_pte) || !huge_pte_none(dst_entry))
 			continue;
 
 		dst_ptl = huge_pte_lock(h, dst, dst_pte);
 		src_ptl = huge_pte_lockptr(h, src, src_pte);
 		spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
 		entry = huge_ptep_get(src_pte);
-		if (huge_pte_none(entry)) { /* skip none entry */
+		dst_entry = huge_ptep_get(dst_pte);
+		if (huge_pte_none(entry) || !huge_pte_none(dst_entry)) {
+			/*
+			 * Skip if src entry none.  Also, skip in the
+			 * unlikely case dst entry !none as this implies
+			 * sharing with another vma.
+			 */
 			;
 		} else if (unlikely(is_hugetlb_entry_migration(entry) ||
 				    is_hugetlb_entry_hwpoisoned(entry))) {
-- 
GitLab


From f341e16fb67ddc199582d187b0d39120a9dfd2bf Mon Sep 17 00:00:00 2001
From: Aaro Koskinen <aaro.koskinen@iki.fi>
Date: Fri, 16 Nov 2018 15:08:08 -0800
Subject: [PATCH 1175/1890] MAINTAINERS: update OMAP MMC entry

Jarkko's e-mail address hasn't worked for a long time.  We still want to
keep this driver working as it is critical for some of the OMAP boards.
I use and test this driver frequently, so change myself as a maintainer
with "Odd Fixes" status.

Link: http://lkml.kernel.org/r/20181106222750.12939-1-aaro.koskinen@iki.fi
Signed-off-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 CREDITS     | 4 ++++
 MAINTAINERS | 4 ++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/CREDITS b/CREDITS
index 5befd2d714d00..84cbec4c62115 100644
--- a/CREDITS
+++ b/CREDITS
@@ -2138,6 +2138,10 @@ E: paul@laufernet.com
 D: Soundblaster driver fixes, ISAPnP quirk
 S: California, USA
 
+N: Jarkko Lavinen
+E: jarkko.lavinen@nokia.com
+D: OMAP MMC support
+
 N: Jonathan Layes
 D: ARPD support
 
diff --git a/MAINTAINERS b/MAINTAINERS
index 6c3fbbb361f82..b755a89fa3256 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10808,9 +10808,9 @@ F:	drivers/media/platform/omap3isp/
 F:	drivers/staging/media/omap4iss/
 
 OMAP MMC SUPPORT
-M:	Jarkko Lavinen <jarkko.lavinen@nokia.com>
+M:	Aaro Koskinen <aaro.koskinen@iki.fi>
 L:	linux-omap@vger.kernel.org
-S:	Maintained
+S:	Odd Fixes
 F:	drivers/mmc/host/omap.c
 
 OMAP POWER MANAGEMENT SUPPORT
-- 
GitLab


From 873d7bcfd066663e3e50113dc4a0de19289b6354 Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@virtuozzo.com>
Date: Fri, 16 Nov 2018 15:08:11 -0800
Subject: [PATCH 1176/1890] mm/swapfile.c: use kvzalloc for swap_info_struct
 allocation

Commit a2468cc9bfdf ("swap: choose swap device according to numa node")
changed 'avail_lists' field of 'struct swap_info_struct' to an array.
In popular linux distros it increased size of swap_info_struct up to 40
Kbytes and now swap_info_struct allocation requires order-4 page.
Switch to kvzmalloc allows to avoid unexpected allocation failures.

Link: http://lkml.kernel.org/r/fc23172d-3c75-21e2-d551-8b1808cbe593@virtuozzo.com
Fixes: a2468cc9bfdf ("swap: choose swap device according to numa node")
Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Acked-by: Aaron Lu <aaron.lu@intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Huang Ying <ying.huang@intel.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/swapfile.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/mm/swapfile.c b/mm/swapfile.c
index 644f746e167ac..8688ae65ef58a 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2813,7 +2813,7 @@ static struct swap_info_struct *alloc_swap_info(void)
 	unsigned int type;
 	int i;
 
-	p = kzalloc(sizeof(*p), GFP_KERNEL);
+	p = kvzalloc(sizeof(*p), GFP_KERNEL);
 	if (!p)
 		return ERR_PTR(-ENOMEM);
 
@@ -2824,7 +2824,7 @@ static struct swap_info_struct *alloc_swap_info(void)
 	}
 	if (type >= MAX_SWAPFILES) {
 		spin_unlock(&swap_lock);
-		kfree(p);
+		kvfree(p);
 		return ERR_PTR(-EPERM);
 	}
 	if (type >= nr_swapfiles) {
@@ -2838,7 +2838,7 @@ static struct swap_info_struct *alloc_swap_info(void)
 		smp_wmb();
 		nr_swapfiles++;
 	} else {
-		kfree(p);
+		kvfree(p);
 		p = swap_info[type];
 		/*
 		 * Do not memset this entry: a racing procfs swap_next()
-- 
GitLab


From 9d7899999c62c1a81129b76d2a6ecbc4655e1597 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Fri, 16 Nov 2018 15:08:15 -0800
Subject: [PATCH 1177/1890] mm, memory_hotplug: check zone_movable in
 has_unmovable_pages

Page state checks are racy.  Under a heavy memory workload (e.g.  stress
-m 200 -t 2h) it is quite easy to hit a race window when the page is
allocated but its state is not fully populated yet.  A debugging patch to
dump the struct page state shows

  has_unmovable_pages: pfn:0x10dfec00, found:0x1, count:0x0
  page:ffffea0437fb0000 count:1 mapcount:1 mapping:ffff880e05239841 index:0x7f26e5000 compound_mapcount: 1
  flags: 0x5fffffc0090034(uptodate|lru|active|head|swapbacked)

Note that the state has been checked for both PageLRU and PageSwapBacked
already.  Closing this race completely would require some sort of retry
logic.  This can be tricky and error prone (think of potential endless
or long taking loops).

Workaround this problem for movable zones at least.  Such a zone should
only contain movable pages.  Commit 15c30bc09085 ("mm, memory_hotplug:
make has_unmovable_pages more robust") has told us that this is not
strictly true though.  Bootmem pages should be marked reserved though so
we can move the original check after the PageReserved check.  Pages from
other zones are still prone to races but we even do not pretend that
memory hotremove works for those so pre-mature failure doesn't hurt that
much.

Link: http://lkml.kernel.org/r/20181106095524.14629-1-mhocko@kernel.org
Fixes: 15c30bc09085 ("mm, memory_hotplug: make has_unmovable_pages more robust")
Signed-off-by: Michal Hocko <mhocko@suse.com>
Reported-by: Baoquan He <bhe@redhat.com>
Tested-by: Baoquan He <bhe@redhat.com>
Acked-by: Baoquan He <bhe@redhat.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Acked-by: Balbir Singh <bsingharora@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_alloc.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a919ba5cb3c84..a0e5ec0addd24 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -7788,6 +7788,14 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
 		if (PageReserved(page))
 			goto unmovable;
 
+		/*
+		 * If the zone is movable and we have ruled out all reserved
+		 * pages then it should be reasonably safe to assume the rest
+		 * is movable.
+		 */
+		if (zone_idx(zone) == ZONE_MOVABLE)
+			continue;
+
 		/*
 		 * Hugepages are not in LRU lists, but they're movable.
 		 * We need not scan over tail pages bacause we don't
-- 
GitLab


From a76cf1a474d7dbcd9336b5f5afb0162baa142cf0 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Fri, 16 Nov 2018 15:08:18 -0800
Subject: [PATCH 1178/1890] mm: don't reclaim inodes with many attached pages

Spock reported that commit 172b06c32b94 ("mm: slowly shrink slabs with a
relatively small number of objects") leads to a regression on his setup:
periodically the majority of the pagecache is evicted without an obvious
reason, while before the change the amount of free memory was balancing
around the watermark.

The reason behind is that the mentioned above change created some
minimal background pressure on the inode cache.  The problem is that if
an inode is considered to be reclaimed, all belonging pagecache page are
stripped, no matter how many of them are there.  So, if a huge
multi-gigabyte file is cached in the memory, and the goal is to reclaim
only few slab objects (unused inodes), we still can eventually evict all
gigabytes of the pagecache at once.

The workload described by Spock has few large non-mapped files in the
pagecache, so it's especially noticeable.

To solve the problem let's postpone the reclaim of inodes, which have
more than 1 attached page.  Let's wait until the pagecache pages will be
evicted naturally by scanning the corresponding LRU lists, and only then
reclaim the inode structure.

Link: http://lkml.kernel.org/r/20181023164302.20436-1-guro@fb.com
Signed-off-by: Roman Gushchin <guro@fb.com>
Reported-by: Spock <dairinin@gmail.com>
Tested-by: Spock <dairinin@gmail.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Rik van Riel <riel@surriel.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: <stable@vger.kernel.org>	[4.19.x]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/inode.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/fs/inode.c b/fs/inode.c
index 9e198f00b64c6..35d2108d567c2 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -730,8 +730,11 @@ static enum lru_status inode_lru_isolate(struct list_head *item,
 		return LRU_REMOVED;
 	}
 
-	/* recently referenced inodes get one more pass */
-	if (inode->i_state & I_REFERENCED) {
+	/*
+	 * Recently referenced inodes and inodes with many attached pages
+	 * get one more pass.
+	 */
+	if (inode->i_state & I_REFERENCED || inode->i_data.nrpages > 1) {
 		inode->i_state &= ~I_REFERENCED;
 		spin_unlock(&inode->i_lock);
 		return LRU_ROTATE;
-- 
GitLab


From f5f67cc0e0d3d17ee046ba83f831f267767b8554 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Fri, 16 Nov 2018 15:08:22 -0800
Subject: [PATCH 1179/1890] scripts/faddr2line: fix location of start_kernel in
 comment

Fix a source file reference location to the correct path name.

Link: http://lkml.kernel.org/r/1d50bd3d-178e-dcd8-779f-9711887440eb@infradead.org
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/faddr2line | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/faddr2line b/scripts/faddr2line
index a0149db00be75..6c6439f69a725 100755
--- a/scripts/faddr2line
+++ b/scripts/faddr2line
@@ -71,7 +71,7 @@ die() {
 
 # Try to figure out the source directory prefix so we can remove it from the
 # addr2line output.  HACK ALERT: This assumes that start_kernel() is in
-# kernel/init.c!  This only works for vmlinux.  Otherwise it falls back to
+# init/main.c!  This only works for vmlinux.  Otherwise it falls back to
 # printing the absolute path.
 find_dir_prefix() {
 	local objfile=$1
-- 
GitLab


From 5040f8df56fb90c7919f1c9b0b6e54c843437456 Mon Sep 17 00:00:00 2001
From: Wengang Wang <wen.gang.wang@oracle.com>
Date: Fri, 16 Nov 2018 15:08:25 -0800
Subject: [PATCH 1180/1890] ocfs2: free up write context when direct IO failed

The write context should also be freed even when direct IO failed.
Otherwise a memory leak is introduced and entries remain in
oi->ip_unwritten_list causing the following BUG later in unlink path:

  ERROR: bug expression: !list_empty(&oi->ip_unwritten_list)
  ERROR: Clear inode of 215043, inode has unwritten extents
  ...
  Call Trace:
  ? __set_current_blocked+0x42/0x68
  ocfs2_evict_inode+0x91/0x6a0 [ocfs2]
  ? bit_waitqueue+0x40/0x33
  evict+0xdb/0x1af
  iput+0x1a2/0x1f7
  do_unlinkat+0x194/0x28f
  SyS_unlinkat+0x1b/0x2f
  do_syscall_64+0x79/0x1ae
  entry_SYSCALL_64_after_hwframe+0x151/0x0

This patch also logs, with frequency limit, direct IO failures.

Link: http://lkml.kernel.org/r/20181102170632.25921-1-wen.gang.wang@oracle.com
Signed-off-by: Wengang Wang <wen.gang.wang@oracle.com>
Reviewed-by: Junxiao Bi <junxiao.bi@oracle.com>
Reviewed-by: Changwei Ge <ge.changwei@h3c.com>
Reviewed-by: Joseph Qi <jiangqi903@gmail.com>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ocfs2/aops.c            | 12 ++++++++++--
 fs/ocfs2/cluster/masklog.h |  9 +++++++++
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index da578ad4c08f4..eb1ce30412dc3 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -2411,8 +2411,16 @@ static int ocfs2_dio_end_io(struct kiocb *iocb,
 	/* this io's submitter should not have unlocked this before we could */
 	BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
 
-	if (bytes > 0 && private)
-		ret = ocfs2_dio_end_io_write(inode, private, offset, bytes);
+	if (bytes <= 0)
+		mlog_ratelimited(ML_ERROR, "Direct IO failed, bytes = %lld",
+				 (long long)bytes);
+	if (private) {
+		if (bytes > 0)
+			ret = ocfs2_dio_end_io_write(inode, private, offset,
+						     bytes);
+		else
+			ocfs2_dio_free_write_ctx(inode, private);
+	}
 
 	ocfs2_iocb_clear_rw_locked(iocb);
 
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index 308ea0eb35fd1..a396096a5099f 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -178,6 +178,15 @@ do {									\
 			      ##__VA_ARGS__);				\
 } while (0)
 
+#define mlog_ratelimited(mask, fmt, ...)				\
+do {									\
+	static DEFINE_RATELIMIT_STATE(_rs,				\
+				      DEFAULT_RATELIMIT_INTERVAL,	\
+				      DEFAULT_RATELIMIT_BURST);		\
+	if (__ratelimit(&_rs))						\
+		mlog(mask, fmt, ##__VA_ARGS__);				\
+} while (0)
+
 #define mlog_errno(st) ({						\
 	int _st = (st);							\
 	if (_st != -ERESTARTSYS && _st != -EINTR &&			\
-- 
GitLab


From 78179556e7605ba07fd3ddba6ab8891fa457b93e Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Fri, 16 Nov 2018 15:08:29 -0800
Subject: [PATCH 1181/1890] mm/gup.c: fix follow_page_mask() kerneldoc comment

Commit df06b37ffe5a ("mm/gup: cache dev_pagemap while pinning pages")
modified the signature of follow_page_mask() but left the parameter
description behind.

Update the description to make the code and comments agree again.

While at it, update formatting of the return value description to match
Documentation/doc-guide/kernel-doc.rst guidelines.

Link: http://lkml.kernel.org/r/1541603316-27832-1-git-send-email-rppt@linux.ibm.com
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/gup.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/mm/gup.c b/mm/gup.c
index f76e77a2d34b7..aa43620a3270e 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -385,11 +385,17 @@ static struct page *follow_p4d_mask(struct vm_area_struct *vma,
  * @vma: vm_area_struct mapping @address
  * @address: virtual address to look up
  * @flags: flags modifying lookup behaviour
- * @page_mask: on output, *page_mask is set according to the size of the page
+ * @ctx: contains dev_pagemap for %ZONE_DEVICE memory pinning and a
+ *       pointer to output page_mask
  *
  * @flags can have FOLL_ flags set, defined in <linux/mm.h>
  *
- * Returns the mapped (struct page *), %NULL if no mapping exists, or
+ * When getting pages from ZONE_DEVICE memory, the @ctx->pgmap caches
+ * the device's dev_pagemap metadata to avoid repeating expensive lookups.
+ *
+ * On output, the @ctx->page_mask is set according to the size of the page.
+ *
+ * Return: the mapped (struct page *), %NULL if no mapping exists, or
  * an error pointer if there is a mapping to something not represented
  * by a page descriptor (see also vm_normal_page()).
  */
-- 
GitLab


From 13c9aaf7fa01cc7600c61981609feadeef3354ec Mon Sep 17 00:00:00 2001
From: Janne Huttunen <janne.huttunen@nokia.com>
Date: Fri, 16 Nov 2018 15:08:32 -0800
Subject: [PATCH 1182/1890] mm/vmstat.c: fix NUMA statistics updates

Scan through the whole array to see if an update is needed.  While we're
at it, use sizeof() to be safe against any possible type changes in the
future.

The bug here is that we wouldn't sync per-cpu counters into global ones
if there was an update of numa_stats for higher cpus.  Highly
theoretical one though because it is much more probable that zone_stats
are updated so we would refresh anyway.  So I wouldn't bother to mark
this for stable, yet something nice to fix.

[mhocko@suse.com: changelog enhancement]
Link: http://lkml.kernel.org/r/1541601517-17282-1-git-send-email-janne.huttunen@nokia.com
Fixes: 1d90ca897cb0 ("mm: update NUMA counter threshold size")
Signed-off-by: Janne Huttunen <janne.huttunen@nokia.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/vmstat.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/mm/vmstat.c b/mm/vmstat.c
index 6038ce593ce3e..9c624595e9041 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1827,12 +1827,13 @@ static bool need_update(int cpu)
 
 		/*
 		 * The fast way of checking if there are any vmstat diffs.
-		 * This works because the diffs are byte sized items.
 		 */
-		if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS))
+		if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS *
+			       sizeof(p->vm_stat_diff[0])))
 			return true;
 #ifdef CONFIG_NUMA
-		if (memchr_inv(p->vm_numa_stat_diff, 0, NR_VM_NUMA_STAT_ITEMS))
+		if (memchr_inv(p->vm_numa_stat_diff, 0, NR_VM_NUMA_STAT_ITEMS *
+			       sizeof(p->vm_numa_stat_diff[0])))
 			return true;
 #endif
 	}
-- 
GitLab


From 1c23b4108d716cc848b38532063a8aca4f86add8 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 16 Nov 2018 15:08:35 -0800
Subject: [PATCH 1183/1890] lib/ubsan.c: don't mark
 __ubsan_handle_builtin_unreachable as noreturn

gcc-8 complains about the prototype for this function:

  lib/ubsan.c:432:1: error: ignoring attribute 'noreturn' in declaration of a built-in function '__ubsan_handle_builtin_unreachable' because it conflicts with attribute 'const' [-Werror=attributes]

This is actually a GCC's bug. In GCC internals
__ubsan_handle_builtin_unreachable() declared with both 'noreturn' and
'const' attributes instead of only 'noreturn':

   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=84210

Workaround this by removing the noreturn attribute.

[aryabinin: add information about GCC bug in changelog]
Link: http://lkml.kernel.org/r/20181107144516.4587-1-aryabinin@virtuozzo.com
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Acked-by: Olof Johansson <olof@lixom.net>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/ubsan.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/lib/ubsan.c b/lib/ubsan.c
index 59fee96c29a0f..e4162f59a81cc 100644
--- a/lib/ubsan.c
+++ b/lib/ubsan.c
@@ -427,8 +427,7 @@ void __ubsan_handle_shift_out_of_bounds(struct shift_out_of_bounds_data *data,
 EXPORT_SYMBOL(__ubsan_handle_shift_out_of_bounds);
 
 
-void __noreturn
-__ubsan_handle_builtin_unreachable(struct unreachable_data *data)
+void __ubsan_handle_builtin_unreachable(struct unreachable_data *data)
 {
 	unsigned long flags;
 
-- 
GitLab


From 1a413646931cb14442065cfc17561e50f5b5bb44 Mon Sep 17 00:00:00 2001
From: Yufen Yu <yuyufen@huawei.com>
Date: Fri, 16 Nov 2018 15:08:39 -0800
Subject: [PATCH 1184/1890] tmpfs: make lseek(SEEK_DATA/SEK_HOLE) return ENXIO
 with a negative offset

Other filesystems such as ext4, f2fs and ubifs all return ENXIO when
lseek (SEEK_DATA or SEEK_HOLE) requests a negative offset.

man 2 lseek says

:      EINVAL whence  is  not  valid.   Or: the resulting file offset would be
:             negative, or beyond the end of a seekable device.
:
:      ENXIO  whence is SEEK_DATA or SEEK_HOLE, and the file offset is  beyond
:             the end of the file.

Make tmpfs return ENXIO under these circumstances as well.  After this,
tmpfs also passes xfstests's generic/448.

[akpm@linux-foundation.org: rewrite changelog]
Link: http://lkml.kernel.org/r/1540434176-14349-1-git-send-email-yuyufen@huawei.com
Signed-off-by: Yufen Yu <yuyufen@huawei.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Hugh Dickins <hughd@google.com>
Cc: William Kucharski <william.kucharski@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/shmem.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index ea26d7a0342d7..d44991ea5ed4b 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2563,9 +2563,7 @@ static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
 	inode_lock(inode);
 	/* We're holding i_mutex so we can access i_size directly */
 
-	if (offset < 0)
-		offset = -EINVAL;
-	else if (offset >= inode->i_size)
+	if (offset < 0 || offset >= inode->i_size)
 		offset = -ENXIO;
 	else {
 		start = offset >> PAGE_SHIFT;
-- 
GitLab


From 6f4d29df66acd49303a99025046b85cabe7aa17a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 16 Nov 2018 15:08:43 -0800
Subject: [PATCH 1185/1890] scripts/spdxcheck.py: make python3 compliant
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Without this change the following happens when using Python3 (3.6.6):

	$ echo "GPL-2.0" | python3 scripts/spdxcheck.py -
	FAIL: 'str' object has no attribute 'decode'
	Traceback (most recent call last):
	  File "scripts/spdxcheck.py", line 253, in <module>
	    parser.parse_lines(sys.stdin, args.maxlines, '-')
	  File "scripts/spdxcheck.py", line 171, in parse_lines
	    line = line.decode(locale.getpreferredencoding(False), errors='ignore')
	AttributeError: 'str' object has no attribute 'decode'

So as the line is already a string, there is no need to decode it and
the line can be dropped.

/usr/bin/python on Arch is Python 3.  So this would indeed be worth
going into 4.19.

Link: http://lkml.kernel.org/r/20181023070802.22558-1-u.kleine-koenig@pengutronix.de
Signed-off-by: Uwe Kleine-KÃ¶nig <u.kleine-koenig@pengutronix.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Joe Perches <joe@perches.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/spdxcheck.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/scripts/spdxcheck.py b/scripts/spdxcheck.py
index 839e190bbd7a0..5056fb3b897d0 100755
--- a/scripts/spdxcheck.py
+++ b/scripts/spdxcheck.py
@@ -168,7 +168,6 @@ class id_parser(object):
         self.curline = 0
         try:
             for line in fd:
-                line = line.decode(locale.getpreferredencoding(False), errors='ignore')
                 self.curline += 1
                 if self.curline > maxlines:
                     break
-- 
GitLab


From c63ae43ba53bc432b414fd73dd5f4b01fcb1ab43 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Fri, 16 Nov 2018 15:08:53 -0800
Subject: [PATCH 1186/1890] mm, page_alloc: check for max order in hot path

Konstantin has noticed that kvmalloc might trigger the following
warning:

  WARNING: CPU: 0 PID: 6676 at mm/vmstat.c:986 __fragmentation_index+0x54/0x60
  [...]
  Call Trace:
   fragmentation_index+0x76/0x90
   compaction_suitable+0x4f/0xf0
   shrink_node+0x295/0x310
   node_reclaim+0x205/0x250
   get_page_from_freelist+0x649/0xad0
   __alloc_pages_nodemask+0x12a/0x2a0
   kmalloc_large_node+0x47/0x90
   __kmalloc_node+0x22b/0x2e0
   kvmalloc_node+0x3e/0x70
   xt_alloc_table_info+0x3a/0x80 [x_tables]
   do_ip6t_set_ctl+0xcd/0x1c0 [ip6_tables]
   nf_setsockopt+0x44/0x60
   SyS_setsockopt+0x6f/0xc0
   do_syscall_64+0x67/0x120
   entry_SYSCALL_64_after_hwframe+0x3d/0xa2

the problem is that we only check for an out of bound order in the slow
path and the node reclaim might happen from the fast path already.  This
is fixable by making sure that kvmalloc doesn't ever use kmalloc for
requests that are larger than KMALLOC_MAX_SIZE but this also shows that
the code is rather fragile.  A recent UBSAN report just underlines that
by the following report

  UBSAN: Undefined behaviour in mm/page_alloc.c:3117:19
  shift exponent 51 is too large for 32-bit type 'int'
  CPU: 0 PID: 6520 Comm: syz-executor1 Not tainted 4.19.0-rc2 #1
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
  Call Trace:
   __dump_stack lib/dump_stack.c:77 [inline]
   dump_stack+0xd2/0x148 lib/dump_stack.c:113
   ubsan_epilogue+0x12/0x94 lib/ubsan.c:159
   __ubsan_handle_shift_out_of_bounds+0x2b6/0x30b lib/ubsan.c:425
   __zone_watermark_ok+0x2c7/0x400 mm/page_alloc.c:3117
   zone_watermark_fast mm/page_alloc.c:3216 [inline]
   get_page_from_freelist+0xc49/0x44c0 mm/page_alloc.c:3300
   __alloc_pages_nodemask+0x21e/0x640 mm/page_alloc.c:4370
   alloc_pages_current+0xcc/0x210 mm/mempolicy.c:2093
   alloc_pages include/linux/gfp.h:509 [inline]
   __get_free_pages+0x12/0x60 mm/page_alloc.c:4414
   dma_mem_alloc+0x36/0x50 arch/x86/include/asm/floppy.h:156
   raw_cmd_copyin drivers/block/floppy.c:3159 [inline]
   raw_cmd_ioctl drivers/block/floppy.c:3206 [inline]
   fd_locked_ioctl+0xa00/0x2c10 drivers/block/floppy.c:3544
   fd_ioctl+0x40/0x60 drivers/block/floppy.c:3571
   __blkdev_driver_ioctl block/ioctl.c:303 [inline]
   blkdev_ioctl+0xb3c/0x1a30 block/ioctl.c:601
   block_ioctl+0x105/0x150 fs/block_dev.c:1883
   vfs_ioctl fs/ioctl.c:46 [inline]
   do_vfs_ioctl+0x1c0/0x1150 fs/ioctl.c:687
   ksys_ioctl+0x9e/0xb0 fs/ioctl.c:702
   __do_sys_ioctl fs/ioctl.c:709 [inline]
   __se_sys_ioctl fs/ioctl.c:707 [inline]
   __x64_sys_ioctl+0x7e/0xc0 fs/ioctl.c:707
   do_syscall_64+0xc4/0x510 arch/x86/entry/common.c:290
   entry_SYSCALL_64_after_hwframe+0x49/0xbe

Note that this is not a kvmalloc path.  It is just that the fast path
really depends on having sanitzed order as well.  Therefore move the
order check to the fast path.

Link: http://lkml.kernel.org/r/20181113094305.GM15120@dhcp22.suse.cz
Signed-off-by: Michal Hocko <mhocko@suse.com>
Reported-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Reported-by: Kyungtae Kim <kt0755@gmail.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Pavel Tatashin <pavel.tatashin@microsoft.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Aaron Lu <aaron.lu@intel.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Byoungyoung Lee <lifeasageek@gmail.com>
Cc: "Dae R. Jeong" <threeearcat@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_alloc.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a0e5ec0addd24..6847177dc4a1a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4060,17 +4060,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	unsigned int cpuset_mems_cookie;
 	int reserve_flags;
 
-	/*
-	 * In the slowpath, we sanity check order to avoid ever trying to
-	 * reclaim >= MAX_ORDER areas which will never succeed. Callers may
-	 * be using allocators in order of preference for an area that is
-	 * too large.
-	 */
-	if (order >= MAX_ORDER) {
-		WARN_ON_ONCE(!(gfp_mask & __GFP_NOWARN));
-		return NULL;
-	}
-
 	/*
 	 * We also sanity check to catch abuse of atomic reserves being used by
 	 * callers that are not in atomic context.
@@ -4364,6 +4353,15 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
 	gfp_t alloc_mask; /* The gfp_t that was actually used for allocation */
 	struct alloc_context ac = { };
 
+	/*
+	 * There are several places where we assume that the order value is sane
+	 * so bail out early if the request is out of bound.
+	 */
+	if (unlikely(order >= MAX_ORDER)) {
+		WARN_ON_ONCE(!(gfp_mask & __GFP_NOWARN));
+		return NULL;
+	}
+
 	gfp_mask &= gfp_allowed_mask;
 	alloc_mask = gfp_mask;
 	if (!prepare_alloc_pages(gfp_mask, order, preferred_nid, nodemask, &ac, &alloc_mask, &alloc_flags))
-- 
GitLab


From 45e79815b89149dc6698e71b587c86ffaf4062aa Mon Sep 17 00:00:00 2001
From: Chen Chang <rainccrun@gmail.com>
Date: Fri, 16 Nov 2018 15:08:57 -0800
Subject: [PATCH 1187/1890] mm/memblock.c: fix a typo in __next_mem_pfn_range()
 comments

Link: http://lkml.kernel.org/r/20181107100247.13359-1-rainccrun@gmail.com
Signed-off-by: Chen Chang <rainccrun@gmail.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Mike Rapoport <rppt@linux.ibm.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memblock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/memblock.c b/mm/memblock.c
index 7df468c8ebc8c..9a2d5ae81ae1c 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1179,7 +1179,7 @@ void __init_memblock __next_mem_range_rev(u64 *idx, int nid,
 
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 /*
- * Common iterator interface used to define for_each_mem_range().
+ * Common iterator interface used to define for_each_mem_pfn_range().
  */
 void __init_memblock __next_mem_pfn_range(int *idx, int nid,
 				unsigned long *out_start_pfn,
-- 
GitLab


From 9ff01193a20d391e8dbce4403dd5ef87c7eaaca6 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 18 Nov 2018 13:33:44 -0800
Subject: [PATCH 1188/1890] Linux 4.20-rc3

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 2f36db8978953..ddbf627cad8f5 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 4
 PATCHLEVEL = 20
 SUBLEVEL = 0
-EXTRAVERSION = -rc2
+EXTRAVERSION = -rc3
 NAME = "People's Front"
 
 # *DOCUMENTATION*
-- 
GitLab


From 09aaf6813cfca4c18034fda7a43e68763f34abb1 Mon Sep 17 00:00:00 2001
From: Huacai Chen <chenhc@lemote.com>
Date: Thu, 15 Nov 2018 10:44:57 +0800
Subject: [PATCH 1189/1890] hwmon: (w83795) temp4_type has writable permission

Both datasheet and comments of store_temp_mode() tell us that temp1~4_type
is writable, so fix it.

Signed-off-by: Yao Wang <wangyao@lemote.com>
Signed-off-by: Huacai Chen <chenhc@lemote.com>
Fixes: 39deb6993e7c (" hwmon: (w83795) Simplify temperature sensor type handling")
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/w83795.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/w83795.c b/drivers/hwmon/w83795.c
index 49276bbdac3dd..1bb80f992aa86 100644
--- a/drivers/hwmon/w83795.c
+++ b/drivers/hwmon/w83795.c
@@ -1691,7 +1691,7 @@ store_sf_setup(struct device *dev, struct device_attribute *attr,
  * somewhere else in the code
  */
 #define SENSOR_ATTR_TEMP(index) {					\
-	SENSOR_ATTR_2(temp##index##_type, S_IRUGO | (index < 4 ? S_IWUSR : 0), \
+	SENSOR_ATTR_2(temp##index##_type, S_IRUGO | (index < 5 ? S_IWUSR : 0), \
 		show_temp_mode, store_temp_mode, NOT_USED, index - 1),	\
 	SENSOR_ATTR_2(temp##index##_input, S_IRUGO, show_temp,		\
 		NULL, TEMP_READ, index - 1),				\
-- 
GitLab


From 5478ad10e7850ce3d8b7056db05ddfa3c9ddad9a Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Thu, 15 Nov 2018 11:42:16 +0100
Subject: [PATCH 1190/1890] drm/ast: Remove existing framebuffers before
 loading driver

If vesafb attaches to the AST device, it configures the framebuffer memory
for uncached access by default. When ast.ko later tries to attach itself to
the device, it wants to use write-combining on the framebuffer memory, but
vesefb's existing configuration for uncached access takes precedence. This
results in reduced performance.

Removing the framebuffer's configuration before loding the AST driver fixes
the problem. Other DRM drivers already contain equivalent code.

Link: https://bugzilla.opensuse.org/show_bug.cgi?id=1112963
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Cc: <stable@vger.kernel.org>
Tested-by: Y.C. Chen <yc_chen@aspeedtech.com>
Reviewed-by: Jean Delvare <jdelvare@suse.de>
Tested-by: Jean Delvare <jdelvare@suse.de>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/ast/ast_drv.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/drivers/gpu/drm/ast/ast_drv.c b/drivers/gpu/drm/ast/ast_drv.c
index 69dab82a37714..bf589c53b908d 100644
--- a/drivers/gpu/drm/ast/ast_drv.c
+++ b/drivers/gpu/drm/ast/ast_drv.c
@@ -60,8 +60,29 @@ static const struct pci_device_id pciidlist[] = {
 
 MODULE_DEVICE_TABLE(pci, pciidlist);
 
+static void ast_kick_out_firmware_fb(struct pci_dev *pdev)
+{
+	struct apertures_struct *ap;
+	bool primary = false;
+
+	ap = alloc_apertures(1);
+	if (!ap)
+		return;
+
+	ap->ranges[0].base = pci_resource_start(pdev, 0);
+	ap->ranges[0].size = pci_resource_len(pdev, 0);
+
+#ifdef CONFIG_X86
+	primary = pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW;
+#endif
+	drm_fb_helper_remove_conflicting_framebuffers(ap, "astdrmfb", primary);
+	kfree(ap);
+}
+
 static int ast_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
+	ast_kick_out_firmware_fb(pdev);
+
 	return drm_get_pci_dev(pdev, ent, &driver);
 }
 
-- 
GitLab


From 86322ba9571a36b0b149a5f27fc3423c2dadbe39 Mon Sep 17 00:00:00 2001
From: Sabyasachi Gupta <sabyasachi.linux@gmail.com>
Date: Sat, 3 Nov 2018 10:54:52 +0530
Subject: [PATCH 1191/1890] arch/sparc: Use kzalloc_node

Replaced kmalloc_node + memset with kzalloc_node

Signed-off-by: Sabyasachi Gupta <sabyasachi.linux@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/kernel/iommu.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c
index 40d008b0bd3e9..05eb016fc41be 100644
--- a/arch/sparc/kernel/iommu.c
+++ b/arch/sparc/kernel/iommu.c
@@ -108,10 +108,9 @@ int iommu_table_init(struct iommu *iommu, int tsbsize,
 	/* Allocate and initialize the free area map.  */
 	sz = num_tsb_entries / 8;
 	sz = (sz + 7UL) & ~7UL;
-	iommu->tbl.map = kmalloc_node(sz, GFP_KERNEL, numa_node);
+	iommu->tbl.map = kzalloc_node(sz, GFP_KERNEL, numa_node);
 	if (!iommu->tbl.map)
 		return -ENOMEM;
-	memset(iommu->tbl.map, 0, sz);
 
 	iommu_tbl_pool_init(&iommu->tbl, num_tsb_entries, IO_PAGE_SHIFT,
 			    (tlb_type != hypervisor ? iommu_flushall : NULL),
-- 
GitLab


From 7ddacfa564870cdd97275fd87decb6174abc6380 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Sun, 18 Nov 2018 10:45:30 -0800
Subject: [PATCH 1192/1890] ipv6: Fix PMTU updates for UDP/raw sockets in
 presence of VRF

Preethi reported that PMTU discovery for UDP/raw applications is not
working in the presence of VRF when the socket is not bound to a device.
The problem is that ip6_sk_update_pmtu does not consider the L3 domain
of the skb device if the socket is not bound. Update the function to
set oif to the L3 master device if relevant.

Fixes: ca254490c8df ("net: Add VRF support to IPv6 stack")
Reported-by: Preethi Ramachandra <preethir@juniper.net>
Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 14b422f35504f..059f0531f7c1c 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2359,10 +2359,13 @@ EXPORT_SYMBOL_GPL(ip6_update_pmtu);
 
 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
 {
+	int oif = sk->sk_bound_dev_if;
 	struct dst_entry *dst;
 
-	ip6_update_pmtu(skb, sock_net(sk), mtu,
-			sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
+	if (!oif && skb->dev)
+		oif = l3mdev_master_ifindex(skb->dev);
+
+	ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid);
 
 	dst = __sk_dst_get(sk);
 	if (!dst || !dst->obsolete ||
-- 
GitLab


From 8ebebcba559a1bfbaec7bbda64feb9870b9c58da Mon Sep 17 00:00:00 2001
From: Matthew Cover <werekraken@gmail.com>
Date: Sun, 18 Nov 2018 00:46:00 -0700
Subject: [PATCH 1193/1890] tuntap: fix multiqueue rx

When writing packets to a descriptor associated with a combined queue, the
packets should end up on that queue.

Before this change all packets written to any descriptor associated with a
tap interface end up on rx-0, even when the descriptor is associated with a
different queue.

The rx traffic can be generated by either of the following.
  1. a simple tap program which spins up multiple queues and writes packets
     to each of the file descriptors
  2. tx from a qemu vm with a tap multiqueue netdev

The queue for rx traffic can be observed by either of the following (done
on the hypervisor in the qemu case).
  1. a simple netmap program which opens and reads from per-queue
     descriptors
  2. configuring RPS and doing per-cpu captures with rxtxcpu

Alternatively, if you printk() the return value of skb_get_rx_queue() just
before each instance of netif_receive_skb() in tun.c, you will get 65535
for every skb.

Calling skb_record_rx_queue() to set the rx queue to the queue_index fixes
the association between descriptor and rx queue.

Signed-off-by: Matthew Cover <matthew.cover@stackpath.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tun.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 060135ceaf0e1..e244f5d7512a6 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1536,6 +1536,7 @@ static void tun_rx_batched(struct tun_struct *tun, struct tun_file *tfile,
 
 	if (!rx_batched || (!more && skb_queue_empty(queue))) {
 		local_bh_disable();
+		skb_record_rx_queue(skb, tfile->queue_index);
 		netif_receive_skb(skb);
 		local_bh_enable();
 		return;
@@ -1555,8 +1556,11 @@ static void tun_rx_batched(struct tun_struct *tun, struct tun_file *tfile,
 		struct sk_buff *nskb;
 
 		local_bh_disable();
-		while ((nskb = __skb_dequeue(&process_queue)))
+		while ((nskb = __skb_dequeue(&process_queue))) {
+			skb_record_rx_queue(nskb, tfile->queue_index);
 			netif_receive_skb(nskb);
+		}
+		skb_record_rx_queue(skb, tfile->queue_index);
 		netif_receive_skb(skb);
 		local_bh_enable();
 	}
@@ -2451,6 +2455,7 @@ static int tun_xdp_one(struct tun_struct *tun,
 	if (!rcu_dereference(tun->steering_prog))
 		rxhash = __skb_get_hash_symmetric(skb);
 
+	skb_record_rx_queue(skb, tfile->queue_index);
 	netif_receive_skb(skb);
 
 	stats = get_cpu_ptr(tun->pcpu_stats);
-- 
GitLab


From 909e22e05353a783c526829427e9a8de122fba9c Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Mon, 19 Nov 2018 11:32:41 +0800
Subject: [PATCH 1194/1890] exportfs: fix 'passing zero to ERR_PTR()' warning

Fix a static code checker warning:
  fs/exportfs/expfs.c:171 reconnect_one() warn: passing zero to 'ERR_PTR'

The error path for lookup_one_len_unlocked failure
should set err to PTR_ERR.

Fixes: bbf7a8a3562f ("exportfs: move most of reconnect_path to helper function")
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/exportfs/expfs.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 645158dc33f1f..c8a3dfda17646 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -147,6 +147,7 @@ static struct dentry *reconnect_one(struct vfsmount *mnt,
 	tmp = lookup_one_len_unlocked(nbuf, parent, strlen(nbuf));
 	if (IS_ERR(tmp)) {
 		dprintk("%s: lookup failed: %d\n", __func__, PTR_ERR(tmp));
+		err = PTR_ERR(tmp);
 		goto out_err;
 	}
 	if (tmp != dentry) {
-- 
GitLab


From 672e60b72bbe7aace88721db55b380b6a51fb8f9 Mon Sep 17 00:00:00 2001
From: Heiko Stuebner <heiko@sntech.de>
Date: Sun, 18 Nov 2018 20:03:02 +0100
Subject: [PATCH 1195/1890] ARM: dts: rockchip: Remove @0 from the veyron
 memory node

The Coreboot version on veyron ChromeOS devices seems to ignore
memory@0 nodes when updating the available memory and instead
inserts another memory node without the address.

This leads to 4GB systems only ever be using 2GB as the memory@0
node takes precedence. So remove the @0 for veyron devices.

Fixes: 0b639b815f15 ("ARM: dts: rockchip: Add missing unit name to memory nodes in rk3288 boards")
Cc: stable@vger.kernel.org
Reported-by: Heikki Lindholm <holin@iki.fi>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
---
 arch/arm/boot/dts/rk3288-veyron.dtsi | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/rk3288-veyron.dtsi b/arch/arm/boot/dts/rk3288-veyron.dtsi
index 2075120cfc4d7..d8bf939a3aff9 100644
--- a/arch/arm/boot/dts/rk3288-veyron.dtsi
+++ b/arch/arm/boot/dts/rk3288-veyron.dtsi
@@ -10,7 +10,11 @@
 #include "rk3288.dtsi"
 
 / {
-	memory@0 {
+	/*
+	 * The default coreboot on veyron devices ignores memory@0 nodes
+	 * and would instead create another memory node.
+	 */
+	memory {
 		device_type = "memory";
 		reg = <0x0 0x0 0x0 0x80000000>;
 	};
-- 
GitLab


From b54e41f5efcb4316b2f30b30c2535cc194270373 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Fri, 16 Nov 2018 13:43:17 +0100
Subject: [PATCH 1196/1890] udf: Allow mounting volumes with incorrect
 identification strings

Commit c26f6c615788 ("udf: Fix conversion of 'dstring' fields to UTF8")
started to be more strict when checking whether converted strings are
properly formatted. Sudip reports that there are DVDs where the volume
identification string is actually too long - UDF reports:

[  632.309320] UDF-fs: incorrect dstring lengths (32/32)

during mount and fails the mount. This is mostly harmless failure as we
don't need volume identification (and even less volume set
identification) for anything. So just truncate the volume identification
string if it is too long and replace it with 'Invalid' if we just cannot
convert it for other reasons. This keeps slightly incorrect media still
mountable.

CC: stable@vger.kernel.org
Fixes: c26f6c615788 ("udf: Fix conversion of 'dstring' fields to UTF8")
Reported-and-tested-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/udf/super.c   | 16 ++++++++++------
 fs/udf/unicode.c | 14 +++++++++++---
 2 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/fs/udf/super.c b/fs/udf/super.c
index 8f2f56d9a1bbf..e3d684ea32030 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -827,16 +827,20 @@ static int udf_load_pvoldesc(struct super_block *sb, sector_t block)
 
 
 	ret = udf_dstrCS0toChar(sb, outstr, 31, pvoldesc->volIdent, 32);
-	if (ret < 0)
-		goto out_bh;
-
-	strncpy(UDF_SB(sb)->s_volume_ident, outstr, ret);
+	if (ret < 0) {
+		strcpy(UDF_SB(sb)->s_volume_ident, "InvalidName");
+		pr_warn("incorrect volume identification, setting to "
+			"'InvalidName'\n");
+	} else {
+		strncpy(UDF_SB(sb)->s_volume_ident, outstr, ret);
+	}
 	udf_debug("volIdent[] = '%s'\n", UDF_SB(sb)->s_volume_ident);
 
 	ret = udf_dstrCS0toChar(sb, outstr, 127, pvoldesc->volSetIdent, 128);
-	if (ret < 0)
+	if (ret < 0) {
+		ret = 0;
 		goto out_bh;
-
+	}
 	outstr[ret] = 0;
 	udf_debug("volSetIdent[] = '%s'\n", outstr);
 
diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c
index 45234791fec28..5fcfa96463ebb 100644
--- a/fs/udf/unicode.c
+++ b/fs/udf/unicode.c
@@ -351,6 +351,11 @@ static int udf_name_to_CS0(struct super_block *sb,
 	return u_len;
 }
 
+/*
+ * Convert CS0 dstring to output charset. Warning: This function may truncate
+ * input string if it is too long as it is used for informational strings only
+ * and it is better to truncate the string than to refuse mounting a media.
+ */
 int udf_dstrCS0toChar(struct super_block *sb, uint8_t *utf_o, int o_len,
 		      const uint8_t *ocu_i, int i_len)
 {
@@ -359,9 +364,12 @@ int udf_dstrCS0toChar(struct super_block *sb, uint8_t *utf_o, int o_len,
 	if (i_len > 0) {
 		s_len = ocu_i[i_len - 1];
 		if (s_len >= i_len) {
-			pr_err("incorrect dstring lengths (%d/%d)\n",
-			       s_len, i_len);
-			return -EINVAL;
+			pr_warn("incorrect dstring lengths (%d/%d),"
+				" truncating\n", s_len, i_len);
+			s_len = i_len - 1;
+			/* 2-byte encoding? Need to round properly... */
+			if (ocu_i[0] == 16)
+				s_len -= (s_len - 1) & 2;
 		}
 	}
 
-- 
GitLab


From ae6b3e54aa52cd29965b8e4e47000ed2c5d78eb8 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sun, 18 Nov 2018 20:25:35 +0100
Subject: [PATCH 1197/1890] ACPICA: Fix handling of buffer-size in
 acpi_ex_write_data_to_field()

Generic Serial Bus transfers use a data struct like this:

struct gsb_buffer {
        u8      status;
        u8      len;
        u8      data[0];
};

acpi_ex_write_data_to_field() copies the data which is to be written from
the source-buffer to a temp-buffer. This is done because the OpReg-handler
overwrites the status field and some transfers do a write + read-back.

Commit f99b89eefeb6 ("ACPICA: Update for generic_serial_bus and
attrib_raw_process_bytes protocol") acpi_ex_write_data_to_field()
introduces a number of problems with this:

 1) It drops a "length += 2" statement used to calculate the temp-buffer
 size causing the temp-buffer to only be 1/2 bytes large for byte/word
 transfers while it should be 3/4 bytes (taking the status and len field
 into account). This is already fixed in commit e324e10109fc ("ACPICA:
 Update for field unit access") which refactors the code.

The ACPI 6.0 spec (ACPI_6.0.pdf) "5.5.2.4.5.2 Declaring and Using a
GenericSerialBusData Buffer" (page 232) states that the GenericSerialBus
Data Buffer Length field is only valid when doing a Read/Write Block
(AttribBlock) transfer, but since the troublesome commit we unconditionally
use the len field to determine how much data to copy from the source-buffer
into the temp-buffer passed to the OpRegion.

This causes 3 further issues:

 2) This may lead to not copying enough data to the temp-buffer causing the
 OpRegion handler for the serial-bus to write garbage to the hardware.

 3) The temp-buffer passed to the OpRegion is allocated to the size
 returned by acpi_ex_get_serial_access_length(), which may be as little
 as 1, so potentially this may lead to a write overflow of the temp-buffer.

 4) Commit e324e10109fc ("ACPICA: Update for field unit access") drops a
 length check on the source-buffer, leading to a potential read overflow
 of the source-buffer.

This commit fixes all 3 remaining issues by not looking at the len field at
all (the interpretation of this field is left up to the OpRegion handler),
and copying the minimum of the source- and temp-buffer sizes from the
source-buffer to the temp-buffer.

This fixes e.g. an Acer S1003 no longer booting since the troublesome
commit.

Fixes: f99b89eefeb6 (ACPICA: Update for generic_serial_bus and ...)
Fixes: e324e10109fc (ACPICA: Update for field unit access)
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/exserial.c | 21 ++-------------------
 1 file changed, 2 insertions(+), 19 deletions(-)

diff --git a/drivers/acpi/acpica/exserial.c b/drivers/acpi/acpica/exserial.c
index 0d42f30e5b25d..9920fac6413ff 100644
--- a/drivers/acpi/acpica/exserial.c
+++ b/drivers/acpi/acpica/exserial.c
@@ -244,7 +244,6 @@ acpi_ex_write_serial_bus(union acpi_operand_object *source_desc,
 {
 	acpi_status status;
 	u32 buffer_length;
-	u32 data_length;
 	void *buffer;
 	union acpi_operand_object *buffer_desc;
 	u32 function;
@@ -282,14 +281,12 @@ acpi_ex_write_serial_bus(union acpi_operand_object *source_desc,
 	case ACPI_ADR_SPACE_SMBUS:
 
 		buffer_length = ACPI_SMBUS_BUFFER_SIZE;
-		data_length = ACPI_SMBUS_DATA_SIZE;
 		function = ACPI_WRITE | (obj_desc->field.attribute << 16);
 		break;
 
 	case ACPI_ADR_SPACE_IPMI:
 
 		buffer_length = ACPI_IPMI_BUFFER_SIZE;
-		data_length = ACPI_IPMI_DATA_SIZE;
 		function = ACPI_WRITE;
 		break;
 
@@ -310,7 +307,6 @@ acpi_ex_write_serial_bus(union acpi_operand_object *source_desc,
 		/* Add header length to get the full size of the buffer */
 
 		buffer_length += ACPI_SERIAL_HEADER_SIZE;
-		data_length = source_desc->buffer.pointer[1];
 		function = ACPI_WRITE | (accessor_type << 16);
 		break;
 
@@ -318,20 +314,6 @@ acpi_ex_write_serial_bus(union acpi_operand_object *source_desc,
 		return_ACPI_STATUS(AE_AML_INVALID_SPACE_ID);
 	}
 
-#if 0
-	OBSOLETE ?
-	    /* Check for possible buffer overflow */
-	    if (data_length > source_desc->buffer.length) {
-		ACPI_ERROR((AE_INFO,
-			    "Length in buffer header (%u)(%u) is greater than "
-			    "the physical buffer length (%u) and will overflow",
-			    data_length, buffer_length,
-			    source_desc->buffer.length));
-
-		return_ACPI_STATUS(AE_AML_BUFFER_LIMIT);
-	}
-#endif
-
 	/* Create the transfer/bidirectional/return buffer */
 
 	buffer_desc = acpi_ut_create_buffer_object(buffer_length);
@@ -342,7 +324,8 @@ acpi_ex_write_serial_bus(union acpi_operand_object *source_desc,
 	/* Copy the input buffer data to the transfer buffer */
 
 	buffer = buffer_desc->buffer.pointer;
-	memcpy(buffer, source_desc->buffer.pointer, data_length);
+	memcpy(buffer, source_desc->buffer.pointer,
+	       min(buffer_length, source_desc->buffer.length));
 
 	/* Lock entire transaction if requested */
 
-- 
GitLab


From d98ccfc3948ab63152494bb6b9c17e15295c0310 Mon Sep 17 00:00:00 2001
From: Dave Gerlach <d-gerlach@ti.com>
Date: Tue, 13 Nov 2018 13:30:40 -0600
Subject: [PATCH 1198/1890] cpufreq: ti-cpufreq: Only register platform_device
 when supported

Currently the ti-cpufreq driver blindly registers a 'ti-cpufreq' to force
the driver to probe on any platforms where the driver is built in.
However, this should only happen on platforms that actually can make use
of the driver. There is already functionality in place to match the
SoC compatible so let's factor this out into a separate call and
make sure we find a match before creating the ti-cpufreq platform device.

Reviewed-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Dave Gerlach <d-gerlach@ti.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/ti-cpufreq.c | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/drivers/cpufreq/ti-cpufreq.c b/drivers/cpufreq/ti-cpufreq.c
index 3f0e2a14895a0..22b53bf268179 100644
--- a/drivers/cpufreq/ti-cpufreq.c
+++ b/drivers/cpufreq/ti-cpufreq.c
@@ -201,19 +201,28 @@ static const struct of_device_id ti_cpufreq_of_match[] = {
 	{},
 };
 
+static const struct of_device_id *ti_cpufreq_match_node(void)
+{
+	struct device_node *np;
+	const struct of_device_id *match;
+
+	np = of_find_node_by_path("/");
+	match = of_match_node(ti_cpufreq_of_match, np);
+	of_node_put(np);
+
+	return match;
+}
+
 static int ti_cpufreq_probe(struct platform_device *pdev)
 {
 	u32 version[VERSION_COUNT];
-	struct device_node *np;
 	const struct of_device_id *match;
 	struct opp_table *ti_opp_table;
 	struct ti_cpufreq_data *opp_data;
 	const char * const reg_names[] = {"vdd", "vbb"};
 	int ret;
 
-	np = of_find_node_by_path("/");
-	match = of_match_node(ti_cpufreq_of_match, np);
-	of_node_put(np);
+	match = dev_get_platdata(&pdev->dev);
 	if (!match)
 		return -ENODEV;
 
@@ -290,7 +299,14 @@ static int ti_cpufreq_probe(struct platform_device *pdev)
 
 static int ti_cpufreq_init(void)
 {
-	platform_device_register_simple("ti-cpufreq", -1, NULL, 0);
+	const struct of_device_id *match;
+
+	/* Check to ensure we are on a compatible platform */
+	match = ti_cpufreq_match_node();
+	if (match)
+		platform_device_register_data(NULL, "ti-cpufreq", -1, match,
+					      sizeof(*match));
+
 	return 0;
 }
 module_init(ti_cpufreq_init);
-- 
GitLab


From c22397888f1eed98cd59f0a88f2a5f6925f80e15 Mon Sep 17 00:00:00 2001
From: Chanho Min <chanho.min@lge.com>
Date: Mon, 12 Nov 2018 12:54:45 +0900
Subject: [PATCH 1199/1890] exec: make de_thread() freezable

Suspend fails due to the exec family of functions blocking the freezer.
The casue is that de_thread() sleeps in TASK_UNINTERRUPTIBLE waiting for
all sub-threads to die, and we have the deadlock if one of them is frozen.
This also can occur with the schedule() waiting for the group thread leader
to exit if it is frozen.

In our machine, it causes freeze timeout as bellows.

Freezing of tasks failed after 20.010 seconds (1 tasks refusing to freeze, wq_busy=0):
setcpushares-ls D ffffffc00008ed70     0  5817   1483 0x0040000d
 Call trace:
[<ffffffc00008ed70>] __switch_to+0x88/0xa0
[<ffffffc000d1c30c>] __schedule+0x1bc/0x720
[<ffffffc000d1ca90>] schedule+0x40/0xa8
[<ffffffc0001cd784>] flush_old_exec+0xdc/0x640
[<ffffffc000220360>] load_elf_binary+0x2a8/0x1090
[<ffffffc0001ccff4>] search_binary_handler+0x9c/0x240
[<ffffffc00021c584>] load_script+0x20c/0x228
[<ffffffc0001ccff4>] search_binary_handler+0x9c/0x240
[<ffffffc0001ce8e0>] do_execveat_common.isra.14+0x4f8/0x6e8
[<ffffffc0001cedd0>] compat_SyS_execve+0x38/0x48
[<ffffffc00008de30>] el0_svc_naked+0x24/0x28

To fix this, make de_thread() freezable. It looks safe and works fine.

Suggested-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Chanho Min <chanho.min@lge.com>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Pavel Machek <pavel@ucw.cz>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 fs/exec.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index fc281b738a982..acc3a5536384c 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -62,6 +62,7 @@
 #include <linux/oom.h>
 #include <linux/compat.h>
 #include <linux/vmalloc.h>
+#include <linux/freezer.h>
 
 #include <linux/uaccess.h>
 #include <asm/mmu_context.h>
@@ -1083,7 +1084,7 @@ static int de_thread(struct task_struct *tsk)
 	while (sig->notify_count) {
 		__set_current_state(TASK_KILLABLE);
 		spin_unlock_irq(lock);
-		schedule();
+		freezable_schedule();
 		if (unlikely(__fatal_signal_pending(tsk)))
 			goto killed;
 		spin_lock_irq(lock);
@@ -1111,7 +1112,7 @@ static int de_thread(struct task_struct *tsk)
 			__set_current_state(TASK_KILLABLE);
 			write_unlock_irq(&tasklist_lock);
 			cgroup_threadgroup_change_end(tsk);
-			schedule();
+			freezable_schedule();
 			if (unlikely(__fatal_signal_pending(tsk)))
 				goto killed;
 		}
-- 
GitLab


From cdcefe6bd9df754f528ffc339d3cc143cea4ddf6 Mon Sep 17 00:00:00 2001
From: Rajat Jain <rajatja@google.com>
Date: Mon, 29 Oct 2018 15:17:01 -0700
Subject: [PATCH 1200/1890] mmc: sdhci-pci: Try "cd" for card-detect lookup
 before using NULL

Problem:

The card detect IRQ does not work with modern BIOS (that want
to use _DSD to provide the card detect GPIO to the driver).

Details:

The mmc core provides the mmc_gpiod_request_cd() API to let host drivers
request the gpio descriptor for the "card detect" pin.
This pin is specified in the ACPI for the SDHC device:

 * Either as a resource using _CRS. This is a method used by legacy BIOS.
   (The driver needs to tell which resource index).

 * Or as a named property ("cd-gpios"/"cd-gpio") in _DSD (which internally
   points to an entry in _CRS). This way, the driver can lookup using a
   string. This is what modern BIOS prefer to use.

This API finally results in a call to the following code:

struct gpio_desc *acpi_find_gpio(..., const char *con_id,...)
{
...
   /* Lookup gpio (using "<con_id>-gpio") in the _DSD */
...
   if (!acpi_can_fallback_to_crs(adev, con_id))
          return ERR_PTR(-ENOENT);
...
   /* Falling back to _CRS is allowed, Lookup gpio in the _CRS */
...
}

Note that this means that if the ACPI has _DSD properties, the kernel
will never use _CRS for the lookup (Because acpi_can_fallback_to_crs()
will always be false for any device hat has _DSD entries).

The SDHCI driver is thus currently broken on a modern BIOS, even if
BIOS provides both _CRS (for index based lookup) and _DSD entries (for
string based lookup). Ironically, none of these will be used for the
lookup currently because:

* Since the con_id is NULL, acpi_find_gpio() does not find a matching
  entry in DSDT. (The _DSDT entry has the property name = "cd-gpios")

* Because ACPI contains DSDT entries, thus acpi_can_fallback_to_crs()
  returns false (because device properties have been populated from
  _DSD), thus the _CRS is never used for the lookup.

Fix:

Try "cd" for lookup in the _DSD before falling back to using NULL so
as to try looking up in the _CRS.

I've tested this patch successfully with both Legacy BIOS (that
provide only _CRS method) as well as modern BIOS (that provide both
_CRS and _DSD). Also the use of "cd" appears to be fairly consistent
across other users of this API (other MMC host controller drivers).

Link: https://lkml.org/lkml/2018/9/25/1113
Signed-off-by: Rajat Jain <rajatja@google.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Fixes: f10e4bf6632b ("gpio: acpi: Even more tighten up ACPI GPIO lookups")
Cc: stable@vger.kernel.org
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-pci-core.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c
index 7bfd366d970da..e53333c695b39 100644
--- a/drivers/mmc/host/sdhci-pci-core.c
+++ b/drivers/mmc/host/sdhci-pci-core.c
@@ -1762,8 +1762,13 @@ static struct sdhci_pci_slot *sdhci_pci_probe_slot(
 		device_init_wakeup(&pdev->dev, true);
 
 	if (slot->cd_idx >= 0) {
-		ret = mmc_gpiod_request_cd(host->mmc, NULL, slot->cd_idx,
+		ret = mmc_gpiod_request_cd(host->mmc, "cd", slot->cd_idx,
 					   slot->cd_override_level, 0, NULL);
+		if (ret && ret != -EPROBE_DEFER)
+			ret = mmc_gpiod_request_cd(host->mmc, NULL,
+						   slot->cd_idx,
+						   slot->cd_override_level,
+						   0, NULL);
 		if (ret == -EPROBE_DEFER)
 			goto remove;
 
-- 
GitLab


From cce997292a5264c5342c968bbd226d7c365f03d6 Mon Sep 17 00:00:00 2001
From: Connor McAdams <conmanx360@gmail.com>
Date: Fri, 16 Nov 2018 14:24:55 -0500
Subject: [PATCH 1201/1890] ALSA: hda/ca0132 - Add new ZxR quirk

This patch adds a new PCI subsys ID for the ZxR, as found and tested by
other users. Without a way to know if any Z's use it as well, it keeps
the quirk of QUIRK_SBZ and goes through the HDA subsys test function.

Signed-off-by: Connor McAdams <conmanx360@gmail.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_ca0132.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c
index bdbbc51e8d187..f2e6207192855 100644
--- a/sound/pci/hda/patch_ca0132.c
+++ b/sound/pci/hda/patch_ca0132.c
@@ -1177,6 +1177,7 @@ static const struct snd_pci_quirk ca0132_quirks[] = {
 	SND_PCI_QUIRK(0x1028, 0x0708, "Alienware 15 R2 2016", QUIRK_ALIENWARE),
 	SND_PCI_QUIRK(0x1102, 0x0010, "Sound Blaster Z", QUIRK_SBZ),
 	SND_PCI_QUIRK(0x1102, 0x0023, "Sound Blaster Z", QUIRK_SBZ),
+	SND_PCI_QUIRK(0x1102, 0x0033, "Sound Blaster ZxR", QUIRK_SBZ),
 	SND_PCI_QUIRK(0x1458, 0xA016, "Recon3Di", QUIRK_R3DI),
 	SND_PCI_QUIRK(0x1458, 0xA026, "Gigabyte G1.Sniper Z97", QUIRK_R3DI),
 	SND_PCI_QUIRK(0x1458, 0xA036, "Gigabyte GA-Z170X-Gaming 7", QUIRK_R3DI),
-- 
GitLab


From a6b0961b39896a9f9f1350d26d202f078a7d9dbc Mon Sep 17 00:00:00 2001
From: Connor McAdams <conmanx360@gmail.com>
Date: Fri, 16 Nov 2018 14:24:56 -0500
Subject: [PATCH 1202/1890] ALSA: hda/ca0132 - fix AE-5 pincfg

This patch fixes the pincfg assignment for the AE-5, which was
previously using the Recon3D pincfg's by mistake.

Fixes: d06feaf02fe6 ("ALSA: hda/ca0132 - Add pincfg for AE-5")
Signed-off-by: Connor McAdams <conmanx360@gmail.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_ca0132.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c
index f2e6207192855..0a567634e5fad 100644
--- a/sound/pci/hda/patch_ca0132.c
+++ b/sound/pci/hda/patch_ca0132.c
@@ -8489,7 +8489,7 @@ static void ca0132_config(struct hda_codec *codec)
 		break;
 	case QUIRK_AE5:
 		codec_dbg(codec, "%s: QUIRK_AE5 applied.\n", __func__);
-		snd_hda_apply_pincfgs(codec, r3di_pincfgs);
+		snd_hda_apply_pincfgs(codec, ae5_pincfgs);
 		break;
 	}
 
-- 
GitLab


From 21556350ade3cb5d7afecc8b3544e56431d21695 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Wed, 14 Nov 2018 19:34:40 +0200
Subject: [PATCH 1203/1890] drm/i915: Disable LP3 watermarks on all SNB
 machines
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I have a Thinkpad X220 Tablet in my hands that is losing vblank
interrupts whenever LP3 watermarks are used.

If I nudge the latency value written to the WM3 register just
by one in either direction the problem disappears. That to me
suggests that the punit will not enter the corrsponding
powersave mode (MPLL shutdown IIRC) unless the latency value
in the register matches exactly what we read from SSKPD. Ie.
it's not really a latency value but rather just a cookie
by which the punit can identify the desired power saving state.
On HSW/BDW this was changed such that we actually just write
the WM level number into those bits, which makes much more
sense given the observed behaviour.

We could try to handle this by disallowing LP3 watermarks
only when vblank interrupts are enabled but we'd first have
to prove that only vblank interrupts are affected, which
seems unlikely. Also we can't grab the wm mutex from the
vblank enable/disable hooks because those are called with
various spinlocks held. Thus we'd have to redesigne the
watermark locking. So to play it safe and keep the code
simple we simply disable LP3 watermarks on all SNB machines.

To do that we simply zero out the latency values for
watermark level 3, and we adjust the watermark computation
to check for that. The behaviour now matches that of the
g4x/vlv/skl wm code in the presence of a zeroed latency
value.

v2: s/USHRT_MAX/U32_MAX/ for consistency with the types (Chris)

Cc: stable@vger.kernel.org
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Acked-by: Chris Wilson <chris@chris-wilson.co.uk>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101269
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103713
Signed-off-by: Ville SyrjÃ¤lÃ¤ <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181114173440.6730-1-ville.syrjala@linux.intel.com
(cherry picked from commit 03981c6ebec4fc7056b9b45f847393aeac90d060)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_pm.c | 41 ++++++++++++++++++++++++++++++++-
 1 file changed, 40 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 245f0022bcfd0..3fe358db12768 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -2493,6 +2493,9 @@ static uint32_t ilk_compute_pri_wm(const struct intel_crtc_state *cstate,
 	uint32_t method1, method2;
 	int cpp;
 
+	if (mem_value == 0)
+		return U32_MAX;
+
 	if (!intel_wm_plane_visible(cstate, pstate))
 		return 0;
 
@@ -2522,6 +2525,9 @@ static uint32_t ilk_compute_spr_wm(const struct intel_crtc_state *cstate,
 	uint32_t method1, method2;
 	int cpp;
 
+	if (mem_value == 0)
+		return U32_MAX;
+
 	if (!intel_wm_plane_visible(cstate, pstate))
 		return 0;
 
@@ -2545,6 +2551,9 @@ static uint32_t ilk_compute_cur_wm(const struct intel_crtc_state *cstate,
 {
 	int cpp;
 
+	if (mem_value == 0)
+		return U32_MAX;
+
 	if (!intel_wm_plane_visible(cstate, pstate))
 		return 0;
 
@@ -3008,6 +3017,34 @@ static void snb_wm_latency_quirk(struct drm_i915_private *dev_priv)
 	intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency);
 }
 
+static void snb_wm_lp3_irq_quirk(struct drm_i915_private *dev_priv)
+{
+	/*
+	 * On some SNB machines (Thinkpad X220 Tablet at least)
+	 * LP3 usage can cause vblank interrupts to be lost.
+	 * The DEIIR bit will go high but it looks like the CPU
+	 * never gets interrupted.
+	 *
+	 * It's not clear whether other interrupt source could
+	 * be affected or if this is somehow limited to vblank
+	 * interrupts only. To play it safe we disable LP3
+	 * watermarks entirely.
+	 */
+	if (dev_priv->wm.pri_latency[3] == 0 &&
+	    dev_priv->wm.spr_latency[3] == 0 &&
+	    dev_priv->wm.cur_latency[3] == 0)
+		return;
+
+	dev_priv->wm.pri_latency[3] = 0;
+	dev_priv->wm.spr_latency[3] = 0;
+	dev_priv->wm.cur_latency[3] = 0;
+
+	DRM_DEBUG_KMS("LP3 watermarks disabled due to potential for lost interrupts\n");
+	intel_print_wm_latency(dev_priv, "Primary", dev_priv->wm.pri_latency);
+	intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency);
+	intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency);
+}
+
 static void ilk_setup_wm_latency(struct drm_i915_private *dev_priv)
 {
 	intel_read_wm_latency(dev_priv, dev_priv->wm.pri_latency);
@@ -3024,8 +3061,10 @@ static void ilk_setup_wm_latency(struct drm_i915_private *dev_priv)
 	intel_print_wm_latency(dev_priv, "Sprite", dev_priv->wm.spr_latency);
 	intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency);
 
-	if (IS_GEN6(dev_priv))
+	if (IS_GEN6(dev_priv)) {
 		snb_wm_latency_quirk(dev_priv);
+		snb_wm_lp3_irq_quirk(dev_priv);
+	}
 }
 
 static void skl_setup_wm_latency(struct drm_i915_private *dev_priv)
-- 
GitLab


From 5bb9b84b0796336b18c8b8651628009eace70202 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20P=C3=A9ron?= <peron.clem@gmail.com>
Date: Sat, 17 Nov 2018 13:14:52 +0100
Subject: [PATCH 1204/1890] MAINTAINERS: add ASoC maintainers for sound
 dt-bindings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Sound dt-bindings are applied by ASoC maintainers and should be
submit to ASoC list in addition to the devicetree list.

Hence, add this information into the MAINTAINERS file.

Signed-off-by: ClÃ©ment PÃ©ron <peron.clem@gmail.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index d3814c46d60a6..79640cffe658b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13592,6 +13592,7 @@ S:	Supported
 F:	Documentation/devicetree/bindings/sound/
 F:	Documentation/sound/soc/
 F:	sound/soc/
+F:	include/dt-bindings/sound/
 F:	include/sound/soc*
 
 SOUNDWIRE SUBSYSTEM
-- 
GitLab


From 5305ec6a27b2dc7398a689e661a4a2e951026f09 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Mon, 19 Nov 2018 14:53:07 +0200
Subject: [PATCH 1205/1890] mmc: sdhci-pci: Workaround GLK firmware failing to
 restore the tuning value

GLK firmware can indicate that the tuning value will be restored after
runtime suspend, but not actually do that. Add a workaround that detects
such cases, and lets the driver do re-tuning instead.

Reported-by: Anisse Astier <anisse@astier.eu>
Tested-by: Anisse Astier <anisse@astier.eu>
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: stable@vger.kernel.org
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-pci-core.c | 79 ++++++++++++++++++++++++++++++-
 1 file changed, 77 insertions(+), 2 deletions(-)

diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c
index e53333c695b39..c4115bae5db18 100644
--- a/drivers/mmc/host/sdhci-pci-core.c
+++ b/drivers/mmc/host/sdhci-pci-core.c
@@ -12,6 +12,7 @@
  *     - JMicron (hardware and technical support)
  */
 
+#include <linux/bitfield.h>
 #include <linux/string.h>
 #include <linux/delay.h>
 #include <linux/highmem.h>
@@ -462,6 +463,9 @@ struct intel_host {
 	u32	dsm_fns;
 	int	drv_strength;
 	bool	d3_retune;
+	bool	rpm_retune_ok;
+	u32	glk_rx_ctrl1;
+	u32	glk_tun_val;
 };
 
 static const guid_t intel_dsm_guid =
@@ -791,6 +795,77 @@ static int glk_emmc_add_host(struct sdhci_pci_slot *slot)
 	return ret;
 }
 
+#ifdef CONFIG_PM
+#define GLK_RX_CTRL1	0x834
+#define GLK_TUN_VAL	0x840
+#define GLK_PATH_PLL	GENMASK(13, 8)
+#define GLK_DLY		GENMASK(6, 0)
+/* Workaround firmware failing to restore the tuning value */
+static void glk_rpm_retune_wa(struct sdhci_pci_chip *chip, bool susp)
+{
+	struct sdhci_pci_slot *slot = chip->slots[0];
+	struct intel_host *intel_host = sdhci_pci_priv(slot);
+	struct sdhci_host *host = slot->host;
+	u32 glk_rx_ctrl1;
+	u32 glk_tun_val;
+	u32 dly;
+
+	if (intel_host->rpm_retune_ok || !mmc_can_retune(host->mmc))
+		return;
+
+	glk_rx_ctrl1 = sdhci_readl(host, GLK_RX_CTRL1);
+	glk_tun_val = sdhci_readl(host, GLK_TUN_VAL);
+
+	if (susp) {
+		intel_host->glk_rx_ctrl1 = glk_rx_ctrl1;
+		intel_host->glk_tun_val = glk_tun_val;
+		return;
+	}
+
+	if (!intel_host->glk_tun_val)
+		return;
+
+	if (glk_rx_ctrl1 != intel_host->glk_rx_ctrl1) {
+		intel_host->rpm_retune_ok = true;
+		return;
+	}
+
+	dly = FIELD_PREP(GLK_DLY, FIELD_GET(GLK_PATH_PLL, glk_rx_ctrl1) +
+				  (intel_host->glk_tun_val << 1));
+	if (dly == FIELD_GET(GLK_DLY, glk_rx_ctrl1))
+		return;
+
+	glk_rx_ctrl1 = (glk_rx_ctrl1 & ~GLK_DLY) | dly;
+	sdhci_writel(host, glk_rx_ctrl1, GLK_RX_CTRL1);
+
+	intel_host->rpm_retune_ok = true;
+	chip->rpm_retune = true;
+	mmc_retune_needed(host->mmc);
+	pr_info("%s: Requiring re-tune after rpm resume", mmc_hostname(host->mmc));
+}
+
+static void glk_rpm_retune_chk(struct sdhci_pci_chip *chip, bool susp)
+{
+	if (chip->pdev->device == PCI_DEVICE_ID_INTEL_GLK_EMMC &&
+	    !chip->rpm_retune)
+		glk_rpm_retune_wa(chip, susp);
+}
+
+static int glk_runtime_suspend(struct sdhci_pci_chip *chip)
+{
+	glk_rpm_retune_chk(chip, true);
+
+	return sdhci_cqhci_runtime_suspend(chip);
+}
+
+static int glk_runtime_resume(struct sdhci_pci_chip *chip)
+{
+	glk_rpm_retune_chk(chip, false);
+
+	return sdhci_cqhci_runtime_resume(chip);
+}
+#endif
+
 #ifdef CONFIG_ACPI
 static int ni_set_max_freq(struct sdhci_pci_slot *slot)
 {
@@ -879,8 +954,8 @@ static const struct sdhci_pci_fixes sdhci_intel_glk_emmc = {
 	.resume			= sdhci_cqhci_resume,
 #endif
 #ifdef CONFIG_PM
-	.runtime_suspend	= sdhci_cqhci_runtime_suspend,
-	.runtime_resume		= sdhci_cqhci_runtime_resume,
+	.runtime_suspend	= glk_runtime_suspend,
+	.runtime_resume		= glk_runtime_resume,
 #endif
 	.quirks			= SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC,
 	.quirks2		= SDHCI_QUIRK2_PRESET_VALUE_BROKEN |
-- 
GitLab


From 512cab3e7e0be11234f965d9898936dce2325382 Mon Sep 17 00:00:00 2001
From: Fabio Estevam <festevam@gmail.com>
Date: Sat, 10 Nov 2018 15:53:59 -0200
Subject: [PATCH 1206/1890] ARM: dts: imx51-zii-rdu1: Remove EEPROM node

The EEPROM under I2C2 was put by mistake in the dts.

Remove it as it is not really present on the real hardware.

Fixes: ceef0396f367 ("ARM: dts: imx: add ZII RDU1 board")
Reported-by: Chris Healy <cphealy@gmail.com>
Signed-off-by: Fabio Estevam <festevam@gmail.com>
Reviewed-by: Chris Healy <cphealy@gmail.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm/boot/dts/imx51-zii-rdu1.dts | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/arch/arm/boot/dts/imx51-zii-rdu1.dts b/arch/arm/boot/dts/imx51-zii-rdu1.dts
index e45a15ceb94bc..69d753cac89ae 100644
--- a/arch/arm/boot/dts/imx51-zii-rdu1.dts
+++ b/arch/arm/boot/dts/imx51-zii-rdu1.dts
@@ -492,12 +492,6 @@ &i2c2 {
 	pinctrl-0 = <&pinctrl_i2c2>;
 	status = "okay";
 
-	eeprom@50 {
-		compatible = "atmel,24c04";
-		pagesize = <16>;
-		reg = <0x50>;
-	};
-
 	hpa1: amp@60 {
 		compatible = "ti,tpa6130a2";
 		reg = <0x60>;
-- 
GitLab


From 25bbe21bf427a81b8e3ccd480ea0e1d940256156 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Fri, 16 Nov 2018 15:50:02 -0500
Subject: [PATCH 1207/1890] dax: Avoid losing wakeup in dax_lock_mapping_entry

After calling get_unlocked_entry(), you have to call
put_unlocked_entry() to avoid subsequent waiters losing wakeups.

Fixes: c2a7d2a11552 ("filesystem-dax: Introduce dax_lock_mapping_entry()")
Cc: stable@vger.kernel.org
Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 fs/dax.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/dax.c b/fs/dax.c
index cf2394e2bf4bf..9bcce89ea18ef 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -391,6 +391,7 @@ bool dax_lock_mapping_entry(struct page *page)
 			rcu_read_unlock();
 			entry = get_unlocked_entry(&xas);
 			xas_unlock_irq(&xas);
+			put_unlocked_entry(&xas, entry);
 			rcu_read_lock();
 			continue;
 		}
-- 
GitLab


From fffc9a260e38acec3187515738122a3ecb24ac90 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Mon, 19 Nov 2018 09:36:29 -0500
Subject: [PATCH 1208/1890] XArray tests: Add missing locking

Lockdep caught me being sloppy in the test suite and failing to lock
the XArray appropriately.

Reported-by: kernel test robot <rong.a.chen@intel.com>
Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 lib/test_xarray.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index 5f9c14e975a40..0598e86af8fc3 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -208,15 +208,19 @@ static noinline void check_xa_mark_1(struct xarray *xa, unsigned long index)
 			XA_BUG_ON(xa, xa_get_mark(xa, i, XA_MARK_2));
 
 			/* We should see two elements in the array */
+			rcu_read_lock();
 			xas_for_each(&xas, entry, ULONG_MAX)
 				seen++;
+			rcu_read_unlock();
 			XA_BUG_ON(xa, seen != 2);
 
 			/* One of which is marked */
 			xas_set(&xas, 0);
 			seen = 0;
+			rcu_read_lock();
 			xas_for_each_marked(&xas, entry, ULONG_MAX, XA_MARK_0)
 				seen++;
+			rcu_read_unlock();
 			XA_BUG_ON(xa, seen != 1);
 		}
 		XA_BUG_ON(xa, xa_get_mark(xa, next, XA_MARK_0));
@@ -442,7 +446,9 @@ static noinline void check_multi_store_1(struct xarray *xa, unsigned long index,
 	XA_BUG_ON(xa, xa_load(xa, max) != NULL);
 	XA_BUG_ON(xa, xa_load(xa, min - 1) != NULL);
 
+	xas_lock(&xas);
 	XA_BUG_ON(xa, xas_store(&xas, xa_mk_value(min)) != xa_mk_value(index));
+	xas_unlock(&xas);
 	XA_BUG_ON(xa, xa_load(xa, min) != xa_mk_value(min));
 	XA_BUG_ON(xa, xa_load(xa, max - 1) != xa_mk_value(min));
 	XA_BUG_ON(xa, xa_load(xa, max) != NULL);
@@ -458,9 +464,11 @@ static noinline void check_multi_store_2(struct xarray *xa, unsigned long index,
 	XA_STATE(xas, xa, index);
 	xa_store_order(xa, index, order, xa_mk_value(0), GFP_KERNEL);
 
+	xas_lock(&xas);
 	XA_BUG_ON(xa, xas_store(&xas, xa_mk_value(1)) != xa_mk_value(0));
 	XA_BUG_ON(xa, xas.xa_index != index);
 	XA_BUG_ON(xa, xas_store(&xas, NULL) != xa_mk_value(1));
+	xas_unlock(&xas);
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
 #endif
@@ -1180,10 +1188,12 @@ static noinline void check_account(struct xarray *xa)
 		XA_STATE(xas, xa, 1 << order);
 
 		xa_store_order(xa, 0, order, xa, GFP_KERNEL);
+		rcu_read_lock();
 		xas_load(&xas);
 		XA_BUG_ON(xa, xas.xa_node->count == 0);
 		XA_BUG_ON(xa, xas.xa_node->count > (1 << order));
 		XA_BUG_ON(xa, xas.xa_node->nr_values != 0);
+		rcu_read_unlock();
 
 		xa_store_order(xa, 1 << order, order, xa_mk_value(1 << order),
 				GFP_KERNEL);
-- 
GitLab


From 7e241f647dc7087a0401418a187f3f5b527cc690 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Thu, 8 Nov 2018 15:55:37 +0100
Subject: [PATCH 1209/1890] libceph: fall back to sendmsg for slab pages

skb_can_coalesce() allows coalescing neighboring slab objects into
a single frag:

  return page == skb_frag_page(frag) &&
         off == frag->page_offset + skb_frag_size(frag);

ceph_tcp_sendpage() can be handed slab pages.  One example of this is
XFS: it passes down sector sized slab objects for its metadata I/O.  If
the kernel client is co-located on the OSD node, the skb may go through
loopback and pop on the receive side with the exact same set of frags.
When tcp_recvmsg() attempts to copy out such a frag, hardened usercopy
complains because the size exceeds the object's allocated size:

  usercopy: kernel memory exposure attempt detected from ffff9ba917f20a00 (kmalloc-512) (1024 bytes)

Although skb_can_coalesce() could be taught to return false if the
resulting frag would cross a slab object boundary, we already have
a fallback for non-refcounted pages.  Utilize it for slab pages too.

Cc: stable@vger.kernel.org # 4.8+
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 net/ceph/messenger.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 57fcc6b4bf6e1..2f126eff275d5 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -580,9 +580,15 @@ static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
 	struct bio_vec bvec;
 	int ret;
 
-	/* sendpage cannot properly handle pages with page_count == 0,
-	 * we need to fallback to sendmsg if that's the case */
-	if (page_count(page) >= 1)
+	/*
+	 * sendpage cannot properly handle pages with page_count == 0,
+	 * we need to fall back to sendmsg if that's the case.
+	 *
+	 * Same goes for slab pages: skb_can_coalesce() allows
+	 * coalescing neighboring slab objects into a single frag which
+	 * triggers one of hardened usercopy checks.
+	 */
+	if (page_count(page) >= 1 && !PageSlab(page))
 		return __ceph_tcp_sendpage(sock, page, offset, size, more);
 
 	bvec.bv_page = page;
-- 
GitLab


From a36b5444b1ec79ed5fedd12295626c05b4f788eb Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Mon, 19 Nov 2018 08:01:03 +0100
Subject: [PATCH 1210/1890] MAINTAINERS: Add myself as third phylib maintainer

Add myself as third phylib maintainer.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Acked-by: Andrew Lunn <andrew@lunn.ch>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 77b11742785de..d258515ac1171 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5534,6 +5534,7 @@ F:	net/bridge/
 ETHERNET PHY LIBRARY
 M:	Andrew Lunn <andrew@lunn.ch>
 M:	Florian Fainelli <f.fainelli@gmail.com>
+M:	Heiner Kallweit <hkallweit1@gmail.com>
 L:	netdev@vger.kernel.org
 S:	Maintained
 F:	Documentation/ABI/testing/sysfs-bus-mdio
-- 
GitLab


From 8feb8efef97a134933620071e0b6384cb3238b4e Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 19 Nov 2018 16:56:22 -0300
Subject: [PATCH 1211/1890] tools build feature: Check if
 get_current_dir_name() is available

As the namespace support code will use this, which is not available in
some non _GNU_SOURCE libraries such as Android's bionic used in my
container build tests (r12b and r15c at the moment).

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-x56ypm940pwclwu45d7jfj47@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/build/Makefile.feature                   |  1 +
 tools/build/feature/Makefile                   |  4 ++++
 tools/build/feature/test-all.c                 |  5 +++++
 .../build/feature/test-get_current_dir_name.c  | 10 ++++++++++
 tools/perf/Makefile.config                     |  5 +++++
 tools/perf/util/Build                          |  1 +
 tools/perf/util/get_current_dir_name.c         | 18 ++++++++++++++++++
 tools/perf/util/util.h                         |  4 ++++
 8 files changed, 48 insertions(+)
 create mode 100644 tools/build/feature/test-get_current_dir_name.c
 create mode 100644 tools/perf/util/get_current_dir_name.c

diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index f216b2f5c3d7b..d74bb9414d7c6 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -33,6 +33,7 @@ FEATURE_TESTS_BASIC :=                  \
         dwarf_getlocations              \
         fortify-source                  \
         sync-compare-and-swap           \
+        get_current_dir_name            \
         glibc                           \
         gtk2                            \
         gtk2-infobar                    \
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index 0516259be70f0..304b984f11b99 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -7,6 +7,7 @@ FILES=                                          \
          test-dwarf_getlocations.bin            \
          test-fortify-source.bin                \
          test-sync-compare-and-swap.bin         \
+         test-get_current_dir_name.bin          \
          test-glibc.bin                         \
          test-gtk2.bin                          \
          test-gtk2-infobar.bin                  \
@@ -101,6 +102,9 @@ $(OUTPUT)test-bionic.bin:
 $(OUTPUT)test-libelf.bin:
 	$(BUILD) -lelf
 
+$(OUTPUT)test-get_current_dir_name.bin:
+	$(BUILD)
+
 $(OUTPUT)test-glibc.bin:
 	$(BUILD)
 
diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c
index 8dc20a61341f6..56722bfe6bdd3 100644
--- a/tools/build/feature/test-all.c
+++ b/tools/build/feature/test-all.c
@@ -34,6 +34,10 @@
 # include "test-libelf-mmap.c"
 #undef main
 
+#define main main_test_get_current_dir_name
+# include "test-get_current_dir_name.c"
+#undef main
+
 #define main main_test_glibc
 # include "test-glibc.c"
 #undef main
@@ -174,6 +178,7 @@ int main(int argc, char *argv[])
 	main_test_hello();
 	main_test_libelf();
 	main_test_libelf_mmap();
+	main_test_get_current_dir_name();
 	main_test_glibc();
 	main_test_dwarf();
 	main_test_dwarf_getlocations();
diff --git a/tools/build/feature/test-get_current_dir_name.c b/tools/build/feature/test-get_current_dir_name.c
new file mode 100644
index 0000000000000..573000f932124
--- /dev/null
+++ b/tools/build/feature/test-get_current_dir_name.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <stdlib.h>
+
+int main(void)
+{
+	free(get_current_dir_name());
+	return 0;
+}
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index e30d20fb482d0..a0e8c23f91255 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -299,6 +299,11 @@ ifndef NO_BIONIC
   endif
 endif
 
+ifeq ($(feature-get_current_dir_name), 1)
+  CFLAGS += -DHAVE_GET_CURRENT_DIR_NAME
+endif
+
+
 ifdef NO_LIBELF
   NO_DWARF := 1
   NO_DEMANGLE := 1
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index ecd9f9ceda77c..b7bf201fe8a87 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -10,6 +10,7 @@ libperf-y += evlist.o
 libperf-y += evsel.o
 libperf-y += evsel_fprintf.o
 libperf-y += find_bit.o
+libperf-y += get_current_dir_name.o
 libperf-y += kallsyms.o
 libperf-y += levenshtein.o
 libperf-y += llvm-utils.o
diff --git a/tools/perf/util/get_current_dir_name.c b/tools/perf/util/get_current_dir_name.c
new file mode 100644
index 0000000000000..267aa609a5823
--- /dev/null
+++ b/tools/perf/util/get_current_dir_name.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2018, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+//
+#ifndef HAVE_GET_CURRENT_DIR_NAME
+#include "util.h"
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdlib.h>
+
+/* Android's 'bionic' library, for one, doesn't have this */
+
+char *get_current_dir_name(void)
+{
+	char pwd[PATH_MAX];
+
+	return getcwd(pwd, sizeof(pwd)) == NULL ? NULL : strdup(pwd);
+}
+#endif // HAVE_GET_CURRENT_DIR_NAME
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 14508ee7707a4..ece040b799f6e 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -59,6 +59,10 @@ int fetch_kernel_version(unsigned int *puint,
 
 const char *perf_tip(const char *dirpath);
 
+#ifndef HAVE_GET_CURRENT_DIR_NAME
+char *get_current_dir_name(void);
+#endif
+
 #ifndef HAVE_SCHED_GETCPU_SUPPORT
 int sched_getcpu(void);
 #endif
-- 
GitLab


From b01c1f69c8660eaeab7d365cd570103c5c073a02 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Thu, 1 Nov 2018 18:00:01 +0100
Subject: [PATCH 1212/1890] perf tools: Restore proper cwd on return from mnt
 namespace

When reporting on 'record' server we try to retrieve/use the mnt
namespace of the profiled tasks. We use following API with cookie to
hold the return namespace, roughly:

  nsinfo__mountns_enter(struct nsinfo *nsi, struct nscookie *nc)
    setns(newns, 0);
  ...
  new ns related open..
  ...
  nsinfo__mountns_exit(struct nscookie *nc)
    setns(nc->oldns)

Once finished we setns to old namespace, which also sets the current
working directory (cwd) to "/", trashing the cwd we had.

This is mostly fine, because we use absolute paths almost everywhere,
but it screws up 'perf diff':

  # perf diff
  failed to open perf.data: No such file or directory  (try 'perf record' first)
  ...

Adding the current working directory to be part of the cookie and
restoring it in the nsinfo__mountns_exit call.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Krister Johansen <kjlx@templeofstupid.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Fixes: 843ff37bb59e ("perf symbols: Find symbols in different mount namespace")
Link: http://lkml.kernel.org/r/20181101170001.30019-1-jolsa@kernel.org
[ No need to check for NULL args for free(), use zfree() for struct members ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/namespaces.c | 17 +++++++++++++++--
 tools/perf/util/namespaces.h |  1 +
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c
index cf8bd123cf73f..aed170bd4384e 100644
--- a/tools/perf/util/namespaces.c
+++ b/tools/perf/util/namespaces.c
@@ -18,6 +18,7 @@
 #include <stdio.h>
 #include <string.h>
 #include <unistd.h>
+#include <asm/bug.h>
 
 struct namespaces *namespaces__new(struct namespaces_event *event)
 {
@@ -186,6 +187,7 @@ void nsinfo__mountns_enter(struct nsinfo *nsi,
 	char curpath[PATH_MAX];
 	int oldns = -1;
 	int newns = -1;
+	char *oldcwd = NULL;
 
 	if (nc == NULL)
 		return;
@@ -199,9 +201,13 @@ void nsinfo__mountns_enter(struct nsinfo *nsi,
 	if (snprintf(curpath, PATH_MAX, "/proc/self/ns/mnt") >= PATH_MAX)
 		return;
 
+	oldcwd = get_current_dir_name();
+	if (!oldcwd)
+		return;
+
 	oldns = open(curpath, O_RDONLY);
 	if (oldns < 0)
-		return;
+		goto errout;
 
 	newns = open(nsi->mntns_path, O_RDONLY);
 	if (newns < 0)
@@ -210,11 +216,13 @@ void nsinfo__mountns_enter(struct nsinfo *nsi,
 	if (setns(newns, CLONE_NEWNS) < 0)
 		goto errout;
 
+	nc->oldcwd = oldcwd;
 	nc->oldns = oldns;
 	nc->newns = newns;
 	return;
 
 errout:
+	free(oldcwd);
 	if (oldns > -1)
 		close(oldns);
 	if (newns > -1)
@@ -223,11 +231,16 @@ void nsinfo__mountns_enter(struct nsinfo *nsi,
 
 void nsinfo__mountns_exit(struct nscookie *nc)
 {
-	if (nc == NULL || nc->oldns == -1 || nc->newns == -1)
+	if (nc == NULL || nc->oldns == -1 || nc->newns == -1 || !nc->oldcwd)
 		return;
 
 	setns(nc->oldns, CLONE_NEWNS);
 
+	if (nc->oldcwd) {
+		WARN_ON_ONCE(chdir(nc->oldcwd));
+		zfree(&nc->oldcwd);
+	}
+
 	if (nc->oldns > -1) {
 		close(nc->oldns);
 		nc->oldns = -1;
diff --git a/tools/perf/util/namespaces.h b/tools/perf/util/namespaces.h
index cae1a9a397222..d5f46c09ea319 100644
--- a/tools/perf/util/namespaces.h
+++ b/tools/perf/util/namespaces.h
@@ -38,6 +38,7 @@ struct nsinfo {
 struct nscookie {
 	int			oldns;
 	int			newns;
+	char			*oldcwd;
 };
 
 int nsinfo__init(struct nsinfo *nsi);
-- 
GitLab


From 53f00f4548ef700e3c6867a35fd7d4f824cd165a Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 19 Nov 2018 12:17:42 -0800
Subject: [PATCH 1213/1890] tools headers uapi: Synchronize i915_drm.h

To pick up the changes in:

  900ccf30f9e1 ("drm/i915: Only force GGTT coherency w/a on required chipsets")

No changes are required in tools/ nor does anything gets automatically
generated to be used in the 'perf trace' syscall arg beautifiers.

This silences this perf build warning:

  Warning: Kernel ABI header at 'tools/include/uapi/drm/i915_drm.h' differs from latest version at 'include/uapi/drm/i915_drm.h'
  diff -u tools/include/uapi/drm/i915_drm.h include/uapi/drm/i915_drm.h

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-t2vor2wegv41gt5n49095kly@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/uapi/drm/i915_drm.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index 7f5634ce8e885..a4446f452040a 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -529,6 +529,28 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_CS_TIMESTAMP_FREQUENCY 51
 
+/*
+ * Once upon a time we supposed that writes through the GGTT would be
+ * immediately in physical memory (once flushed out of the CPU path). However,
+ * on a few different processors and chipsets, this is not necessarily the case
+ * as the writes appear to be buffered internally. Thus a read of the backing
+ * storage (physical memory) via a different path (with different physical tags
+ * to the indirect write via the GGTT) will see stale values from before
+ * the GGTT write. Inside the kernel, we can for the most part keep track of
+ * the different read/write domains in use (e.g. set-domain), but the assumption
+ * of coherency is baked into the ABI, hence reporting its true state in this
+ * parameter.
+ *
+ * Reports true when writes via mmap_gtt are immediately visible following an
+ * lfence to flush the WCB.
+ *
+ * Reports false when writes via mmap_gtt are indeterminately delayed in an in
+ * internal buffer and are _not_ immediately visible to third parties accessing
+ * directly via mmap_cpu/mmap_wc. Use of mmap_gtt as part of an IPC
+ * communications channel when reporting false is strongly disadvised.
+ */
+#define I915_PARAM_MMAP_GTT_COHERENT	52
+
 typedef struct drm_i915_getparam {
 	__s32 param;
 	/*
-- 
GitLab


From 65e259d5c4ae425f7c80e40b838eba1b9e8608fd Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 19 Nov 2018 12:27:07 -0800
Subject: [PATCH 1214/1890] tools arch x86: Update tools's copy of
 cpufeatures.h

To get the changes in the following csets:

  ace6485a0326 ("x86/cpufeatures: Enumerate MOVDIR64B instruction")
  33823f4d63f7 ("x86/cpufeatures: Enumerate MOVDIRI instruction")

No tools were affected, copy it to silence this perf tool build warning:

  Warning: Kernel ABI header at 'tools/arch/x86/include/asm/cpufeatures.h' differs from latest version at 'arch/x86/include/asm/cpufeatures.h'
  diff -u tools/arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/cpufeatures.h

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Link: https://lkml.kernel.org/n/tip-83kcyqa1qkxkhm1s7q3hbpel@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/x86/include/asm/cpufeatures.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 89a048c2faec7..28c4a502b4197 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -331,6 +331,8 @@
 #define X86_FEATURE_LA57		(16*32+16) /* 5-level page tables */
 #define X86_FEATURE_RDPID		(16*32+22) /* RDPID instruction */
 #define X86_FEATURE_CLDEMOTE		(16*32+25) /* CLDEMOTE instruction */
+#define X86_FEATURE_MOVDIRI		(16*32+27) /* MOVDIRI instruction */
+#define X86_FEATURE_MOVDIR64B		(16*32+28) /* MOVDIR64B instruction */
 
 /* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */
 #define X86_FEATURE_OVERFLOW_RECOV	(17*32+ 0) /* MCA overflow recovery support */
-- 
GitLab


From 83d9bdeaedd8f7aa9749828364f0018cacd1dad3 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 19 Nov 2018 12:31:45 -0800
Subject: [PATCH 1215/1890] tools uapi asm-generic: Synchronize ioctls.h

To pick up the changes in:

  ad8c0eaa0a41 ("tty/serial_core: add ISO7816 infrastructure")

That is a change that imply a change to be made in tools/perf/trace/beauty/ioctl.c to
make 'perf trace' ioctl syscall argument beautifier to support these new
commands:  TIOCGISO7816 and TIOCSISO7816.

This is not yet done automatically by a script like is done for some
other headers, for instance:

  $ tools/perf/trace/beauty/drm_ioctl.sh | head
  #ifndef DRM_COMMAND_BASE
  #define DRM_COMMAND_BASE                0x40
  #endif
  static const char *drm_ioctl_cmds[] = {
	[0x00] = "VERSION",
	[0x01] = "GET_UNIQUE",
	[0x02] = "GET_MAGIC",
	[0x03] = "IRQ_BUSID",
	[0x04] = "GET_MAP",
	[0x05] = "GET_CLIENT",
  $

So we will need to change tools/perf/trace/beauty/ioctl.c in a follow up
patch until we switch to a generator script.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Nicolas Ferre <nicolas.ferre@microchip.com>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-zin76fe6iykqsilvo6u47f9o@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/uapi/asm-generic/ioctls.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/include/uapi/asm-generic/ioctls.h b/tools/include/uapi/asm-generic/ioctls.h
index 0406517356629..cdc9f4ca8c275 100644
--- a/tools/include/uapi/asm-generic/ioctls.h
+++ b/tools/include/uapi/asm-generic/ioctls.h
@@ -79,6 +79,8 @@
 #define TIOCGPTLCK	_IOR('T', 0x39, int) /* Get Pty lock state */
 #define TIOCGEXCL	_IOR('T', 0x40, int) /* Get exclusive mode state */
 #define TIOCGPTPEER	_IO('T', 0x41) /* Safely open the slave */
+#define TIOCGISO7816	_IOR('T', 0x42, struct serial_iso7816)
+#define TIOCSISO7816	_IOWR('T', 0x43, struct serial_iso7816)
 
 #define FIONCLEX	0x5450
 #define FIOCLEX		0x5451
-- 
GitLab


From a4243e1494532ab8fa679a4134153149a71fa331 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 19 Nov 2018 12:38:50 -0800
Subject: [PATCH 1216/1890] perf tools beauty ioctl: Support new ISO7816
 commands

Introduced in:

  ad8c0eaa0a41 ("tty/serial_core: add ISO7816 infrastructure")

Now 'perf trace' will be able to pretty-print the 'cmd' ioctl arg when
used in capable systems with software emitting those commands.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Nicolas Ferre <nicolas.ferre@microchip.com>
Cc: Wang Nan <wangnan0@huawei.com>
Link: https://lkml.kernel.org/n/tip-7bds48dhckfnleie08mit314@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/trace/beauty/ioctl.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/perf/trace/beauty/ioctl.c b/tools/perf/trace/beauty/ioctl.c
index 5d2a7fd8d4077..eae59ad15ce3f 100644
--- a/tools/perf/trace/beauty/ioctl.c
+++ b/tools/perf/trace/beauty/ioctl.c
@@ -31,6 +31,7 @@ static size_t ioctl__scnprintf_tty_cmd(int nr, int dir, char *bf, size_t size)
 	"TCSETSW2", "TCSETSF2", "TIOCGRS48", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
 	"TIOCGDEV", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG", "TIOCVHANGUP", "TIOCGPKT",
 	"TIOCGPTLCK", [_IOC_NR(TIOCGEXCL)] = "TIOCGEXCL", "TIOCGPTPEER",
+	"TIOCGISO7816", "TIOCSISO7816",
 	[_IOC_NR(FIONCLEX)] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
 	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
 	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
-- 
GitLab


From 02968ccf0125d39b08ecef5946300a8a873c0942 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sun, 18 Nov 2018 15:07:38 +0800
Subject: [PATCH 1217/1890] sctp: count sk_wmem_alloc by skb truesize in
 sctp_packet_transmit

Now sctp increases sk_wmem_alloc by 1 when doing set_owner_w for the
skb allocked in sctp_packet_transmit and decreases by 1 when freeing
this skb.

But when this skb goes through networking stack, some subcomponents
might change skb->truesize and add the same amount on sk_wmem_alloc.
However sctp doesn't know the amount to decrease by, it would cause
a leak on sk->sk_wmem_alloc and the sock can never be freed.

Xiumei found this issue when it hit esp_output_head() by using sctp
over ipsec, where skb->truesize is added and so is sk->sk_wmem_alloc.

Since sctp has used sk_wmem_queued to count for writable space since
Commit cd305c74b0f8 ("sctp: use sk_wmem_queued to check for writable
space"), it's ok to fix it by counting sk_wmem_alloc by skb truesize
in sctp_packet_transmit.

Fixes: cac2661c53f3 ("esp4: Avoid skb_cow_data whenever possible")
Reported-by: Xiumei Mu <xmu@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/output.c | 21 +--------------------
 1 file changed, 1 insertion(+), 20 deletions(-)

diff --git a/net/sctp/output.c b/net/sctp/output.c
index 67939ad99c013..88dfa6ae1fb42 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -396,25 +396,6 @@ enum sctp_xmit sctp_packet_append_chunk(struct sctp_packet *packet,
 	return retval;
 }
 
-static void sctp_packet_release_owner(struct sk_buff *skb)
-{
-	sk_free(skb->sk);
-}
-
-static void sctp_packet_set_owner_w(struct sk_buff *skb, struct sock *sk)
-{
-	skb_orphan(skb);
-	skb->sk = sk;
-	skb->destructor = sctp_packet_release_owner;
-
-	/*
-	 * The data chunks have already been accounted for in sctp_sendmsg(),
-	 * therefore only reserve a single byte to keep socket around until
-	 * the packet has been transmitted.
-	 */
-	refcount_inc(&sk->sk_wmem_alloc);
-}
-
 static void sctp_packet_gso_append(struct sk_buff *head, struct sk_buff *skb)
 {
 	if (SCTP_OUTPUT_CB(head)->last == head)
@@ -601,7 +582,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
 	if (!head)
 		goto out;
 	skb_reserve(head, packet->overhead + MAX_HEADER);
-	sctp_packet_set_owner_w(head, sk);
+	skb_set_owner_w(head, sk);
 
 	/* set sctp header */
 	sh = skb_push(head, sizeof(struct sctphdr));
-- 
GitLab


From cc3ccf26f0649089b3a34a2781977755ea36e72c Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sun, 18 Nov 2018 15:21:53 +0800
Subject: [PATCH 1218/1890] sctp: not allow to set asoc prsctp_enable by
 sockopt

As rfc7496#section4.5 says about SCTP_PR_SUPPORTED:

   This socket option allows the enabling or disabling of the
   negotiation of PR-SCTP support for future associations.  For existing
   associations, it allows one to query whether or not PR-SCTP support
   was negotiated on a particular association.

It means only sctp sock's prsctp_enable can be set.

Note that for the limitation of SCTP_{CURRENT|ALL}_ASSOC, we will
add it when introducing SCTP_{FUTURE|CURRENT|ALL}_ASSOC for linux
sctp in another patchset.

v1->v2:
  - drop the params.assoc_id check as Neil suggested.

Fixes: 28aa4c26fce2 ("sctp: add SCTP_PR_SUPPORTED on sctp sockopt")
Reported-by: Ying Xu <yinxu@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 26 +++++---------------------
 1 file changed, 5 insertions(+), 21 deletions(-)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 739f3e50120dd..bf618d1b41fd2 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -3940,32 +3940,16 @@ static int sctp_setsockopt_pr_supported(struct sock *sk,
 					unsigned int optlen)
 {
 	struct sctp_assoc_value params;
-	struct sctp_association *asoc;
-	int retval = -EINVAL;
 
 	if (optlen != sizeof(params))
-		goto out;
-
-	if (copy_from_user(&params, optval, optlen)) {
-		retval = -EFAULT;
-		goto out;
-	}
-
-	asoc = sctp_id2assoc(sk, params.assoc_id);
-	if (asoc) {
-		asoc->prsctp_enable = !!params.assoc_value;
-	} else if (!params.assoc_id) {
-		struct sctp_sock *sp = sctp_sk(sk);
+		return -EINVAL;
 
-		sp->ep->prsctp_enable = !!params.assoc_value;
-	} else {
-		goto out;
-	}
+	if (copy_from_user(&params, optval, optlen))
+		return -EFAULT;
 
-	retval = 0;
+	sctp_sk(sk)->ep->prsctp_enable = !!params.assoc_value;
 
-out:
-	return retval;
+	return 0;
 }
 
 static int sctp_setsockopt_default_prinfo(struct sock *sk,
-- 
GitLab


From 69fec325a64383667b8a35df5d48d6ce52fb2782 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sun, 18 Nov 2018 16:14:47 +0800
Subject: [PATCH 1219/1890] Revert "sctp: remove sctp_transport_pmtu_check"

This reverts commit 22d7be267eaa8114dcc28d66c1c347f667d7878a.

The dst's mtu in transport can be updated by a non sctp place like
in xfrm where the MTU information didn't get synced between asoc,
transport and dst, so it is still needed to do the pmtu check
in sctp_packet_config.

Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/sctp.h | 12 ++++++++++++
 net/sctp/output.c       |  3 +++
 2 files changed, 15 insertions(+)

diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 8c2caa370e0f6..ab9242e51d9e0 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -608,4 +608,16 @@ static inline __u32 sctp_dst_mtu(const struct dst_entry *dst)
 				 SCTP_DEFAULT_MINSEGMENT));
 }
 
+static inline bool sctp_transport_pmtu_check(struct sctp_transport *t)
+{
+	__u32 pmtu = sctp_dst_mtu(t->dst);
+
+	if (t->pathmtu == pmtu)
+		return true;
+
+	t->pathmtu = pmtu;
+
+	return false;
+}
+
 #endif /* __net_sctp_h__ */
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 88dfa6ae1fb42..b0e74a3e77ec5 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -118,6 +118,9 @@ void sctp_packet_config(struct sctp_packet *packet, __u32 vtag,
 		sctp_transport_route(tp, NULL, sp);
 		if (asoc->param_flags & SPP_PMTUD_ENABLE)
 			sctp_assoc_sync_pmtu(asoc);
+	} else if (!sctp_transport_pmtu_check(tp)) {
+		if (asoc->param_flags & SPP_PMTUD_ENABLE)
+			sctp_assoc_sync_pmtu(asoc);
 	}
 
 	if (asoc->pmtu_pending) {
-- 
GitLab


From 919a52fc4ca137c871f295224507fa3401e08472 Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Fri, 20 Jul 2018 11:37:25 -0400
Subject: [PATCH 1220/1890] drm/amdgpu: Fix oops when
 pp_funcs->switch_power_profile is unset

On Vega20 and other pre-production GPUs, powerplay is not enabled yet.
Check for NULL pointers before calling pp_funcs function pointers.

Also affects Kaveri.

CC: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Tested-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index c31a8849e9f87..1580ec60b89f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -501,8 +501,11 @@ void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
 
-	amdgpu_dpm_switch_power_profile(adev,
-					PP_SMC_POWER_PROFILE_COMPUTE, !idle);
+	if (adev->powerplay.pp_funcs &&
+	    adev->powerplay.pp_funcs->switch_power_profile)
+		amdgpu_dpm_switch_power_profile(adev,
+						PP_SMC_POWER_PROFILE_COMPUTE,
+						!idle);
 }
 
 bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
-- 
GitLab


From 009dd011762925e5e03a59282b1785214f8470e0 Mon Sep 17 00:00:00 2001
From: Evan Quan <evan.quan@amd.com>
Date: Mon, 19 Nov 2018 12:03:24 +0800
Subject: [PATCH 1221/1890] drm/amd/powerplay: disable Vega20 DS related
 features

Disable these features on Vega20 for now.

Signed-off-by: Evan Quan <evan.quan@amd.com>
Acked-by: Feifei Xu<Feifei.Xu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
index 99861f32b1f95..397ee88fec036 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
@@ -75,7 +75,17 @@ static void vega20_set_default_registry_data(struct pp_hwmgr *hwmgr)
 	data->phy_clk_quad_eqn_b = PPREGKEY_VEGA20QUADRATICEQUATION_DFLT;
 	data->phy_clk_quad_eqn_c = PPREGKEY_VEGA20QUADRATICEQUATION_DFLT;
 
-	data->registry_data.disallowed_features = 0x0;
+	/*
+	 * Disable the following features for now:
+	 *   GFXCLK DS
+	 *   SOCLK DS
+	 *   LCLK DS
+	 *   DCEFCLK DS
+	 *   FCLK DS
+	 *   MP1CLK DS
+	 *   MP0CLK DS
+	 */
+	data->registry_data.disallowed_features = 0xE0041C00;
 	data->registry_data.od_state_in_dc_support = 0;
 	data->registry_data.thermal_support = 1;
 	data->registry_data.skip_baco_hardware = 0;
-- 
GitLab


From 8d4d7c58994759bbd9f4fec32d88bf0e0b89302e Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 19 Nov 2018 12:55:12 +0100
Subject: [PATCH 1222/1890] drm/amdgpu: Add missing firmware entry for HAINAN
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Due to lack of MODULE_FIRMWARE() with hainan_mc.bin, the driver
doesn't work properly in initrd.  Let's add it.

Bugzilla: https://bugzilla.suse.com/show_bug.cgi?id=1116239
Fixes: 8eaf2b1faaf4 ("drm/amdgpu: switch firmware path for SI parts")
Cc: <stable@vger.kernel.org>
Reviewed-by: Christian KÃ¶nig <christian.koenig@amd.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index e1c2b4e9c7b23..73ad02aea2b2e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -46,6 +46,7 @@ MODULE_FIRMWARE("amdgpu/tahiti_mc.bin");
 MODULE_FIRMWARE("amdgpu/pitcairn_mc.bin");
 MODULE_FIRMWARE("amdgpu/verde_mc.bin");
 MODULE_FIRMWARE("amdgpu/oland_mc.bin");
+MODULE_FIRMWARE("amdgpu/hainan_mc.bin");
 MODULE_FIRMWARE("amdgpu/si58_mc.bin");
 
 #define MC_SEQ_MISC0__MT__MASK   0xf0000000
-- 
GitLab


From a4233cc944d1b7125d906f1fa276bda3df48df0c Mon Sep 17 00:00:00 2001
From: "Greathouse, Joseph" <Joseph.Greathouse@amd.com>
Date: Mon, 19 Nov 2018 16:59:28 +0000
Subject: [PATCH 1223/1890] drm/amd/pp: handle negative values when reading OD

Reading the sysfs files pp_sclk_od and pp_mclk_od return the
percentage difference between the VBIOS-provided default
frequency and the current (possibly user-set) frequency in
the highest SCLK and MCLK DPM states, respectively.

Writing to these files provides an easy mechanism for
setting a higher-than-default maximum frequency. We
normally only allow values >= 0 to be written here.

However, with the addition of pp_od_clk_voltage, we now
allow users to set custom DPM tables. If they then set
the maximum DPM state to something less than the default,
later reads of pp_*_od should return a negative value.
The highest DPM state is now less than the VBIOS-provided
default, so the percentage is negative.

The math to calculate this was originally performed with
unsigned values, meaning reads that should return negative
values returned meaningless data. This patch corrects that
issue and normalizes how all of the calculations are done
across the various hwmgr types.

Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Joseph Greathouse <Joseph.Greathouse@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c  | 20 +++++++--------
 .../drm/amd/powerplay/hwmgr/vega10_hwmgr.c    | 25 ++++++++-----------
 .../drm/amd/powerplay/hwmgr/vega12_hwmgr.c    | 23 ++++++++---------
 .../drm/amd/powerplay/hwmgr/vega20_hwmgr.c    | 18 +++++++------
 4 files changed, 40 insertions(+), 46 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
index ed35ec0341e67..88f6b35ea6fee 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
@@ -4525,12 +4525,12 @@ static int smu7_get_sclk_od(struct pp_hwmgr *hwmgr)
 	struct smu7_single_dpm_table *sclk_table = &(data->dpm_table.sclk_table);
 	struct smu7_single_dpm_table *golden_sclk_table =
 			&(data->golden_dpm_table.sclk_table);
-	int value;
+	int value = sclk_table->dpm_levels[sclk_table->count - 1].value;
+	int golden_value = golden_sclk_table->dpm_levels
+			[golden_sclk_table->count - 1].value;
 
-	value = (sclk_table->dpm_levels[sclk_table->count - 1].value -
-			golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value) *
-			100 /
-			golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value;
+	value -= golden_value;
+	value = DIV_ROUND_UP(value * 100, golden_value);
 
 	return value;
 }
@@ -4567,12 +4567,12 @@ static int smu7_get_mclk_od(struct pp_hwmgr *hwmgr)
 	struct smu7_single_dpm_table *mclk_table = &(data->dpm_table.mclk_table);
 	struct smu7_single_dpm_table *golden_mclk_table =
 			&(data->golden_dpm_table.mclk_table);
-	int value;
+        int value = mclk_table->dpm_levels[mclk_table->count - 1].value;
+	int golden_value = golden_mclk_table->dpm_levels
+			[golden_mclk_table->count - 1].value;
 
-	value = (mclk_table->dpm_levels[mclk_table->count - 1].value -
-			golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value) *
-			100 /
-			golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value;
+	value -= golden_value;
+	value = DIV_ROUND_UP(value * 100, golden_value);
 
 	return value;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index 8c4db86bb4b77..e2bc6e0c229f9 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -4522,15 +4522,13 @@ static int vega10_get_sclk_od(struct pp_hwmgr *hwmgr)
 	struct vega10_single_dpm_table *sclk_table = &(data->dpm_table.gfx_table);
 	struct vega10_single_dpm_table *golden_sclk_table =
 			&(data->golden_dpm_table.gfx_table);
-	int value;
-
-	value = (sclk_table->dpm_levels[sclk_table->count - 1].value -
-			golden_sclk_table->dpm_levels
-			[golden_sclk_table->count - 1].value) *
-			100 /
-			golden_sclk_table->dpm_levels
+	int value = sclk_table->dpm_levels[sclk_table->count - 1].value;
+	int golden_value = golden_sclk_table->dpm_levels
 			[golden_sclk_table->count - 1].value;
 
+	value -= golden_value;
+	value = DIV_ROUND_UP(value * 100, golden_value);
+
 	return value;
 }
 
@@ -4575,16 +4573,13 @@ static int vega10_get_mclk_od(struct pp_hwmgr *hwmgr)
 	struct vega10_single_dpm_table *mclk_table = &(data->dpm_table.mem_table);
 	struct vega10_single_dpm_table *golden_mclk_table =
 			&(data->golden_dpm_table.mem_table);
-	int value;
-
-	value = (mclk_table->dpm_levels
-			[mclk_table->count - 1].value -
-			golden_mclk_table->dpm_levels
-			[golden_mclk_table->count - 1].value) *
-			100 /
-			golden_mclk_table->dpm_levels
+	int value = mclk_table->dpm_levels[mclk_table->count - 1].value;
+	int golden_value = golden_mclk_table->dpm_levels
 			[golden_mclk_table->count - 1].value;
 
+	value -= golden_value;
+	value = DIV_ROUND_UP(value * 100, golden_value);
+
 	return value;
 }
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
index 74bc37308dc09..54364444ecd12 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
@@ -2243,12 +2243,12 @@ static int vega12_get_sclk_od(struct pp_hwmgr *hwmgr)
 	struct vega12_single_dpm_table *sclk_table = &(data->dpm_table.gfx_table);
 	struct vega12_single_dpm_table *golden_sclk_table =
 			&(data->golden_dpm_table.gfx_table);
-	int value;
+	int value = sclk_table->dpm_levels[sclk_table->count - 1].value;
+	int golden_value = golden_sclk_table->dpm_levels
+			[golden_sclk_table->count - 1].value;
 
-	value = (sclk_table->dpm_levels[sclk_table->count - 1].value -
-			golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value) *
-			100 /
-			golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value;
+	value -= golden_value;
+	value = DIV_ROUND_UP(value * 100, golden_value);
 
 	return value;
 }
@@ -2264,16 +2264,13 @@ static int vega12_get_mclk_od(struct pp_hwmgr *hwmgr)
 	struct vega12_single_dpm_table *mclk_table = &(data->dpm_table.mem_table);
 	struct vega12_single_dpm_table *golden_mclk_table =
 			&(data->golden_dpm_table.mem_table);
-	int value;
-
-	value = (mclk_table->dpm_levels
-			[mclk_table->count - 1].value -
-			golden_mclk_table->dpm_levels
-			[golden_mclk_table->count - 1].value) *
-			100 /
-			golden_mclk_table->dpm_levels
+	int value = mclk_table->dpm_levels[mclk_table->count - 1].value;
+	int golden_value = golden_mclk_table->dpm_levels
 			[golden_mclk_table->count - 1].value;
 
+	value -= golden_value;
+	value = DIV_ROUND_UP(value * 100, golden_value);
+
 	return value;
 }
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
index 397ee88fec036..b4eadd47f3a44 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
@@ -1323,12 +1323,13 @@ static int vega20_get_sclk_od(
 			&(data->dpm_table.gfx_table);
 	struct vega20_single_dpm_table *golden_sclk_table =
 			&(data->golden_dpm_table.gfx_table);
-	int value;
+	int value = sclk_table->dpm_levels[sclk_table->count - 1].value;
+	int golden_value = golden_sclk_table->dpm_levels
+			[golden_sclk_table->count - 1].value;
 
 	/* od percentage */
-	value = DIV_ROUND_UP((sclk_table->dpm_levels[sclk_table->count - 1].value -
-		golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value) * 100,
-		golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value);
+	value -= golden_value;
+	value = DIV_ROUND_UP(value * 100, golden_value);
 
 	return value;
 }
@@ -1368,12 +1369,13 @@ static int vega20_get_mclk_od(
 			&(data->dpm_table.mem_table);
 	struct vega20_single_dpm_table *golden_mclk_table =
 			&(data->golden_dpm_table.mem_table);
-	int value;
+	int value = mclk_table->dpm_levels[mclk_table->count - 1].value;
+	int golden_value = golden_mclk_table->dpm_levels
+			[golden_mclk_table->count - 1].value;
 
 	/* od percentage */
-	value = DIV_ROUND_UP((mclk_table->dpm_levels[mclk_table->count - 1].value -
-		golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value) * 100,
-		golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value);
+	value -= golden_value;
+	value = DIV_ROUND_UP(value * 100, golden_value);
 
 	return value;
 }
-- 
GitLab


From a6a66f80c85e8e20573ca03fabf32445954a88d5 Mon Sep 17 00:00:00 2001
From: Thor Thayer <thor.thayer@linux.intel.com>
Date: Fri, 16 Nov 2018 08:25:49 -0600
Subject: [PATCH 1224/1890] mtd: spi-nor: Fix Cadence QSPI page fault kernel
 panic

The current Cadence QSPI driver caused a kernel panic sporadically
when writing to QSPI. The problem was caused by writing more bytes
than needed because the QSPI operated on 4 bytes at a time.
<snip>
[   11.202044] Unable to handle kernel paging request at virtual address bffd3000
[   11.209254] pgd = e463054d
[   11.211948] [bffd3000] *pgd=2fffb811, *pte=00000000, *ppte=00000000
[   11.218202] Internal error: Oops: 7 [#1] SMP ARM
[   11.222797] Modules linked in:
[   11.225844] CPU: 1 PID: 1317 Comm: systemd-hwdb Not tainted 4.17.7-d0c45cd44a8f
[   11.235796] Hardware name: Altera SOCFPGA Arria10
[   11.240487] PC is at __raw_writesl+0x70/0xd4
[   11.244741] LR is at cqspi_write+0x1a0/0x2cc
</snip>
On a page boundary limit the number of bytes copied from the tx buffer
to remain within the page.

This patch uses a temporary buffer to hold the 4 bytes to write and then
copies only the bytes required from the tx buffer.

Reported-by: Adrian Amborzewicz <adrian.ambrozewicz@intel.com>
Signed-off-by: Thor Thayer <thor.thayer@linux.intel.com>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
---
 drivers/mtd/spi-nor/cadence-quadspi.c | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/drivers/mtd/spi-nor/cadence-quadspi.c b/drivers/mtd/spi-nor/cadence-quadspi.c
index d846428ef038e..04cedd3a2bf66 100644
--- a/drivers/mtd/spi-nor/cadence-quadspi.c
+++ b/drivers/mtd/spi-nor/cadence-quadspi.c
@@ -644,9 +644,23 @@ static int cqspi_indirect_write_execute(struct spi_nor *nor, loff_t to_addr,
 		ndelay(cqspi->wr_delay);
 
 	while (remaining > 0) {
+		size_t write_words, mod_bytes;
+
 		write_bytes = remaining > page_size ? page_size : remaining;
-		iowrite32_rep(cqspi->ahb_base, txbuf,
-			      DIV_ROUND_UP(write_bytes, 4));
+		write_words = write_bytes / 4;
+		mod_bytes = write_bytes % 4;
+		/* Write 4 bytes at a time then single bytes. */
+		if (write_words) {
+			iowrite32_rep(cqspi->ahb_base, txbuf, write_words);
+			txbuf += (write_words * 4);
+		}
+		if (mod_bytes) {
+			unsigned int temp = 0xFFFFFFFF;
+
+			memcpy(&temp, txbuf, mod_bytes);
+			iowrite32(temp, cqspi->ahb_base);
+			txbuf += mod_bytes;
+		}
 
 		if (!wait_for_completion_timeout(&cqspi->transfer_complete,
 					msecs_to_jiffies(CQSPI_TIMEOUT_MS))) {
@@ -655,7 +669,6 @@ static int cqspi_indirect_write_execute(struct spi_nor *nor, loff_t to_addr,
 			goto failwr;
 		}
 
-		txbuf += write_bytes;
 		remaining -= write_bytes;
 
 		if (remaining > 0)
-- 
GitLab


From 59e4293149106fb92530f8e56fa3992d8548c5e6 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Wed, 14 Nov 2018 07:46:40 -0800
Subject: [PATCH 1225/1890] xfs: fix shared extent data corruption due to
 missing cow reservation

Page writeback indirectly handles shared extents via the existence
of overlapping COW fork blocks. If COW fork blocks exist, writeback
always performs the associated copy-on-write regardless if the
underlying blocks are actually shared. If the blocks are shared,
then overlapping COW fork blocks must always exist.

fstests shared/010 reproduces a case where a buffered write occurs
over a shared block without performing the requisite COW fork
reservation.  This ultimately causes writeback to the shared extent
and data corruption that is detected across md5 checks of the
filesystem across a mount cycle.

The problem occurs when a buffered write lands over a shared extent
that crosses an extent size hint boundary and that also happens to
have a partial COW reservation that doesn't cover the start and end
blocks of the data fork extent.

For example, a buffered write occurs across the file offset (in FSB
units) range of [29, 57]. A shared extent exists at blocks [29, 35]
and COW reservation already exists at blocks [32, 34]. After
accommodating a COW extent size hint of 32 blocks and the existing
reservation at offset 32, xfs_reflink_reserve_cow() allocates 32
blocks of reservation at offset 0 and returns with COW reservation
across the range of [0, 34]. The associated data fork extent is
still [29, 35], however, which isn't fully covered by the COW
reservation.

This leads to a buffered write at file offset 35 over a shared
extent without associated COW reservation. Writeback eventually
kicks in, performs an overwrite of the underlying shared block and
causes the associated data corruption.

Update xfs_reflink_reserve_cow() to accommodate the fact that a
delalloc allocation request may not fully cover the extent in the
data fork. Trim the data fork extent appropriately, just as is done
for shared extent boundaries and/or existing COW reservations that
happen to overlap the start of the data fork extent. This prevents
shared/010 failures due to data corruption on reflink enabled
filesystems.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_reflink.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index ecdb086bc23e5..c56bdbfcf7ae5 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -296,6 +296,7 @@ xfs_reflink_reserve_cow(
 	if (error)
 		return error;
 
+	xfs_trim_extent(imap, got.br_startoff, got.br_blockcount);
 	trace_xfs_reflink_cow_alloc(ip, &got);
 	return 0;
 }
-- 
GitLab


From da034bcc6aaaf2a6ba6c5b5e63565c5ef4816a0e Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Wed, 14 Nov 2018 21:48:18 -0800
Subject: [PATCH 1226/1890] xfs: make xfs_file_remap_range() static

xfs_file_remap_range() is only used in fs/xfs/xfs_file.c, so make it
static.

This addresses a gcc warning when -Wmissing-prototypes is enabled.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 53c9ab8fb777f..e47425071e654 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -920,7 +920,7 @@ xfs_file_fallocate(
 }
 
 
-loff_t
+STATIC loff_t
 xfs_file_remap_range(
 	struct file		*file_in,
 	loff_t			pos_in,
-- 
GitLab


From f2b18732ee9863ac036759baf616ffa03c252ed5 Mon Sep 17 00:00:00 2001
From: Raed Salem <raeds@mellanox.com>
Date: Thu, 18 Oct 2018 08:55:21 +0300
Subject: [PATCH 1227/1890] net/mlx5: IPSec, Fix the SA context hash key

The commit "net/mlx5: Refactor accel IPSec code" introduced a
bug where asynchronous short time change in hash key value
by create/release SA context might happen during an asynchronous
hash resize operation this could cause a subsequent remove SA
context operation to fail as the key value used during resize is
not the same key value used when remove SA context operation is
invoked.

This commit fixes the bug by defining the SA context hash key
such that it includes only fields that never change during the
lifetime of the SA context object.

Fixes: d6c4f0298cec ("net/mlx5: Refactor accel IPSec code")
Signed-off-by: Raed Salem <raeds@mellanox.com>
Reviewed-by: Aviad Yehezkel <aviadye@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
index 515e3d6de0516..5a22c5874f3bc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
@@ -83,8 +83,14 @@ struct mlx5_fpga_ipsec_rule {
 };
 
 static const struct rhashtable_params rhash_sa = {
-	.key_len = FIELD_SIZEOF(struct mlx5_fpga_ipsec_sa_ctx, hw_sa),
-	.key_offset = offsetof(struct mlx5_fpga_ipsec_sa_ctx, hw_sa),
+	/* Keep out "cmd" field from the key as it's
+	 * value is not constant during the lifetime
+	 * of the key object.
+	 */
+	.key_len = FIELD_SIZEOF(struct mlx5_fpga_ipsec_sa_ctx, hw_sa) -
+		   FIELD_SIZEOF(struct mlx5_ifc_fpga_ipsec_sa_v1, cmd),
+	.key_offset = offsetof(struct mlx5_fpga_ipsec_sa_ctx, hw_sa) +
+		      FIELD_SIZEOF(struct mlx5_ifc_fpga_ipsec_sa_v1, cmd),
 	.head_offset = offsetof(struct mlx5_fpga_ipsec_sa_ctx, hash),
 	.automatic_shrinking = true,
 	.min_size = 1,
-- 
GitLab


From acf3766b36d8e59ecbc307894c6d05703ee48014 Mon Sep 17 00:00:00 2001
From: Denis Drozdov <denisd@mellanox.com>
Date: Thu, 27 Sep 2018 14:17:54 +0300
Subject: [PATCH 1228/1890] net/mlx5e: IPoIB, Reset QP after channels are
 closed

The mlx5e channels should be closed before mlx5i_uninit_underlay_qp
puts the QP into RST (reset) state during mlx5i_close. Currently QP
state incorrectly set to RST before channels got deactivated and closed,
since mlx5_post_send request expects QP in RTS (Ready To Send) state.

The fix is to keep QP in RTS state until mlx5e channels get closed
and to reset QP afterwards.

Also this fix is simply correct in order to keep the open/close flow
symmetric, i.e mlx5i_init_underlay_qp() is called first thing at open,
the correct thing to do is to call mlx5i_uninit_underlay_qp() last thing
at close, which is exactly what this patch is doing.

Fixes: dae37456c8ac ("net/mlx5: Support for attaching multiple underlay QPs to root flow table")
Signed-off-by: Denis Drozdov <denisd@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index b59953daf8b44..11dabd62e2c75 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -560,9 +560,9 @@ static int mlx5i_close(struct net_device *netdev)
 
 	netif_carrier_off(epriv->netdev);
 	mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn);
-	mlx5i_uninit_underlay_qp(epriv);
 	mlx5e_deactivate_priv_channels(epriv);
 	mlx5e_close_channels(&epriv->channels);
+	mlx5i_uninit_underlay_qp(epriv);
 unlock:
 	mutex_unlock(&epriv->state_lock);
 	return 0;
-- 
GitLab


From d3a80bb5a3eac311ddf28387402593977574460d Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Thu, 25 Oct 2018 15:41:58 +0000
Subject: [PATCH 1229/1890] net/mlx5e: Don't match on vlan non-existence if
 ethertype is wildcarded

For the "all" ethertype we should not care whether the packet has
vlans. Besides being wrong, the way we did it caused FW error
for rules such as:

tc filter add dev eth0 protocol all parent ffff: \
	prio 1 flower skip_sw action drop

b/c the matching meta-data (outer headers bit in struct mlx5_flow_spec)
wasn't set. Fix that by matching on vlan non-existence only if we were
also told to match on the ethertype.

Fixes: cee26487620b ('net/mlx5e: Set vlan masks for all offloaded TC rules')
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reported-by: Slava Ovsiienko <viacheslavo@mellanox.com>
Reviewed-by: Jianbo Liu <jianbol@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/en_tc.c   | 63 ++++++++++---------
 1 file changed, 32 insertions(+), 31 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 608025ca5c04d..84eb6939e69a9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -1447,31 +1447,21 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
 					 inner_headers);
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
-		struct flow_dissector_key_eth_addrs *key =
+	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
+		struct flow_dissector_key_basic *key =
 			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_ETH_ADDRS,
+						  FLOW_DISSECTOR_KEY_BASIC,
 						  f->key);
-		struct flow_dissector_key_eth_addrs *mask =
+		struct flow_dissector_key_basic *mask =
 			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_ETH_ADDRS,
+						  FLOW_DISSECTOR_KEY_BASIC,
 						  f->mask);
+		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
+			 ntohs(mask->n_proto));
+		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
+			 ntohs(key->n_proto));
 
-		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
-					     dmac_47_16),
-				mask->dst);
-		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
-					     dmac_47_16),
-				key->dst);
-
-		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
-					     smac_47_16),
-				mask->src);
-		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
-					     smac_47_16),
-				key->src);
-
-		if (!is_zero_ether_addr(mask->src) || !is_zero_ether_addr(mask->dst))
+		if (mask->n_proto)
 			*match_level = MLX5_MATCH_L2;
 	}
 
@@ -1505,9 +1495,10 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
 
 			*match_level = MLX5_MATCH_L2;
 		}
-	} else {
+	} else if (*match_level != MLX5_MATCH_NONE) {
 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, svlan_tag, 1);
 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
+		*match_level = MLX5_MATCH_L2;
 	}
 
 	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CVLAN)) {
@@ -1545,21 +1536,31 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
 		}
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
-		struct flow_dissector_key_basic *key =
+	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+		struct flow_dissector_key_eth_addrs *key =
 			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_BASIC,
+						  FLOW_DISSECTOR_KEY_ETH_ADDRS,
 						  f->key);
-		struct flow_dissector_key_basic *mask =
+		struct flow_dissector_key_eth_addrs *mask =
 			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_BASIC,
+						  FLOW_DISSECTOR_KEY_ETH_ADDRS,
 						  f->mask);
-		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
-			 ntohs(mask->n_proto));
-		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
-			 ntohs(key->n_proto));
 
-		if (mask->n_proto)
+		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+					     dmac_47_16),
+				mask->dst);
+		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+					     dmac_47_16),
+				key->dst);
+
+		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+					     smac_47_16),
+				mask->src);
+		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+					     smac_47_16),
+				key->src);
+
+		if (!is_zero_ether_addr(mask->src) || !is_zero_ether_addr(mask->dst))
 			*match_level = MLX5_MATCH_L2;
 	}
 
-- 
GitLab


From 077ecd785d90c6cbba08d719faa4be8561aa0a1e Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Thu, 18 Oct 2018 12:31:27 +0200
Subject: [PATCH 1230/1890] net/mlx5e: Claim TC hw offloads support only under
 a proper build config

Currently, we are only supporting tc hw offloads when the eswitch
support is compiled in, but we are not gating the adevertizment
of the NETIF_F_HW_TC feature on this config being set.

Fix it, and while doing that, also avoid dealing with the feature
on ethtool when the config is not set.

Fixes: e8f887ac6a45 ('net/mlx5e: Introduce tc offload support')
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 1243edbedc9e9..fb3b2d9c352b7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3574,6 +3574,7 @@ static int set_feature_cvlan_filter(struct net_device *netdev, bool enable)
 	return 0;
 }
 
+#ifdef CONFIG_MLX5_ESWITCH
 static int set_feature_tc_num_filters(struct net_device *netdev, bool enable)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -3586,6 +3587,7 @@ static int set_feature_tc_num_filters(struct net_device *netdev, bool enable)
 
 	return 0;
 }
+#endif
 
 static int set_feature_rx_all(struct net_device *netdev, bool enable)
 {
@@ -3684,7 +3686,9 @@ static int mlx5e_set_features(struct net_device *netdev,
 	err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro);
 	err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_FILTER,
 				    set_feature_cvlan_filter);
+#ifdef CONFIG_MLX5_ESWITCH
 	err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_TC, set_feature_tc_num_filters);
+#endif
 	err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXALL, set_feature_rx_all);
 	err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXFCS, set_feature_rx_fcs);
 	err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_RX, set_feature_rx_vlan);
@@ -4678,7 +4682,9 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
 	    FT_CAP(modify_root) &&
 	    FT_CAP(identified_miss_table_mode) &&
 	    FT_CAP(flow_table_modify)) {
+#ifdef CONFIG_MLX5_ESWITCH
 		netdev->hw_features      |= NETIF_F_HW_TC;
+#endif
 #ifdef CONFIG_MLX5_EN_ARFS
 		netdev->hw_features	 |= NETIF_F_NTUPLE;
 #endif
-- 
GitLab


From 83621b7df6a646e550fd3d36db2e301cf9a5096b Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Sun, 28 Oct 2018 12:27:29 +0200
Subject: [PATCH 1231/1890] net/mlx5e: Always use the match level enum when
 parsing TC rule match

We get the match level (none, l2, l3, l4) while going over the match
dissectors of an offloaded tc rule. When doing this, the match level
enum and the not min inline enum values should be used, fix that.

This worked accidentally b/c both enums have the same numerical values.

Fixes: d708f902989b ('net/mlx5e: Get the required HW match level while parsing TC flow matches')
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 84eb6939e69a9..0409767237a74 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -1587,10 +1587,10 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
 
 			/* the HW doesn't need L3 inline to match on frag=no */
 			if (!(key->flags & FLOW_DIS_IS_FRAGMENT))
-				*match_level = MLX5_INLINE_MODE_L2;
+				*match_level = MLX5_MATCH_L2;
 	/* ***  L2 attributes parsing up to here *** */
 			else
-				*match_level = MLX5_INLINE_MODE_IP;
+				*match_level = MLX5_MATCH_L3;
 		}
 	}
 
-- 
GitLab


From a1f240f1801721f76bee734c50df2d9529da86e0 Mon Sep 17 00:00:00 2001
From: Yuval Avnery <yuvalav@mellanox.com>
Date: Tue, 16 Oct 2018 15:20:20 -0500
Subject: [PATCH 1232/1890] net/mlx5e: Adjust to max number of channles when
 re-attaching

When core driver enters deattach/attach flow after pci reset,
Number of logical CPUs may have changed.
As a result we need to update the cpu affiliated resource tables.
	1. indirect rqt list
	2. eq table

Reproduction (PowerPC):
	echo 1000 > /sys/kernel/debug/powerpc/eeh_max_freezes
	ppc64_cpu --smt=on
	# Restart driver
	modprobe -r ... ; modprobe ...
	# Link up
	ifconfig ...
	# Only physical CPUs
	ppc64_cpu --smt=off
	# Inject PCI errors so PCI will reset - calling the pci error handler
	echo 0x8000000000000000 > /sys/kernel/debug/powerpc/<PCI BUS>/err_injct_inboundA

Call trace when trying to add non-existing rqs to an indirect rqt:
	mlx5e_redirect_rqt+0x84/0x260 [mlx5_core] (unreliable)
	mlx5e_redirect_rqts+0x188/0x190 [mlx5_core]
	mlx5e_activate_priv_channels+0x488/0x570 [mlx5_core]
	mlx5e_open_locked+0xbc/0x140 [mlx5_core]
	mlx5e_open+0x50/0x130 [mlx5_core]
	mlx5e_nic_enable+0x174/0x1b0 [mlx5_core]
	mlx5e_attach_netdev+0x154/0x290 [mlx5_core]
	mlx5e_attach+0x88/0xd0 [mlx5_core]
	mlx5_attach_device+0x168/0x1e0 [mlx5_core]
	mlx5_load_one+0x1140/0x1210 [mlx5_core]
	mlx5_pci_resume+0x6c/0xf0 [mlx5_core]

Create cq will fail when trying to use non-existing EQ.

Fixes: 89d44f0a6c73 ("net/mlx5_core: Add pci error handlers to mlx5_core driver")
Signed-off-by: Yuval Avnery <yuvalav@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/en_main.c | 27 +++++++++++++++----
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index fb3b2d9c352b7..25b09bb68e8b3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1623,13 +1623,15 @@ static int mlx5e_alloc_cq_common(struct mlx5_core_dev *mdev,
 	int err;
 	u32 i;
 
+	err = mlx5_vector2eqn(mdev, param->eq_ix, &eqn_not_used, &irqn);
+	if (err)
+		return err;
+
 	err = mlx5_cqwq_create(mdev, &param->wq, param->cqc, &cq->wq,
 			       &cq->wq_ctrl);
 	if (err)
 		return err;
 
-	mlx5_vector2eqn(mdev, param->eq_ix, &eqn_not_used, &irqn);
-
 	mcq->cqe_sz     = 64;
 	mcq->set_ci_db  = cq->wq_ctrl.db.db;
 	mcq->arm_db     = cq->wq_ctrl.db.db + 1;
@@ -1687,6 +1689,10 @@ static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
 	int eqn;
 	int err;
 
+	err = mlx5_vector2eqn(mdev, param->eq_ix, &eqn, &irqn_not_used);
+	if (err)
+		return err;
+
 	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
 		sizeof(u64) * cq->wq_ctrl.buf.npages;
 	in = kvzalloc(inlen, GFP_KERNEL);
@@ -1700,8 +1706,6 @@ static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
 	mlx5_fill_page_frag_array(&cq->wq_ctrl.buf,
 				  (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas));
 
-	mlx5_vector2eqn(mdev, param->eq_ix, &eqn, &irqn_not_used);
-
 	MLX5_SET(cqc,   cqc, cq_period_mode, param->cq_period_mode);
 	MLX5_SET(cqc,   cqc, c_eqn,         eqn);
 	MLX5_SET(cqc,   cqc, uar_page,      mdev->priv.uar->index);
@@ -1921,6 +1925,10 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
 	int err;
 	int eqn;
 
+	err = mlx5_vector2eqn(priv->mdev, ix, &eqn, &irq);
+	if (err)
+		return err;
+
 	c = kvzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu));
 	if (!c)
 		return -ENOMEM;
@@ -1937,7 +1945,6 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
 	c->xdp      = !!params->xdp_prog;
 	c->stats    = &priv->channel_stats[ix].ch;
 
-	mlx5_vector2eqn(priv->mdev, ix, &eqn, &irq);
 	c->irq_desc = irq_to_desc(irq);
 
 	netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64);
@@ -5010,11 +5017,21 @@ struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
 int mlx5e_attach_netdev(struct mlx5e_priv *priv)
 {
 	const struct mlx5e_profile *profile;
+	int max_nch;
 	int err;
 
 	profile = priv->profile;
 	clear_bit(MLX5E_STATE_DESTROYING, &priv->state);
 
+	/* max number of channels may have changed */
+	max_nch = mlx5e_get_max_num_channels(priv->mdev);
+	if (priv->channels.params.num_channels > max_nch) {
+		mlx5_core_warn(priv->mdev, "MLX5E: Reducing number of channels to %d\n", max_nch);
+		priv->channels.params.num_channels = max_nch;
+		mlx5e_build_default_indir_rqt(priv->channels.params.indirection_rqt,
+					      MLX5E_INDIR_RQT_SIZE, max_nch);
+	}
+
 	err = profile->init_tx(priv);
 	if (err)
 		goto out;
-- 
GitLab


From 1392f44bba22533b8dfdc6b0e73610b27c3f76cf Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@mellanox.com>
Date: Tue, 23 Oct 2018 17:30:04 +0300
Subject: [PATCH 1233/1890] net/mlx5e: Apply the correct check for supporting
 TC esw rules split

The mirror and not the output count is the one denoting a split.
Fix to condition the offload attempt on the mirror count being > 0
along the firmware to have the related capability.

Fixes: 592d36515969 ("net/mlx5e: Parse mirroring action for offloaded TC eswitch flows")
Signed-off-by: Roi Dayan <roid@mellanox.com>
Reviewed-by: Yossi Kuperman <yossiku@mellanox.com>
Reviewed-by: Chris Mi <chrism@mellanox.com>
Acked-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 0409767237a74..fca6f4132c91a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -2980,7 +2980,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
 	if (!actions_match_supported(priv, exts, parse_attr, flow, extack))
 		return -EOPNOTSUPP;
 
-	if (attr->out_count > 1 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
+	if (attr->mirror_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
 		NL_SET_ERR_MSG_MOD(extack,
 				   "current firmware doesn't support split rule for port mirroring");
 		netdev_warn_once(priv->netdev, "current firmware doesn't support split rule for port mirroring\n");
-- 
GitLab


From 0073c8f72736b423aade8a817587a5f3e4df4ad8 Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@mellanox.com>
Date: Thu, 11 Oct 2018 07:31:10 +0300
Subject: [PATCH 1234/1890] net/mlx5e: RX, verify received packet size in
 Linear Striding RQ

In case of striding RQ, we use  MPWRQ (Multi Packet WQE RQ), which means
that WQE (RX descriptor) can be used for many packets and so the WQE is
much bigger than MTU.  In virtualization setups where the port mtu can
be larger than the vf mtu, if received packet is bigger than MTU, it
won't be dropped by HW on too small receive WQE. If we use linear SKB in
striding RQ, since each stride has room for mtu size payload and skb
info, an oversized packet can lead to crash for crossing allocated page
boundary upon the call to build_skb. So driver needs to check packet
size and drop it.

Introduce new SW rx counter, rx_oversize_pkts_sw_drop, which counts the
number of packets dropped by the driver for being too large.

As a new field is added to the RQ struct, re-open the channels whenever
this field is being used in datapath (i.e., in the case of linear
Striding RQ).

Fixes: 619a8f2a42f1 ("net/mlx5e: Use linear SKB in Striding RQ")
Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h       | 1 +
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  | 4 +++-
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c    | 6 ++++++
 drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 3 +++
 drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 2 ++
 5 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index d7fbd5b6ac957..1183248029264 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -569,6 +569,7 @@ struct mlx5e_rq {
 
 	unsigned long          state;
 	int                    ix;
+	unsigned int           hw_mtu;
 
 	struct net_dim         dim; /* Dynamic Interrupt Moderation */
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 25b09bb68e8b3..871313d6b34d1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -502,6 +502,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 	rq->channel = c;
 	rq->ix      = c->ix;
 	rq->mdev    = mdev;
+	rq->hw_mtu  = MLX5E_SW2HW_MTU(params, params->sw_mtu);
 	rq->stats   = &c->priv->channel_stats[c->ix].rq;
 
 	rq->xdp_prog = params->xdp_prog ? bpf_prog_inc(params->xdp_prog) : NULL;
@@ -3766,10 +3767,11 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
 	}
 
 	if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
+		bool is_linear = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev, &new_channels.params);
 		u8 ppw_old = mlx5e_mpwqe_log_pkts_per_wqe(params);
 		u8 ppw_new = mlx5e_mpwqe_log_pkts_per_wqe(&new_channels.params);
 
-		reset = reset && (ppw_old != ppw_new);
+		reset = reset && (is_linear || (ppw_old != ppw_new));
 	}
 
 	if (!reset) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 79638dcbae783..16985ca3248d7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -1104,6 +1104,12 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
 	u32 frag_size;
 	bool consumed;
 
+	/* Check packet size. Note LRO doesn't use linear SKB */
+	if (unlikely(cqe_bcnt > rq->hw_mtu)) {
+		rq->stats->oversize_pkts_sw_drop++;
+		return NULL;
+	}
+
 	va             = page_address(di->page) + head_offset;
 	data           = va + rx_headroom;
 	frag_size      = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt32);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 1e55b9c27ffc0..3e99d0728b2f2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -83,6 +83,7 @@ static const struct counter_desc sw_stats_desc[] = {
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_wqe_err) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler_cqes) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler_strides) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_oversize_pkts_sw_drop) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) },
@@ -161,6 +162,7 @@ void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
 		s->rx_wqe_err   += rq_stats->wqe_err;
 		s->rx_mpwqe_filler_cqes    += rq_stats->mpwqe_filler_cqes;
 		s->rx_mpwqe_filler_strides += rq_stats->mpwqe_filler_strides;
+		s->rx_oversize_pkts_sw_drop += rq_stats->oversize_pkts_sw_drop;
 		s->rx_buff_alloc_err += rq_stats->buff_alloc_err;
 		s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks;
 		s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts;
@@ -1189,6 +1191,7 @@ static const struct counter_desc rq_stats_desc[] = {
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_filler_cqes) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_filler_strides) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, oversize_pkts_sw_drop) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, buff_alloc_err) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_blks) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) },
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index 77f74ce11280e..3f8e870ef4c90 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -96,6 +96,7 @@ struct mlx5e_sw_stats {
 	u64 rx_wqe_err;
 	u64 rx_mpwqe_filler_cqes;
 	u64 rx_mpwqe_filler_strides;
+	u64 rx_oversize_pkts_sw_drop;
 	u64 rx_buff_alloc_err;
 	u64 rx_cqe_compress_blks;
 	u64 rx_cqe_compress_pkts;
@@ -193,6 +194,7 @@ struct mlx5e_rq_stats {
 	u64 wqe_err;
 	u64 mpwqe_filler_cqes;
 	u64 mpwqe_filler_strides;
+	u64 oversize_pkts_sw_drop;
 	u64 buff_alloc_err;
 	u64 cqe_compress_blks;
 	u64 cqe_compress_pkts;
-- 
GitLab


From 228c4cd04dfd0667eda182c91504b83c17d97584 Mon Sep 17 00:00:00 2001
From: Valentine Fatiev <valentinef@mellanox.com>
Date: Wed, 17 Oct 2018 11:45:07 +0300
Subject: [PATCH 1235/1890] net/mlx5e: Fix selftest for small MTUs

Loopback test had fixed packet size, which can be bigger than configured
MTU. Shorten the loopback packet size to be bigger than minimal MTU
allowed by the device. Text field removed from struct 'mlx5ehdr'
as redundant to allow send small packets as minimal allowed MTU.

Fixes: d605d66 ("net/mlx5e: Add support for ethtool self diagnostics test")
Signed-off-by: Valentine Fatiev <valentinef@mellanox.com>
Reviewed-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../ethernet/mellanox/mlx5/core/en_selftest.c | 26 +++++++------------
 1 file changed, 10 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
index 35ded91203f52..4382ef85488c5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
@@ -98,18 +98,17 @@ static int mlx5e_test_link_speed(struct mlx5e_priv *priv)
 	return 1;
 }
 
-#ifdef CONFIG_INET
-/* loopback test */
-#define MLX5E_TEST_PKT_SIZE (MLX5E_RX_MAX_HEAD - NET_IP_ALIGN)
-static const char mlx5e_test_text[ETH_GSTRING_LEN] = "MLX5E SELF TEST";
-#define MLX5E_TEST_MAGIC 0x5AEED15C001ULL
-
 struct mlx5ehdr {
 	__be32 version;
 	__be64 magic;
-	char   text[ETH_GSTRING_LEN];
 };
 
+#ifdef CONFIG_INET
+/* loopback test */
+#define MLX5E_TEST_PKT_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) +\
+			     sizeof(struct udphdr) + sizeof(struct mlx5ehdr))
+#define MLX5E_TEST_MAGIC 0x5AEED15C001ULL
+
 static struct sk_buff *mlx5e_test_get_udp_skb(struct mlx5e_priv *priv)
 {
 	struct sk_buff *skb = NULL;
@@ -117,10 +116,7 @@ static struct sk_buff *mlx5e_test_get_udp_skb(struct mlx5e_priv *priv)
 	struct ethhdr *ethh;
 	struct udphdr *udph;
 	struct iphdr *iph;
-	int datalen, iplen;
-
-	datalen = MLX5E_TEST_PKT_SIZE -
-		  (sizeof(*ethh) + sizeof(*iph) + sizeof(*udph));
+	int    iplen;
 
 	skb = netdev_alloc_skb(priv->netdev, MLX5E_TEST_PKT_SIZE);
 	if (!skb) {
@@ -149,7 +145,7 @@ static struct sk_buff *mlx5e_test_get_udp_skb(struct mlx5e_priv *priv)
 	/* Fill UDP header */
 	udph->source = htons(9);
 	udph->dest = htons(9); /* Discard Protocol */
-	udph->len = htons(datalen + sizeof(struct udphdr));
+	udph->len = htons(sizeof(struct mlx5ehdr) + sizeof(struct udphdr));
 	udph->check = 0;
 
 	/* Fill IP header */
@@ -157,7 +153,8 @@ static struct sk_buff *mlx5e_test_get_udp_skb(struct mlx5e_priv *priv)
 	iph->ttl = 32;
 	iph->version = 4;
 	iph->protocol = IPPROTO_UDP;
-	iplen = sizeof(struct iphdr) + sizeof(struct udphdr) + datalen;
+	iplen = sizeof(struct iphdr) + sizeof(struct udphdr) +
+		sizeof(struct mlx5ehdr);
 	iph->tot_len = htons(iplen);
 	iph->frag_off = 0;
 	iph->saddr = 0;
@@ -170,9 +167,6 @@ static struct sk_buff *mlx5e_test_get_udp_skb(struct mlx5e_priv *priv)
 	mlxh = skb_put(skb, sizeof(*mlxh));
 	mlxh->version = 0;
 	mlxh->magic = cpu_to_be64(MLX5E_TEST_MAGIC);
-	strlcpy(mlxh->text, mlx5e_test_text, sizeof(mlxh->text));
-	datalen -= sizeof(*mlxh);
-	skb_put_zero(skb, datalen);
 
 	skb->csum = 0;
 	skb->ip_summed = CHECKSUM_PARTIAL;
-- 
GitLab


From e1e46479847e66f78f79d8c24d5169a5954b3fc2 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sun, 18 Nov 2018 21:59:49 +0800
Subject: [PATCH 1236/1890] sctp: not increase stream's incnt before sending
 addstrm_in request

Different from processing the addstrm_out request, The receiver handles
an addstrm_in request by sending back an addstrm_out request to the
sender who will increase its stream's in and incnt later.

Now stream->incnt has been increased since it sent out the addstrm_in
request in sctp_send_add_streams(), with the wrong stream->incnt will
even cause crash when copying stream info from the old stream's in to
the new one's in sctp_process_strreset_addstrm_out().

This patch is to fix it by simply removing the stream->incnt change
from sctp_send_add_streams().

Fixes: 242bd2d519d7 ("sctp: implement sender-side procedures for Add Incoming/Outgoing Streams Request Parameter")
Reported-by: Jianwen Ji <jiji@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/stream.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index ffb940d3b57c1..3892e7630f3ad 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -535,7 +535,6 @@ int sctp_send_add_streams(struct sctp_association *asoc,
 		goto out;
 	}
 
-	stream->incnt = incnt;
 	stream->outcnt = outcnt;
 
 	asoc->strreset_outstanding = !!out + !!in;
-- 
GitLab


From e76ad21d070f79e566ac46ce0b0584c3c93e1b43 Mon Sep 17 00:00:00 2001
From: Arthur Kiyanovski <akiyano@amazon.com>
Date: Mon, 19 Nov 2018 12:05:20 +0200
Subject: [PATCH 1237/1890] net: ena: fix crash during failed resume from
 hibernation

During resume from hibernation if ena_restore_device fails,
ena_com_dev_reset() is called, and uses the readless read mechanism,
which was already destroyed by the call to
ena_com_mmio_reg_read_request_destroy(). This causes a NULL pointer
reference.

In this commit we switch the call order of the above two functions
to avoid this crash.

Fixes: d7703ddbd7c9 ("net: ena: fix rare bug when failed restart/resume is followed by driver removal")
Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_netdev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 18956e7604a31..1d3cead0977eb 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -2694,8 +2694,8 @@ static int ena_restore_device(struct ena_adapter *adapter)
 	ena_com_abort_admin_commands(ena_dev);
 	ena_com_wait_for_abort_completion(ena_dev);
 	ena_com_admin_destroy(ena_dev);
-	ena_com_mmio_reg_read_request_destroy(ena_dev);
 	ena_com_dev_reset(ena_dev, ENA_REGS_RESET_DRIVER_INVALID_STATE);
+	ena_com_mmio_reg_read_request_destroy(ena_dev);
 err:
 	clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
 	clear_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags);
-- 
GitLab


From 58a54b9c62e206b8d5f6e59020bcb178fc271d8e Mon Sep 17 00:00:00 2001
From: Arthur Kiyanovski <akiyano@amazon.com>
Date: Mon, 19 Nov 2018 12:05:21 +0200
Subject: [PATCH 1238/1890] net: ena: fix crash during ena_remove()

In ena_remove() we have the following stack call:
ena_remove()
  unregister_netdev()
  ena_destroy_device()
    netif_carrier_off()

Calling netif_carrier_off() causes linkwatch to try to handle the
link change event on the already unregistered netdev, which leads
to a read from an unreadable memory address.

This patch switches the order of the two functions, so that
netif_carrier_off() is called on a regiestered netdev.

To accomplish this fix we also had to:
1. Remove the set bit ENA_FLAG_TRIGGER_RESET
2. Add a sanitiy check in ena_close()
both to prevent double device reset (when calling unregister_netdev()
ena_close is called, but the device was already deleted in
ena_destroy_device()).
3. Set the admin_queue running state to false to avoid using it after
device was reset (for example when calling ena_destroy_all_io_queues()
right after ena_com_dev_reset() in ena_down)

Fixes: 944b28aa2982 ("net: ena: fix missing lock during device destruction")
Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_netdev.c | 21 ++++++++++----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 1d3cead0977eb..a70bb1bb90e7d 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -1848,6 +1848,8 @@ static void ena_down(struct ena_adapter *adapter)
 		rc = ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
 		if (rc)
 			dev_err(&adapter->pdev->dev, "Device reset failed\n");
+		/* stop submitting admin commands on a device that was reset */
+		ena_com_set_admin_running_state(adapter->ena_dev, false);
 	}
 
 	ena_destroy_all_io_queues(adapter);
@@ -1914,6 +1916,9 @@ static int ena_close(struct net_device *netdev)
 
 	netif_dbg(adapter, ifdown, netdev, "%s\n", __func__);
 
+	if (!test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags))
+		return 0;
+
 	if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
 		ena_down(adapter);
 
@@ -2613,9 +2618,7 @@ static void ena_destroy_device(struct ena_adapter *adapter, bool graceful)
 		ena_down(adapter);
 
 	/* Stop the device from sending AENQ events (in case reset flag is set
-	 *  and device is up, ena_close already reset the device
-	 * In case the reset flag is set and the device is up, ena_down()
-	 * already perform the reset, so it can be skipped.
+	 *  and device is up, ena_down() already reset the device.
 	 */
 	if (!(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags) && dev_up))
 		ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
@@ -3452,6 +3455,8 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	ena_com_rss_destroy(ena_dev);
 err_free_msix:
 	ena_com_dev_reset(ena_dev, ENA_REGS_RESET_INIT_ERR);
+	/* stop submitting admin commands on a device that was reset */
+	ena_com_set_admin_running_state(ena_dev, false);
 	ena_free_mgmnt_irq(adapter);
 	ena_disable_msix(adapter);
 err_worker_destroy:
@@ -3498,18 +3503,12 @@ static void ena_remove(struct pci_dev *pdev)
 
 	cancel_work_sync(&adapter->reset_task);
 
-	unregister_netdev(netdev);
-
-	/* If the device is running then we want to make sure the device will be
-	 * reset to make sure no more events will be issued by the device.
-	 */
-	if (test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags))
-		set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
-
 	rtnl_lock();
 	ena_destroy_device(adapter, true);
 	rtnl_unlock();
 
+	unregister_netdev(netdev);
+
 	free_netdev(netdev);
 
 	ena_com_rss_destroy(ena_dev);
-- 
GitLab


From 4c23738a3f9f203a9b41c89e030eaa8ee241f90f Mon Sep 17 00:00:00 2001
From: Arthur Kiyanovski <akiyano@amazon.com>
Date: Mon, 19 Nov 2018 12:05:22 +0200
Subject: [PATCH 1239/1890] net: ena: update driver version from 2.0.1 to 2.0.2

Update driver version due to critical bug fixes.

Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_netdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h
index 5218736423393..dc8b6173d8d82 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h
@@ -45,7 +45,7 @@
 
 #define DRV_MODULE_VER_MAJOR	2
 #define DRV_MODULE_VER_MINOR	0
-#define DRV_MODULE_VER_SUBMINOR 1
+#define DRV_MODULE_VER_SUBMINOR 2
 
 #define DRV_MODULE_NAME		"ena"
 #ifndef DRV_MODULE_VERSION
-- 
GitLab


From 9cdeaab3b7e72fe60f582e7658511f30c19f1e29 Mon Sep 17 00:00:00 2001
From: Shay Agroskin <shayag@mellanox.com>
Date: Sun, 28 Oct 2018 16:13:46 +0200
Subject: [PATCH 1240/1890] net/mlx5e: Fix a bug in turning off FEC policy in
 unsupported speeds

Some speeds don't support turning FEC policy off. In case a requested
FEC policy is not supported for a speed (including current speed), its new
FEC policy would be:
	no FEC - if disabling FEC is supported for that speed
	unchanged - else

Fixes: 2095b2641477 ("net/mlx5e: Add port FEC get/set functions")
Signed-off-by: Shay Agroskin <shayag@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/en/port.c | 28 ++++++++-----------
 1 file changed, 12 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
index 023dc4bccd289..c16351eb9e54e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
@@ -394,12 +394,12 @@ int mlx5e_get_fec_mode(struct mlx5_core_dev *dev, u32 *fec_mode_active,
 
 int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u8 fec_policy)
 {
+	u8 fec_policy_nofec = BIT(MLX5E_FEC_NOFEC);
 	bool fec_mode_not_supp_in_speed = false;
-	u8 no_fec_policy = BIT(MLX5E_FEC_NOFEC);
 	u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {};
 	u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {};
 	int sz = MLX5_ST_SZ_BYTES(pplm_reg);
-	u32 current_fec_speed;
+	u8 fec_policy_auto = 0;
 	u8 fec_caps = 0;
 	int err;
 	int i;
@@ -415,23 +415,19 @@ int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u8 fec_policy)
 	if (err)
 		return err;
 
-	err = mlx5e_port_linkspeed(dev, &current_fec_speed);
-	if (err)
-		return err;
+	MLX5_SET(pplm_reg, out, local_port, 1);
 
-	memset(in, 0, sz);
-	MLX5_SET(pplm_reg, in, local_port, 1);
-	for (i = 0; i < MLX5E_FEC_SUPPORTED_SPEEDS && !!fec_policy; i++) {
+	for (i = 0; i < MLX5E_FEC_SUPPORTED_SPEEDS; i++) {
 		mlx5e_get_fec_cap_field(out, &fec_caps, fec_supported_speeds[i]);
-		/* policy supported for link speed */
-		if (!!(fec_caps & fec_policy)) {
-			mlx5e_fec_admin_field(in, &fec_policy, 1,
+		/* policy supported for link speed, or policy is auto */
+		if (fec_caps & fec_policy || fec_policy == fec_policy_auto) {
+			mlx5e_fec_admin_field(out, &fec_policy, 1,
 					      fec_supported_speeds[i]);
 		} else {
-			if (fec_supported_speeds[i] == current_fec_speed)
-				return -EOPNOTSUPP;
-			mlx5e_fec_admin_field(in, &no_fec_policy, 1,
-					      fec_supported_speeds[i]);
+			/* turn off FEC if supported. Else, leave it the same */
+			if (fec_caps & fec_policy_nofec)
+				mlx5e_fec_admin_field(out, &fec_policy_nofec, 1,
+						      fec_supported_speeds[i]);
 			fec_mode_not_supp_in_speed = true;
 		}
 	}
@@ -441,5 +437,5 @@ int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u8 fec_policy)
 			      "FEC policy 0x%x is not supported for some speeds",
 			      fec_policy);
 
-	return mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 1);
+	return mlx5_core_access_reg(dev, out, sz, out, sz, MLX5_REG_PPLM, 0, 1);
 }
-- 
GitLab


From febd72f27c02713823cdd011e0087c0e80eb8f62 Mon Sep 17 00:00:00 2001
From: Shay Agroskin <shayag@mellanox.com>
Date: Sun, 28 Oct 2018 09:17:29 +0200
Subject: [PATCH 1241/1890] net/mlx5e: Fix wrong field name in FEC related
 functions

This bug would result in reading wrong FEC capabilities for 10G/40G.

Fixes: 2095b2641477 ("net/mlx5e: Add port FEC get/set functions")
Signed-off-by: Shay Agroskin <shayag@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/port.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
index c16351eb9e54e..83ba9ea201d8e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
@@ -258,7 +258,7 @@ static int mlx5e_fec_admin_field(u32 *pplm,
 	case 40000:
 		if (!write)
 			*fec_policy = MLX5_GET(pplm_reg, pplm,
-					       fec_override_cap_10g_40g);
+					       fec_override_admin_10g_40g);
 		else
 			MLX5_SET(pplm_reg, pplm,
 				 fec_override_admin_10g_40g, *fec_policy);
@@ -310,7 +310,7 @@ static int mlx5e_get_fec_cap_field(u32 *pplm,
 	case 10000:
 	case 40000:
 		*fec_cap = MLX5_GET(pplm_reg, pplm,
-				    fec_override_admin_10g_40g);
+				    fec_override_cap_10g_40g);
 		break;
 	case 25000:
 		*fec_cap = MLX5_GET(pplm_reg, pplm,
-- 
GitLab


From 64e283348458e2fd2fe41b60dfb6c30e88ee695f Mon Sep 17 00:00:00 2001
From: Shay Agroskin <shayag@mellanox.com>
Date: Sun, 28 Oct 2018 09:06:11 +0200
Subject: [PATCH 1242/1890] net/mlx5e: Removed unnecessary warnings in FEC caps
 query

Querying interface FEC caps with 'ethtool [int]' after link reset
throws warning regading link speed.
This warning is not needed as there is already an indication in
user space that the link is not up.

Fixes: 0696d60853d5 ("net/mlx5e: Receive buffer configuration")
Signed-off-by: Shay Agroskin <shayag@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/port.c        | 4 +---
 drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c | 4 +++-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
index 83ba9ea201d8e..4a37713023be5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
@@ -88,10 +88,8 @@ int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed)
 
 	eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);
 	*speed = mlx5e_port_ptys2speed(eth_proto_oper);
-	if (!(*speed)) {
-		mlx5_core_warn(mdev, "cannot get port speed\n");
+	if (!(*speed))
 		err = -EINVAL;
-	}
 
 	return err;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
index c047da8752daa..eac245a93f918 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
@@ -130,8 +130,10 @@ static u32 calculate_xoff(struct mlx5e_priv *priv, unsigned int mtu)
 	int err;
 
 	err = mlx5e_port_linkspeed(priv->mdev, &speed);
-	if (err)
+	if (err) {
+		mlx5_core_warn(priv->mdev, "cannot get port speed\n");
 		return 0;
+	}
 
 	xoff = (301 + 216 * priv->dcbx.cable_len / 100) * speed / 1000 + 272 * mtu / 100;
 
-- 
GitLab


From 9184e51b5b3ef4509ea869f43e34a60f78f0d32a Mon Sep 17 00:00:00 2001
From: Shay Agroskin <shayag@mellanox.com>
Date: Thu, 8 Nov 2018 14:23:36 +0200
Subject: [PATCH 1243/1890] net/mlx5e: Fix failing ethtool query on FEC query
 error

If FEC caps query fails when executing 'ethtool <interface>'
the whole callback fails unnecessarily, fixed that by replacing the
error return code with debug logging only.

Fixes: 6cfa94605091 ("net/mlx5e: Ethtool driver callback for query/set FEC policy")
Signed-off-by: Shay Agroskin <shayag@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 3e770abfd8021..25c1c4f968412 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -843,8 +843,7 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev,
 	ethtool_link_ksettings_add_link_mode(link_ksettings, supported,
 					     Autoneg);
 
-	err = get_fec_supported_advertised(mdev, link_ksettings);
-	if (err)
+	if (get_fec_supported_advertised(mdev, link_ksettings))
 		netdev_dbg(netdev, "%s: FEC caps query failed: %d\n",
 			   __func__, err);
 
-- 
GitLab


From aeabb3c96186a0f944fc2b1f25c84d5eb3a93fa9 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Mon, 19 Nov 2018 20:11:45 -0500
Subject: [PATCH 1244/1890] NFSv4: Fix a NFSv4 state manager deadlock

Fix a deadlock whereby the NFSv4 state manager can get stuck in the
delegation return code, waiting for a layout return to complete in
another thread. If the server reboots before that other thread
completes, then we need to be able to start a second state
manager thread in order to perform recovery.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/nfs4_fs.h   |  2 ++
 fs/nfs/nfs4state.c | 16 +++++++++++-----
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 8d59c9655ec48..1b994b5275189 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -41,6 +41,8 @@ enum nfs4_client_state {
 	NFS4CLNT_MOVED,
 	NFS4CLNT_LEASE_MOVED,
 	NFS4CLNT_DELEGATION_EXPIRED,
+	NFS4CLNT_RUN_MANAGER,
+	NFS4CLNT_DELEGRETURN_RUNNING,
 };
 
 #define NFS4_RENEW_TIMEOUT		0x01
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index ffea578853949..d8decf2ec48fa 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1210,6 +1210,7 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
 	struct task_struct *task;
 	char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1];
 
+	set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
 	if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
 		return;
 	__module_get(THIS_MODULE);
@@ -2503,6 +2504,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
 
 	/* Ensure exclusive access to NFSv4 state */
 	do {
+		clear_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
 		if (test_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state)) {
 			section = "purge state";
 			status = nfs4_purge_lease(clp);
@@ -2593,14 +2595,18 @@ static void nfs4_state_manager(struct nfs_client *clp)
 		}
 
 		nfs4_end_drain_session(clp);
-		if (test_and_clear_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) {
-			nfs_client_return_marked_delegations(clp);
-			continue;
+		nfs4_clear_state_manager_bit(clp);
+
+		if (!test_and_set_bit(NFS4CLNT_DELEGRETURN_RUNNING, &clp->cl_state)) {
+			if (test_and_clear_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) {
+				nfs_client_return_marked_delegations(clp);
+				set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
+			}
+			clear_bit(NFS4CLNT_DELEGRETURN_RUNNING, &clp->cl_state);
 		}
 
-		nfs4_clear_state_manager_bit(clp);
 		/* Did we race with an attempt to give us more work? */
-		if (clp->cl_state == 0)
+		if (!test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state))
 			return;
 		if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
 			return;
-- 
GitLab


From 276d43f0ae963312c0cd0e2b9a85fd11ac65dfcc Mon Sep 17 00:00:00 2001
From: Denis Bolotin <denis.bolotin@cavium.com>
Date: Mon, 19 Nov 2018 16:28:30 +0200
Subject: [PATCH 1245/1890] qed: Fix bitmap_weight() check

Fix the condition which verifies that only one flag is set. The API
bitmap_weight() should receive size in bits instead of bytes.

Fixes: b5a9ee7cf3be ("qed: Revise QM cofiguration")
Signed-off-by: Denis Bolotin <denis.bolotin@cavium.com>
Signed-off-by: Michal Kalderon <michal.kalderon@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_dev.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index cff141077558c..9b41e4ba53e5b 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -485,8 +485,11 @@ static u16 *qed_init_qm_get_idx_from_flags(struct qed_hwfn *p_hwfn,
 	struct qed_qm_info *qm_info = &p_hwfn->qm_info;
 
 	/* Can't have multiple flags set here */
-	if (bitmap_weight((unsigned long *)&pq_flags, sizeof(pq_flags)) > 1)
+	if (bitmap_weight((unsigned long *)&pq_flags,
+			  sizeof(pq_flags) * BITS_PER_BYTE) > 1) {
+		DP_ERR(p_hwfn, "requested multiple pq flags 0x%x\n", pq_flags);
 		goto err;
+	}
 
 	switch (pq_flags) {
 	case PQ_FLAGS_RLS:
-- 
GitLab


From eb62cca9bee842e5b23bd0ddfb1f271ca95e8759 Mon Sep 17 00:00:00 2001
From: Denis Bolotin <denis.bolotin@cavium.com>
Date: Mon, 19 Nov 2018 16:28:31 +0200
Subject: [PATCH 1246/1890] qed: Fix QM getters to always return a valid pq

The getter callers doesn't know the valid Physical Queues (PQ) values.
This patch makes sure that a valid PQ will always be returned.

The patch consists of 3 fixes:

 - When qed_init_qm_get_idx_from_flags() receives a disabled flag, it
   returned PQ 0, which can potentially be another function's pq. Verify
   that flag is enabled, otherwise return default start_pq.

 - When qed_init_qm_get_idx_from_flags() receives an unknown flag, it
   returned NULL and could lead to a segmentation fault. Return default
   start_pq instead.

 - A modulo operation was added to MCOS/VFS PQ getters to make sure the
   PQ returned is in range of the required flag.

Fixes: b5a9ee7cf3be ("qed: Revise QM cofiguration")
Signed-off-by: Denis Bolotin <denis.bolotin@cavium.com>
Signed-off-by: Michal Kalderon <michal.kalderon@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_dev.c | 24 +++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index 9b41e4ba53e5b..88a8576ca9cea 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -491,6 +491,11 @@ static u16 *qed_init_qm_get_idx_from_flags(struct qed_hwfn *p_hwfn,
 		goto err;
 	}
 
+	if (!(qed_get_pq_flags(p_hwfn) & pq_flags)) {
+		DP_ERR(p_hwfn, "pq flag 0x%x is not set\n", pq_flags);
+		goto err;
+	}
+
 	switch (pq_flags) {
 	case PQ_FLAGS_RLS:
 		return &qm_info->first_rl_pq;
@@ -513,8 +518,7 @@ static u16 *qed_init_qm_get_idx_from_flags(struct qed_hwfn *p_hwfn,
 	}
 
 err:
-	DP_ERR(p_hwfn, "BAD pq flags %d\n", pq_flags);
-	return NULL;
+	return &qm_info->start_pq;
 }
 
 /* save pq index in qm info */
@@ -538,20 +542,32 @@ u16 qed_get_cm_pq_idx_mcos(struct qed_hwfn *p_hwfn, u8 tc)
 {
 	u8 max_tc = qed_init_qm_get_num_tcs(p_hwfn);
 
+	if (max_tc == 0) {
+		DP_ERR(p_hwfn, "pq with flag 0x%lx do not exist\n",
+		       PQ_FLAGS_MCOS);
+		return p_hwfn->qm_info.start_pq;
+	}
+
 	if (tc > max_tc)
 		DP_ERR(p_hwfn, "tc %d must be smaller than %d\n", tc, max_tc);
 
-	return qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_MCOS) + tc;
+	return qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_MCOS) + (tc % max_tc);
 }
 
 u16 qed_get_cm_pq_idx_vf(struct qed_hwfn *p_hwfn, u16 vf)
 {
 	u16 max_vf = qed_init_qm_get_num_vfs(p_hwfn);
 
+	if (max_vf == 0) {
+		DP_ERR(p_hwfn, "pq with flag 0x%lx do not exist\n",
+		       PQ_FLAGS_VFS);
+		return p_hwfn->qm_info.start_pq;
+	}
+
 	if (vf > max_vf)
 		DP_ERR(p_hwfn, "vf %d must be smaller than %d\n", vf, max_vf);
 
-	return qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_VFS) + vf;
+	return qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_VFS) + (vf % max_vf);
 }
 
 u16 qed_get_cm_pq_idx_ofld_mtc(struct qed_hwfn *p_hwfn, u8 tc)
-- 
GitLab


From a5681e20b541a507c7d4fd48ae4a4040d32ee1ef Mon Sep 17 00:00:00 2001
From: Juliet Kim <julietk@linux.vnet.ibm.com>
Date: Mon, 19 Nov 2018 15:59:22 -0600
Subject: [PATCH 1247/1890] net/ibmnvic: Fix deadlock problem in reset

This patch changes to use rtnl_lock only during a reset to avoid
deadlock that could occur when a thread operating close is holding
rtnl_lock and waiting for reset_lock acquired by another thread,
which is waiting for rtnl_lock in order to set the number of tx/rx
queues during a reset.

Also, we now setting the number of tx/rx queues during a soft reset
for failover or LPM events.

Signed-off-by: Juliet Kim <julietk@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 59 +++++++++++-------------------
 drivers/net/ethernet/ibm/ibmvnic.h |  2 +-
 2 files changed, 22 insertions(+), 39 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index c9d5d0a7fbf17..27a6df30eafd2 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1103,20 +1103,15 @@ static int ibmvnic_open(struct net_device *netdev)
 		return 0;
 	}
 
-	mutex_lock(&adapter->reset_lock);
-
 	if (adapter->state != VNIC_CLOSED) {
 		rc = ibmvnic_login(netdev);
-		if (rc) {
-			mutex_unlock(&adapter->reset_lock);
+		if (rc)
 			return rc;
-		}
 
 		rc = init_resources(adapter);
 		if (rc) {
 			netdev_err(netdev, "failed to initialize resources\n");
 			release_resources(adapter);
-			mutex_unlock(&adapter->reset_lock);
 			return rc;
 		}
 	}
@@ -1124,8 +1119,6 @@ static int ibmvnic_open(struct net_device *netdev)
 	rc = __ibmvnic_open(netdev);
 	netif_carrier_on(netdev);
 
-	mutex_unlock(&adapter->reset_lock);
-
 	return rc;
 }
 
@@ -1269,10 +1262,8 @@ static int ibmvnic_close(struct net_device *netdev)
 		return 0;
 	}
 
-	mutex_lock(&adapter->reset_lock);
 	rc = __ibmvnic_close(netdev);
 	ibmvnic_cleanup(netdev);
-	mutex_unlock(&adapter->reset_lock);
 
 	return rc;
 }
@@ -1820,20 +1811,15 @@ static int do_reset(struct ibmvnic_adapter *adapter,
 				return rc;
 		} else if (adapter->req_rx_queues != old_num_rx_queues ||
 			   adapter->req_tx_queues != old_num_tx_queues) {
-			adapter->map_id = 1;
 			release_rx_pools(adapter);
 			release_tx_pools(adapter);
-			rc = init_rx_pools(netdev);
-			if (rc)
-				return rc;
-			rc = init_tx_pools(netdev);
-			if (rc)
-				return rc;
-
 			release_napi(adapter);
-			rc = init_napi(adapter);
+			release_vpd_data(adapter);
+
+			rc = init_resources(adapter);
 			if (rc)
 				return rc;
+
 		} else {
 			rc = reset_tx_pools(adapter);
 			if (rc)
@@ -1917,17 +1903,8 @@ static int do_hard_reset(struct ibmvnic_adapter *adapter,
 		adapter->state = VNIC_PROBED;
 		return 0;
 	}
-	/* netif_set_real_num_xx_queues needs to take rtnl lock here
-	 * unless wait_for_reset is set, in which case the rtnl lock
-	 * has already been taken before initializing the reset
-	 */
-	if (!adapter->wait_for_reset) {
-		rtnl_lock();
-		rc = init_resources(adapter);
-		rtnl_unlock();
-	} else {
-		rc = init_resources(adapter);
-	}
+
+	rc = init_resources(adapter);
 	if (rc)
 		return rc;
 
@@ -1986,13 +1963,21 @@ static void __ibmvnic_reset(struct work_struct *work)
 	struct ibmvnic_rwi *rwi;
 	struct ibmvnic_adapter *adapter;
 	struct net_device *netdev;
+	bool we_lock_rtnl = false;
 	u32 reset_state;
 	int rc = 0;
 
 	adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset);
 	netdev = adapter->netdev;
 
-	mutex_lock(&adapter->reset_lock);
+	/* netif_set_real_num_xx_queues needs to take rtnl lock here
+	 * unless wait_for_reset is set, in which case the rtnl lock
+	 * has already been taken before initializing the reset
+	 */
+	if (!adapter->wait_for_reset) {
+		rtnl_lock();
+		we_lock_rtnl = true;
+	}
 	reset_state = adapter->state;
 
 	rwi = get_next_rwi(adapter);
@@ -2020,12 +2005,11 @@ static void __ibmvnic_reset(struct work_struct *work)
 	if (rc) {
 		netdev_dbg(adapter->netdev, "Reset failed\n");
 		free_all_rwi(adapter);
-		mutex_unlock(&adapter->reset_lock);
-		return;
 	}
 
 	adapter->resetting = false;
-	mutex_unlock(&adapter->reset_lock);
+	if (we_lock_rtnl)
+		rtnl_unlock();
 }
 
 static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
@@ -4768,7 +4752,6 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
 
 	INIT_WORK(&adapter->ibmvnic_reset, __ibmvnic_reset);
 	INIT_LIST_HEAD(&adapter->rwi_list);
-	mutex_init(&adapter->reset_lock);
 	mutex_init(&adapter->rwi_lock);
 	adapter->resetting = false;
 
@@ -4840,8 +4823,8 @@ static int ibmvnic_remove(struct vio_dev *dev)
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
 
 	adapter->state = VNIC_REMOVING;
-	unregister_netdev(netdev);
-	mutex_lock(&adapter->reset_lock);
+	rtnl_lock();
+	unregister_netdevice(netdev);
 
 	release_resources(adapter);
 	release_sub_crqs(adapter, 1);
@@ -4852,7 +4835,7 @@ static int ibmvnic_remove(struct vio_dev *dev)
 
 	adapter->state = VNIC_REMOVED;
 
-	mutex_unlock(&adapter->reset_lock);
+	rtnl_unlock();
 	device_remove_file(&dev->dev, &dev_attr_failover);
 	free_netdev(netdev);
 	dev_set_drvdata(&dev->dev, NULL);
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index 18103b811d4db..99c4f8d331ce7 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -1075,7 +1075,7 @@ struct ibmvnic_adapter {
 	struct tasklet_struct tasklet;
 	enum vnic_state state;
 	enum ibmvnic_reset_reason reset_reason;
-	struct mutex reset_lock, rwi_lock;
+	struct mutex rwi_lock;
 	struct list_head rwi_list;
 	struct work_struct ibmvnic_reset;
 	bool resetting;
-- 
GitLab


From 8830f26bcd3cf1ff07d9078cd310a534c03b6a10 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Fri, 2 Nov 2018 16:12:12 +0000
Subject: [PATCH 1248/1890] drm/i915: Prevent machine hang from Broxton's vtd
 w/a and error capture

Since capturing the error state requires fiddling around with the GGTT
to read arbitrary buffers and is itself run under stop_machine(), it
deadlocks the machine (effectively a hard hang) when run in conjunction
with Broxton's VTd workaround to serialize GGTT access.

v2: Store the ERR_PTR in first_error so that the error can be reported
to the user via sysfs.
v3: Mention the quirk in dmesg (using info as per usual)

Fixes: 0ef34ad6222a ("drm/i915: Serialize GTT/Aperture accesses on BXT")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jon Bloomfield <jon.bloomfield@intel.com>
Cc: John Harrison <john.C.Harrison@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181102161232.17742-5-chris@chris-wilson.co.uk
(cherry picked from commit fb6f0b64e455b207a636346588e65bf9598d30eb)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c   |  5 +++++
 drivers/gpu/drm/i915/i915_gpu_error.c | 15 ++++++++++++++-
 drivers/gpu/drm/i915/i915_gpu_error.h |  8 +++++++-
 3 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 47c3025437990..07999fe09ad23 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3413,6 +3413,11 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
 		ggtt->vm.insert_page    = bxt_vtd_ggtt_insert_page__BKL;
 		if (ggtt->vm.clear_range != nop_clear_range)
 			ggtt->vm.clear_range = bxt_vtd_ggtt_clear_range__BKL;
+
+		/* Prevent recursively calling stop_machine() and deadlocks. */
+		dev_info(dev_priv->drm.dev,
+			 "Disabling error capture for VT-d workaround\n");
+		i915_disable_error_state(dev_priv, -ENODEV);
 	}
 
 	ggtt->invalidate = gen6_ggtt_invalidate;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 8762d17b66591..3eb33e000d6f0 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -648,6 +648,9 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 		return 0;
 	}
 
+	if (IS_ERR(error))
+		return PTR_ERR(error);
+
 	if (*error->error_msg)
 		err_printf(m, "%s\n", error->error_msg);
 	err_printf(m, "Kernel: " UTS_RELEASE "\n");
@@ -1859,6 +1862,7 @@ void i915_capture_error_state(struct drm_i915_private *i915,
 	error = i915_capture_gpu_state(i915);
 	if (!error) {
 		DRM_DEBUG_DRIVER("out of memory, not capturing error state\n");
+		i915_disable_error_state(i915, -ENOMEM);
 		return;
 	}
 
@@ -1914,5 +1918,14 @@ void i915_reset_error_state(struct drm_i915_private *i915)
 	i915->gpu_error.first_error = NULL;
 	spin_unlock_irq(&i915->gpu_error.lock);
 
-	i915_gpu_state_put(error);
+	if (!IS_ERR(error))
+		i915_gpu_state_put(error);
+}
+
+void i915_disable_error_state(struct drm_i915_private *i915, int err)
+{
+	spin_lock_irq(&i915->gpu_error.lock);
+	if (!i915->gpu_error.first_error)
+		i915->gpu_error.first_error = ERR_PTR(err);
+	spin_unlock_irq(&i915->gpu_error.lock);
 }
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 8710fb18ed746..3ec89a504de52 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -343,6 +343,7 @@ static inline void i915_gpu_state_put(struct i915_gpu_state *gpu)
 
 struct i915_gpu_state *i915_first_error_state(struct drm_i915_private *i915);
 void i915_reset_error_state(struct drm_i915_private *i915);
+void i915_disable_error_state(struct drm_i915_private *i915, int err);
 
 #else
 
@@ -355,13 +356,18 @@ static inline void i915_capture_error_state(struct drm_i915_private *dev_priv,
 static inline struct i915_gpu_state *
 i915_first_error_state(struct drm_i915_private *i915)
 {
-	return NULL;
+	return ERR_PTR(-ENODEV);
 }
 
 static inline void i915_reset_error_state(struct drm_i915_private *i915)
 {
 }
 
+static inline void i915_disable_error_state(struct drm_i915_private *i915,
+					    int err)
+{
+}
+
 #endif /* IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) */
 
 #endif /* _I915_GPU_ERROR_H_ */
-- 
GitLab


From 3841840449817ba6cf3e636008bc4e1061a03388 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Tue, 20 Nov 2018 08:25:28 +0100
Subject: [PATCH 1249/1890] x86/boot: Mostly revert commit ae7e1238e68f2a ("Add
 ACPI RSDP address to setup_header")

Peter Anvin pointed out that commit:

  ae7e1238e68f2a ("x86/boot: Add ACPI RSDP address to setup_header")

should be reverted as setup_header should only contain items set by the
legacy BIOS.

So revert said commit. Instead of fully reverting the dependent commit
of:

  e7b66d16fe4172 ("x86/acpi, x86/boot: Take RSDP address for boot params if available")

just remove the setup_header reference in order to replace it by
a boot_params in a followup patch.

Suggested-by: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: boris.ostrovsky@oracle.com
Cc: bp@alien8.de
Cc: daniel.kiper@oracle.com
Cc: sstabellini@kernel.org
Cc: xen-devel@lists.xenproject.org
Link: http://lkml.kernel.org/r/20181120072529.5489-2-jgross@suse.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 Documentation/x86/boot.txt            | 32 +--------------------------
 arch/x86/boot/header.S                |  6 +----
 arch/x86/include/asm/x86_init.h       |  2 --
 arch/x86/include/uapi/asm/bootparam.h |  4 ----
 arch/x86/kernel/acpi/boot.c           |  2 +-
 arch/x86/kernel/head32.c              |  1 -
 arch/x86/kernel/head64.c              |  2 --
 arch/x86/kernel/setup.c               | 17 --------------
 8 files changed, 3 insertions(+), 63 deletions(-)

diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt
index 7727db8f94bce..5e9b826b5f62f 100644
--- a/Documentation/x86/boot.txt
+++ b/Documentation/x86/boot.txt
@@ -61,18 +61,6 @@ Protocol 2.12:	(Kernel 3.8) Added the xloadflags field and extension fields
 	 	to struct boot_params for loading bzImage and ramdisk
 		above 4G in 64bit.
 
-Protocol 2.13:	(Kernel 3.14) Support 32- and 64-bit flags being set in
-		xloadflags to support booting a 64-bit kernel from 32-bit
-		EFI
-
-Protocol 2.14:	(Kernel 4.20) Added acpi_rsdp_addr holding the physical
-		address of the ACPI RSDP table.
-		The bootloader updates version with:
-		0x8000 | min(kernel-version, bootloader-version)
-		kernel-version being the protocol version supported by
-		the kernel and bootloader-version the protocol version
-		supported by the bootloader.
-
 **** MEMORY LAYOUT
 
 The traditional memory map for the kernel loader, used for Image or
@@ -209,7 +197,6 @@ Offset	Proto	Name		Meaning
 0258/8	2.10+	pref_address	Preferred loading address
 0260/4	2.10+	init_size	Linear memory required during initialization
 0264/4	2.11+	handover_offset	Offset of handover entry point
-0268/8	2.14+	acpi_rsdp_addr	Physical address of RSDP table
 
 (1) For backwards compatibility, if the setup_sects field contains 0, the
     real value is 4.
@@ -322,7 +309,7 @@ Protocol:	2.00+
   Contains the magic number "HdrS" (0x53726448).
 
 Field name:	version
-Type:		modify
+Type:		read
 Offset/size:	0x206/2
 Protocol:	2.00+
 
@@ -330,12 +317,6 @@ Protocol:	2.00+
   e.g. 0x0204 for version 2.04, and 0x0a11 for a hypothetical version
   10.17.
 
-  Up to protocol version 2.13 this information is only read by the
-  bootloader. From protocol version 2.14 onwards the bootloader will
-  write the used protocol version or-ed with 0x8000 to the field. The
-  used protocol version will be the minimum of the supported protocol
-  versions of the bootloader and the kernel.
-
 Field name:	realmode_swtch
 Type:		modify (optional)
 Offset/size:	0x208/4
@@ -763,17 +744,6 @@ Offset/size:	0x264/4
 
   See EFI HANDOVER PROTOCOL below for more details.
 
-Field name:	acpi_rsdp_addr
-Type:		write
-Offset/size:	0x268/8
-Protocol:	2.14+
-
-  This field can be set by the boot loader to tell the kernel the
-  physical address of the ACPI RSDP table.
-
-  A value of 0 indicates the kernel should fall back to the standard
-  methods to locate the RSDP.
-
 
 **** THE IMAGE CHECKSUM
 
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 4c881c850125c..850b8762e8896 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -300,7 +300,7 @@ _start:
 	# Part 2 of the header, from the old setup.S
 
 		.ascii	"HdrS"		# header signature
-		.word	0x020e		# header version number (>= 0x0105)
+		.word	0x020d		# header version number (>= 0x0105)
 					# or else old loadlin-1.5 will fail)
 		.globl realmode_swtch
 realmode_swtch:	.word	0, 0		# default_switch, SETUPSEG
@@ -558,10 +558,6 @@ pref_address:		.quad LOAD_PHYSICAL_ADDR	# preferred load addr
 init_size:		.long INIT_SIZE		# kernel initialization size
 handover_offset:	.long 0			# Filled in by build.c
 
-acpi_rsdp_addr:		.quad 0			# 64-bit physical pointer to the
-						# ACPI RSDP table, added with
-						# version 2.14
-
 # End of setup header #####################################################
 
 	.section ".entrytext", "ax"
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 0f842104862c3..b85a7c54c6a13 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -303,6 +303,4 @@ extern void x86_init_noop(void);
 extern void x86_init_uint_noop(unsigned int unused);
 extern bool x86_pnpbios_disabled(void);
 
-void x86_verify_bootdata_version(void);
-
 #endif
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index 22f89d040dddc..a06cbf0197446 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -16,9 +16,6 @@
 #define RAMDISK_PROMPT_FLAG		0x8000
 #define RAMDISK_LOAD_FLAG		0x4000
 
-/* version flags */
-#define VERSION_WRITTEN	0x8000
-
 /* loadflags */
 #define LOADED_HIGH	(1<<0)
 #define KASLR_FLAG	(1<<1)
@@ -89,7 +86,6 @@ struct setup_header {
 	__u64	pref_address;
 	__u32	init_size;
 	__u32	handover_offset;
-	__u64	acpi_rsdp_addr;
 } __attribute__((packed));
 
 struct sys_desc_table {
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 92c76bf97ad82..fb3b1f3a5aba5 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1776,5 +1776,5 @@ void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size)
 
 u64 x86_default_get_root_pointer(void)
 {
-	return boot_params.hdr.acpi_rsdp_addr;
+	return 0;
 }
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 76fa3b8365985..ec6fefbfd3c04 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -37,7 +37,6 @@ asmlinkage __visible void __init i386_start_kernel(void)
 	cr4_init_shadow();
 
 	sanitize_boot_params(&boot_params);
-	x86_verify_bootdata_version();
 
 	x86_early_init_platform_quirks();
 
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 7663a8eb602bc..16b1cbd3a61e2 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -457,8 +457,6 @@ void __init x86_64_start_reservations(char *real_mode_data)
 	if (!boot_params.hdr.version)
 		copy_bootdata(__va(real_mode_data));
 
-	x86_verify_bootdata_version();
-
 	x86_early_init_platform_quirks();
 
 	switch (boot_params.hdr.hardware_subarch) {
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index b74e7bfed6ab4..d494b9bfe618c 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1280,23 +1280,6 @@ void __init setup_arch(char **cmdline_p)
 	unwind_init();
 }
 
-/*
- * From boot protocol 2.14 onwards we expect the bootloader to set the
- * version to "0x8000 | <used version>". In case we find a version >= 2.14
- * without the 0x8000 we assume the boot loader supports 2.13 only and
- * reset the version accordingly. The 0x8000 flag is removed in any case.
- */
-void __init x86_verify_bootdata_version(void)
-{
-	if (boot_params.hdr.version & VERSION_WRITTEN)
-		boot_params.hdr.version &= ~VERSION_WRITTEN;
-	else if (boot_params.hdr.version >= 0x020e)
-		boot_params.hdr.version = 0x020d;
-
-	if (boot_params.hdr.version < 0x020e)
-		boot_params.hdr.acpi_rsdp_addr = 0;
-}
-
 #ifdef CONFIG_X86_32
 
 static struct resource video_ram_resource = {
-- 
GitLab


From e6e094e053af75cbc164e950814d3d084fb1e698 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Tue, 20 Nov 2018 08:25:29 +0100
Subject: [PATCH 1250/1890] x86/acpi, x86/boot: Take RSDP address from boot
 params if available

In case the RSDP address in struct boot_params is specified don't try
to find the table by searching, but take the address directly as set
by the boot loader.

Suggested-by: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: boris.ostrovsky@oracle.com
Cc: bp@alien8.de
Cc: daniel.kiper@oracle.com
Cc: sstabellini@kernel.org
Cc: xen-devel@lists.xenproject.org
Link: http://lkml.kernel.org/r/20181120072529.5489-3-jgross@suse.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/uapi/asm/bootparam.h | 3 ++-
 arch/x86/kernel/acpi/boot.c           | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index a06cbf0197446..60733f137e9a2 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -155,7 +155,8 @@ struct boot_params {
 	__u8  _pad2[4];					/* 0x054 */
 	__u64  tboot_addr;				/* 0x058 */
 	struct ist_info ist_info;			/* 0x060 */
-	__u8  _pad3[16];				/* 0x070 */
+	__u64 acpi_rsdp_addr;				/* 0x070 */
+	__u8  _pad3[8];					/* 0x078 */
 	__u8  hd0_info[16];	/* obsolete! */		/* 0x080 */
 	__u8  hd1_info[16];	/* obsolete! */		/* 0x090 */
 	struct sys_desc_table sys_desc_table; /* obsolete! */	/* 0x0a0 */
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index fb3b1f3a5aba5..06635fbca81c0 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1776,5 +1776,5 @@ void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size)
 
 u64 x86_default_get_root_pointer(void)
 {
-	return 0;
+	return boot_params.acpi_rsdp_addr;
 }
-- 
GitLab


From 68bc10bf992180f269816ff3d22eb30383138577 Mon Sep 17 00:00:00 2001
From: Trent Piepho <tpiepho@impinj.com>
Date: Mon, 5 Nov 2018 18:11:36 +0000
Subject: [PATCH 1251/1890] PCI: imx6: Fix link training status detection in
 link up check

This bug was introduced in the interaction for two commits on either
branch of the merge commit 562df5c8521e ("Merge branch
'pci/host-designware' into next").

Commit 4d107d3b5a68 ("PCI: imx6: Move link up check into
imx6_pcie_wait_for_link()"), changed imx6_pcie_wait_for_link() to poll
the link status register directly, checking for link up and not
training, and made imx6_pcie_link_up() only check the link up bit (once,
not a polling loop).

While commit 886bc5ceb5cc ("PCI: designware: Add generic
dw_pcie_wait_for_link()"), replaced the loop in
imx6_pcie_wait_for_link() with a call to a new dwc core function, which
polled imx6_pcie_link_up(), which still checked both link up and not
training in a loop.

When these two commits were merged, the version of
imx6_pcie_wait_for_link() from 886bc5ceb5cc was kept, which eliminated
the link training check placed there by 4d107d3b5a68. However, the
version of imx6_pcie_link_up() from 4d107d3b5a68 was kept, which
eliminated the link training check that had been there and was moved to
imx6_pcie_wait_for_link().

The result was the link training check got lost for the imx6 driver.

Eliminate imx6_pcie_link_up() so that the default handler,
dw_pcie_link_up(), is used instead. The default handler has the correct
code, which checks for link up and also that it still is not training,
fixing the regression.

Fixes: 562df5c8521e ("Merge branch 'pci/host-designware' into next")
Signed-off-by: Trent Piepho <tpiepho@impinj.com>
[lorenzo.pieralisi@arm.com: rewrote the commit log]
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Reviewed-by: Lucas Stach <l.stach@pengutronix.de>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Joao Pinto <Joao.Pinto@synopsys.com>
Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Cc: Richard Zhu <hongxing.zhu@nxp.com>
---
 drivers/pci/controller/dwc/pci-imx6.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c
index 2cbef2d7c207a..88af6bff945f3 100644
--- a/drivers/pci/controller/dwc/pci-imx6.c
+++ b/drivers/pci/controller/dwc/pci-imx6.c
@@ -81,8 +81,6 @@ struct imx6_pcie {
 #define PCIE_PL_PFLR_FORCE_LINK			(1 << 15)
 #define PCIE_PHY_DEBUG_R0 (PL_OFFSET + 0x28)
 #define PCIE_PHY_DEBUG_R1 (PL_OFFSET + 0x2c)
-#define PCIE_PHY_DEBUG_R1_XMLH_LINK_IN_TRAINING	(1 << 29)
-#define PCIE_PHY_DEBUG_R1_XMLH_LINK_UP		(1 << 4)
 
 #define PCIE_PHY_CTRL (PL_OFFSET + 0x114)
 #define PCIE_PHY_CTRL_DATA_LOC 0
@@ -711,12 +709,6 @@ static int imx6_pcie_host_init(struct pcie_port *pp)
 	return 0;
 }
 
-static int imx6_pcie_link_up(struct dw_pcie *pci)
-{
-	return dw_pcie_readl_dbi(pci, PCIE_PHY_DEBUG_R1) &
-			PCIE_PHY_DEBUG_R1_XMLH_LINK_UP;
-}
-
 static const struct dw_pcie_host_ops imx6_pcie_host_ops = {
 	.host_init = imx6_pcie_host_init,
 };
@@ -749,7 +741,7 @@ static int imx6_add_pcie_port(struct imx6_pcie *imx6_pcie,
 }
 
 static const struct dw_pcie_ops dw_pcie_ops = {
-	.link_up = imx6_pcie_link_up,
+	/* No special ops needed, but pcie-designware still expects this struct */
 };
 
 #ifdef CONFIG_PM_SLEEP
-- 
GitLab


From c6fd6fe9dea44732cdcd970f1130b8cc50ad685a Mon Sep 17 00:00:00 2001
From: Hou Zhiqiang <Zhiqiang.Hou@nxp.com>
Date: Wed, 7 Nov 2018 05:16:49 +0000
Subject: [PATCH 1252/1890] PCI: layerscape: Fix wrong invocation of outbound
 window disable accessor

The order of parameters is not correct when invoking the outbound
window disable routine. Fix it.

Fixes: 4a2745d760fa ("PCI: layerscape: Disable outbound windows configured by bootloader")
Signed-off-by: Hou Zhiqiang <Zhiqiang.Hou@nxp.com>
[lorenzo.pieralisi@arm.com: commit log]
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Cc: stable@vger.kernel.org
---
 drivers/pci/controller/dwc/pci-layerscape.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pci/controller/dwc/pci-layerscape.c b/drivers/pci/controller/dwc/pci-layerscape.c
index 3724d3ef7008e..7aa9a82b7ebd6 100644
--- a/drivers/pci/controller/dwc/pci-layerscape.c
+++ b/drivers/pci/controller/dwc/pci-layerscape.c
@@ -88,7 +88,7 @@ static void ls_pcie_disable_outbound_atus(struct ls_pcie *pcie)
 	int i;
 
 	for (i = 0; i < PCIE_IATU_NUM; i++)
-		dw_pcie_disable_atu(pcie->pci, DW_PCIE_REGION_OUTBOUND, i);
+		dw_pcie_disable_atu(pcie->pci, i, DW_PCIE_REGION_OUTBOUND);
 }
 
 static int ls1021_pcie_link_up(struct dw_pcie *pci)
-- 
GitLab


From 63529eaa6164ef7ab4b907b25ac3648177e5e78f Mon Sep 17 00:00:00 2001
From: Maarten Jacobs <maarten256@outlook.com>
Date: Mon, 19 Nov 2018 23:18:49 +0000
Subject: [PATCH 1253/1890] usb: cdc-acm: add entry for Hiro (Conexant) modem

The cdc-acm kernel module currently does not support the Hiro (Conexant)
H05228 USB modem. The patch below adds the device specific information:
	idVendor	0x0572
	idProduct	0x1349

Signed-off-by: Maarten Jacobs <maarten256@outlook.com>
Acked-by: Oliver Neukum <oneukum@suse.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/class/cdc-acm.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index 47d75c20c211c..1b68fed464cb9 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -1696,6 +1696,9 @@ static const struct usb_device_id acm_ids[] = {
 	{ USB_DEVICE(0x0572, 0x1328), /* Shiro / Aztech USB MODEM UM-3100 */
 	.driver_info = NO_UNION_NORMAL, /* has no union descriptor */
 	},
+	{ USB_DEVICE(0x0572, 0x1349), /* Hiro (Conexant) USB MODEM H50228 */
+	.driver_info = NO_UNION_NORMAL, /* has no union descriptor */
+	},
 	{ USB_DEVICE(0x20df, 0x0001), /* Simtec Electronics Entropy Key */
 	.driver_info = QUIRK_CONTROL_LINE_STATE, },
 	{ USB_DEVICE(0x2184, 0x001c) },	/* GW Instek AFG-2225 */
-- 
GitLab


From 21f70d4abf9e17c2e3d7e64b7bfa3424e017f176 Mon Sep 17 00:00:00 2001
From: Olof Johansson <olof@lixom.net>
Date: Wed, 14 Nov 2018 16:27:55 -0800
Subject: [PATCH 1254/1890] RISC-V: Fix raw_copy_{to,from}_user()

Sparse highlighted it, and appears to be a pure bug (from vs to).

./arch/riscv/include/asm/uaccess.h:403:35: warning: incorrect type in argument 1 (different address spaces)
./arch/riscv/include/asm/uaccess.h:403:39: warning: incorrect type in argument 2 (different address spaces)
./arch/riscv/include/asm/uaccess.h:409:37: warning: incorrect type in argument 1 (different address spaces)
./arch/riscv/include/asm/uaccess.h:409:41: warning: incorrect type in argument 2 (different address spaces)

Signed-off-by: Olof Johansson <olof@lixom.net>
Cc: stable@vger.kernel.org
Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
---
 arch/riscv/include/asm/uaccess.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h
index 473cfc84e412f..8c3e3e3c8be12 100644
--- a/arch/riscv/include/asm/uaccess.h
+++ b/arch/riscv/include/asm/uaccess.h
@@ -400,13 +400,13 @@ extern unsigned long __must_check __asm_copy_from_user(void *to,
 static inline unsigned long
 raw_copy_from_user(void *to, const void __user *from, unsigned long n)
 {
-	return __asm_copy_to_user(to, from, n);
+	return __asm_copy_from_user(to, from, n);
 }
 
 static inline unsigned long
 raw_copy_to_user(void __user *to, const void *from, unsigned long n)
 {
-	return __asm_copy_from_user(to, from, n);
+	return __asm_copy_to_user(to, from, n);
 }
 
 extern long strncpy_from_user(char *dest, const char __user *src, long count);
-- 
GitLab


From c0fbcd9918607e85c9598bfa3dd0a84ed77ea210 Mon Sep 17 00:00:00 2001
From: Anup Patel <anup@brainfault.org>
Date: Mon, 12 Nov 2018 11:25:15 +0530
Subject: [PATCH 1255/1890] RISC-V: Build flat and compressed kernel images

This patch extends Linux RISC-V build system to build and install:
Image - Flat uncompressed kernel image
Image.gz - Flat and GZip compressed kernel image

Quiet a few bootloaders (such as Uboot, UEFI, etc) are capable of
booting flat and compressed kernel images. In case of Uboot, booting
Image or Image.gz is achieved using bootm command.

The flat and uncompressed kernel image (i.e. Image) is very useful
in pre-silicon developent and testing because we can create back-door
HEX files for RAM on FPGAs from Image.

Signed-off-by: Anup Patel <anup@brainfault.org>
Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
---
 arch/riscv/Makefile             | 15 ++++++++-
 arch/riscv/boot/.gitignore      |  2 ++
 arch/riscv/boot/Makefile        | 33 ++++++++++++++++++
 arch/riscv/boot/install.sh      | 60 +++++++++++++++++++++++++++++++++
 arch/riscv/kernel/head.S        | 10 ++++++
 arch/riscv/kernel/vmlinux.lds.S |  2 +-
 6 files changed, 120 insertions(+), 2 deletions(-)
 create mode 100644 arch/riscv/boot/.gitignore
 create mode 100644 arch/riscv/boot/Makefile
 create mode 100644 arch/riscv/boot/install.sh

diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 4af153a182b07..4b594f2e4f7eb 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -71,6 +71,10 @@ KBUILD_CFLAGS += $(call cc-option,-mstrict-align)
 # arch specific predefines for sparse
 CHECKFLAGS += -D__riscv -D__riscv_xlen=$(BITS)
 
+# Default target when executing plain make
+boot		:= arch/riscv/boot
+KBUILD_IMAGE	:= $(boot)/Image.gz
+
 head-y := arch/riscv/kernel/head.o
 
 core-y += arch/riscv/kernel/ arch/riscv/mm/
@@ -81,4 +85,13 @@ PHONY += vdso_install
 vdso_install:
 	$(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso $@
 
-all: vmlinux
+all: Image.gz
+
+Image: vmlinux
+	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
+
+Image.%: Image
+	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
+
+zinstall install:
+	$(Q)$(MAKE) $(build)=$(boot) $@
diff --git a/arch/riscv/boot/.gitignore b/arch/riscv/boot/.gitignore
new file mode 100644
index 0000000000000..8dab0bb6ae667
--- /dev/null
+++ b/arch/riscv/boot/.gitignore
@@ -0,0 +1,2 @@
+Image
+Image.gz
diff --git a/arch/riscv/boot/Makefile b/arch/riscv/boot/Makefile
new file mode 100644
index 0000000000000..0990a9fdbe5d6
--- /dev/null
+++ b/arch/riscv/boot/Makefile
@@ -0,0 +1,33 @@
+#
+# arch/riscv/boot/Makefile
+#
+# This file is included by the global makefile so that you can add your own
+# architecture-specific flags and dependencies.
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 2018, Anup Patel.
+# Author: Anup Patel <anup@brainfault.org>
+#
+# Based on the ia64 and arm64 boot/Makefile.
+#
+
+OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S
+
+targets := Image
+
+$(obj)/Image: vmlinux FORCE
+	$(call if_changed,objcopy)
+
+$(obj)/Image.gz: $(obj)/Image FORCE
+	$(call if_changed,gzip)
+
+install:
+	$(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \
+	$(obj)/Image System.map "$(INSTALL_PATH)"
+
+zinstall:
+	$(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \
+	$(obj)/Image.gz System.map "$(INSTALL_PATH)"
diff --git a/arch/riscv/boot/install.sh b/arch/riscv/boot/install.sh
new file mode 100644
index 0000000000000..18c39159c0ff4
--- /dev/null
+++ b/arch/riscv/boot/install.sh
@@ -0,0 +1,60 @@
+#!/bin/sh
+#
+# arch/riscv/boot/install.sh
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1995 by Linus Torvalds
+#
+# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin
+# Adapted from code in arch/i386/boot/install.sh by Russell King
+#
+# "make install" script for the RISC-V Linux port
+#
+# Arguments:
+#   $1 - kernel version
+#   $2 - kernel image file
+#   $3 - kernel map file
+#   $4 - default install path (blank if root directory)
+#
+
+verify () {
+	if [ ! -f "$1" ]; then
+		echo ""                                                   1>&2
+		echo " *** Missing file: $1"                              1>&2
+		echo ' *** You need to run "make" before "make install".' 1>&2
+		echo ""                                                   1>&2
+		exit 1
+	fi
+}
+
+# Make sure the files actually exist
+verify "$2"
+verify "$3"
+
+# User may have a custom install script
+if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
+if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
+
+if [ "$(basename $2)" = "Image.gz" ]; then
+# Compressed install
+  echo "Installing compressed kernel"
+  base=vmlinuz
+else
+# Normal install
+  echo "Installing normal kernel"
+  base=vmlinux
+fi
+
+if [ -f $4/$base-$1 ]; then
+  mv $4/$base-$1 $4/$base-$1.old
+fi
+cat $2 > $4/$base-$1
+
+# Install system map file
+if [ -f $4/System.map-$1 ]; then
+  mv $4/System.map-$1 $4/System.map-$1.old
+fi
+cp $3 $4/System.map-$1
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index 711190d473d41..fe884cd69abd8 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -44,6 +44,16 @@ ENTRY(_start)
 	amoadd.w a3, a2, (a3)
 	bnez a3, .Lsecondary_start
 
+	/* Clear BSS for flat non-ELF images */
+	la a3, __bss_start
+	la a4, __bss_stop
+	ble a4, a3, clear_bss_done
+clear_bss:
+	REG_S zero, (a3)
+	add a3, a3, RISCV_SZPTR
+	blt a3, a4, clear_bss
+clear_bss_done:
+
 	/* Save hart ID and DTB physical address */
 	mv s0, a0
 	mv s1, a1
diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S
index ece84991609ca..65df1dfdc3038 100644
--- a/arch/riscv/kernel/vmlinux.lds.S
+++ b/arch/riscv/kernel/vmlinux.lds.S
@@ -74,7 +74,7 @@ SECTIONS
 		*(.sbss*)
 	}
 
-	BSS_SECTION(0, 0, 0)
+	BSS_SECTION(PAGE_SIZE, PAGE_SIZE, 0)
 
 	EXCEPTION_TABLE(0x10)
 	NOTES
-- 
GitLab


From 0138ebb90c633f76bc71617f8f23635ce41c84fd Mon Sep 17 00:00:00 2001
From: David Abdurachmanov <david.abdurachmanov@gmail.com>
Date: Thu, 8 Nov 2018 20:07:00 +0100
Subject: [PATCH 1256/1890] riscv: fix warning in
 arch/riscv/include/asm/module.h

Fixes warning: 'struct module' declared inside parameter list will not be
visible outside of this definition or declaration

Signed-off-by: David Abdurachmanov <david.abdurachmanov@gmail.com>
Acked-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
---
 arch/riscv/include/asm/module.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/riscv/include/asm/module.h b/arch/riscv/include/asm/module.h
index 349df33808c42..cd2af4b013e38 100644
--- a/arch/riscv/include/asm/module.h
+++ b/arch/riscv/include/asm/module.h
@@ -8,6 +8,7 @@
 
 #define MODULE_ARCH_VERMAGIC    "riscv"
 
+struct module;
 u64 module_emit_got_entry(struct module *mod, u64 val);
 u64 module_emit_plt_entry(struct module *mod, u64 val);
 
-- 
GitLab


From 27f8899d6002e11a6e2d995e29b8deab5aa9cc25 Mon Sep 17 00:00:00 2001
From: David Abdurachmanov <david.abdurachmanov@gmail.com>
Date: Thu, 8 Nov 2018 20:02:39 +0100
Subject: [PATCH 1257/1890] riscv: add asm/unistd.h UAPI header

Marcin Juszkiewicz reported issues while generating syscall table for riscv
using 4.20-rc1. The patch refactors our unistd.h files to match some other
architectures.

- Add asm/unistd.h UAPI header, which has __ARCH_WANT_NEW_STAT only for 64-bit
- Remove asm/syscalls.h UAPI header and merge to asm/unistd.h
- Adjust kernel asm/unistd.h

So now asm/unistd.h UAPI header should show all syscalls for riscv.

Before this, Makefile simply put `#include <asm-generic/unistd.h>` into
generated asm/unistd.h UAPI header thus user didn't see:

- __NR_riscv_flush_icache
- __NR_newfstatat
- __NR_fstat

which are supported by riscv kernel.

Signed-off-by: David Abdurachmanov <david.abdurachmanov@gmail.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Marcin Juszkiewicz <marcin.juszkiewicz@linaro.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Fixes: 67314ec7b025 ("RISC-V: Request newstat syscalls")
Signed-off-by: David Abdurachmanov <david.abdurachmanov@gmail.com>
Acked-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
---
 arch/riscv/include/asm/unistd.h               |  5 ++--
 .../include/uapi/asm/{syscalls.h => unistd.h} | 26 ++++++++++++++-----
 2 files changed, 21 insertions(+), 10 deletions(-)
 rename arch/riscv/include/uapi/asm/{syscalls.h => unistd.h} (53%)

diff --git a/arch/riscv/include/asm/unistd.h b/arch/riscv/include/asm/unistd.h
index eff7aa9aa1637..fef96f117b4de 100644
--- a/arch/riscv/include/asm/unistd.h
+++ b/arch/riscv/include/asm/unistd.h
@@ -13,10 +13,9 @@
 
 /*
  * There is explicitly no include guard here because this file is expected to
- * be included multiple times.  See uapi/asm/syscalls.h for more info.
+ * be included multiple times.
  */
 
-#define __ARCH_WANT_NEW_STAT
 #define __ARCH_WANT_SYS_CLONE
+
 #include <uapi/asm/unistd.h>
-#include <uapi/asm/syscalls.h>
diff --git a/arch/riscv/include/uapi/asm/syscalls.h b/arch/riscv/include/uapi/asm/unistd.h
similarity index 53%
rename from arch/riscv/include/uapi/asm/syscalls.h
rename to arch/riscv/include/uapi/asm/unistd.h
index 206dc4b0f6ea8..1f3bd3ebbb0d2 100644
--- a/arch/riscv/include/uapi/asm/syscalls.h
+++ b/arch/riscv/include/uapi/asm/unistd.h
@@ -1,13 +1,25 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
- * Copyright (C) 2017-2018 SiFive
+ * Copyright (C) 2018 David Abdurachmanov <david.abdurachmanov@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
-/*
- * There is explicitly no include guard here because this file is expected to
- * be included multiple times in order to define the syscall macros via
- * __SYSCALL.
- */
+#ifdef __LP64__
+#define __ARCH_WANT_NEW_STAT
+#endif /* __LP64__ */
+
+#include <asm-generic/unistd.h>
 
 /*
  * Allows the instruction cache to be flushed from userspace.  Despite RISC-V
-- 
GitLab


From 5d8f81ba1da55210123b9595e87b913c79579d02 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Patrick=20St=C3=A4hlin?= <me@packi.ch>
Date: Fri, 9 Nov 2018 22:42:16 +0100
Subject: [PATCH 1258/1890] RISC-V: recognize S/U mode bits in print_isa
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Removes the warning about an unsupported ISA when reading /proc/cpuinfo
on QEMU. The "S" extension is not being returned as it is not accessible
from userspace.

Signed-off-by: Patrick StÃ¤hlin <me@packi.ch>
Signed-off-by: Palmer Dabbelt <palmer@sifive.com>
---
 arch/riscv/kernel/cpu.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
index 3a5a2ee31547b..b4a7d4427fbb9 100644
--- a/arch/riscv/kernel/cpu.c
+++ b/arch/riscv/kernel/cpu.c
@@ -64,7 +64,7 @@ int riscv_of_processor_hartid(struct device_node *node)
 
 static void print_isa(struct seq_file *f, const char *orig_isa)
 {
-	static const char *ext = "mafdc";
+	static const char *ext = "mafdcsu";
 	const char *isa = orig_isa;
 	const char *e;
 
@@ -88,11 +88,14 @@ static void print_isa(struct seq_file *f, const char *orig_isa)
 	/*
 	 * Check the rest of the ISA string for valid extensions, printing those
 	 * we find.  RISC-V ISA strings define an order, so we only print the
-	 * extension bits when they're in order.
+	 * extension bits when they're in order. Hide the supervisor (S)
+	 * extension from userspace as it's not accessible from there.
 	 */
 	for (e = ext; *e != '\0'; ++e) {
 		if (isa[0] == e[0]) {
-			seq_write(f, isa, 1);
+			if (isa[0] != 's')
+				seq_write(f, isa, 1);
+
 			isa++;
 		}
 	}
-- 
GitLab


From e8828ec1c003727fc001eab06aa19bd2ca9b677e Mon Sep 17 00:00:00 2001
From: "Tudor.Ambarus@microchip.com" <Tudor.Ambarus@microchip.com>
Date: Fri, 16 Nov 2018 17:46:37 +0000
Subject: [PATCH 1259/1890] mtd: spi-nor: fix selection of uniform erase type
 in flexible conf

There are uniform, non-uniform and flexible erase flash configurations.

The non-uniform erase types, are the erase types that can _not_ erase
the entire flash by their own.

As the code was, in case flashes had flexible erase capabilities
(support both uniform and non-uniform erase types in the same flash
configuration) and supported multiple uniform erase type sizes, the
code did not sort the uniform erase types, and could select a wrong
erase type size.

Sort the uniform erase mask in case of flexible erase flash
configurations, in order to select the best uniform erase type size.

Uniform, non-uniform, and flexible configurations with just a valid
uniform erase type, are not affected by this change.

Uniform erase tested on mx25l3273fm2i-08g and sst26vf064B-104i/sn.
Non uniform erase tested on sst26vf064B-104i/sn.

Fixes: 5390a8df769e ("mtd: spi-nor: add support to non-uniform SFDP SPI NOR flash memories")
Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
---
 drivers/mtd/spi-nor/spi-nor.c | 47 +++++++++++++++++++++++++++--------
 1 file changed, 36 insertions(+), 11 deletions(-)

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index eb7bb596416b0..93c9bc8931fcd 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -2521,6 +2521,34 @@ static int spi_nor_map_cmp_erase_type(const void *l, const void *r)
 	return left->size - right->size;
 }
 
+/**
+ * spi_nor_sort_erase_mask() - sort erase mask
+ * @map:	the erase map of the SPI NOR
+ * @erase_mask:	the erase type mask to be sorted
+ *
+ * Replicate the sort done for the map's erase types in BFPT: sort the erase
+ * mask in ascending order with the smallest erase type size starting from
+ * BIT(0) in the sorted erase mask.
+ *
+ * Return: sorted erase mask.
+ */
+static u8 spi_nor_sort_erase_mask(struct spi_nor_erase_map *map, u8 erase_mask)
+{
+	struct spi_nor_erase_type *erase_type = map->erase_type;
+	int i;
+	u8 sorted_erase_mask = 0;
+
+	if (!erase_mask)
+		return 0;
+
+	/* Replicate the sort done for the map's erase types. */
+	for (i = 0; i < SNOR_ERASE_TYPE_MAX; i++)
+		if (erase_type[i].size && erase_mask & BIT(erase_type[i].idx))
+			sorted_erase_mask |= BIT(i);
+
+	return sorted_erase_mask;
+}
+
 /**
  * spi_nor_regions_sort_erase_types() - sort erase types in each region
  * @map:	the erase map of the SPI NOR
@@ -2536,19 +2564,13 @@ static int spi_nor_map_cmp_erase_type(const void *l, const void *r)
 static void spi_nor_regions_sort_erase_types(struct spi_nor_erase_map *map)
 {
 	struct spi_nor_erase_region *region = map->regions;
-	struct spi_nor_erase_type *erase_type = map->erase_type;
-	int i;
 	u8 region_erase_mask, sorted_erase_mask;
 
 	while (region) {
 		region_erase_mask = region->offset & SNOR_ERASE_TYPE_MASK;
 
-		/* Replicate the sort done for the map's erase types. */
-		sorted_erase_mask = 0;
-		for (i = 0; i < SNOR_ERASE_TYPE_MAX; i++)
-			if (erase_type[i].size &&
-			    region_erase_mask & BIT(erase_type[i].idx))
-				sorted_erase_mask |= BIT(i);
+		sorted_erase_mask = spi_nor_sort_erase_mask(map,
+							    region_erase_mask);
 
 		/* Overwrite erase mask. */
 		region->offset = (region->offset & ~SNOR_ERASE_TYPE_MASK) |
@@ -2978,7 +3000,7 @@ static int spi_nor_init_non_uniform_erase_map(struct spi_nor *nor,
 	u64 offset;
 	u32 region_count;
 	int i, j;
-	u8 erase_type;
+	u8 erase_type, uniform_erase_type;
 
 	region_count = SMPT_MAP_REGION_COUNT(*smpt);
 	/*
@@ -2991,7 +3013,7 @@ static int spi_nor_init_non_uniform_erase_map(struct spi_nor *nor,
 		return -ENOMEM;
 	map->regions = region;
 
-	map->uniform_erase_type = 0xff;
+	uniform_erase_type = 0xff;
 	offset = 0;
 	/* Populate regions. */
 	for (i = 0; i < region_count; i++) {
@@ -3006,12 +3028,15 @@ static int spi_nor_init_non_uniform_erase_map(struct spi_nor *nor,
 		 * Save the erase types that are supported in all regions and
 		 * can erase the entire flash memory.
 		 */
-		map->uniform_erase_type &= erase_type;
+		uniform_erase_type &= erase_type;
 
 		offset = (region[i].offset & ~SNOR_ERASE_FLAGS_MASK) +
 			 region[i].size;
 	}
 
+	map->uniform_erase_type = spi_nor_sort_erase_mask(map,
+							  uniform_erase_type);
+
 	spi_nor_region_mark_end(&region[i - 1]);
 
 	return 0;
-- 
GitLab


From 68239654acafe6aad5a3c1dc7237e60accfebc03 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Tue, 20 Nov 2018 11:26:35 +0100
Subject: [PATCH 1260/1890] x86/fpu: Disable bottom halves while loading FPU
 registers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The sequence

  fpu->initialized = 1;		/* step A */
  preempt_disable();		/* step B */
  fpu__restore(fpu);
  preempt_enable();

in __fpu__restore_sig() is racy in regard to a context switch.

For 32bit frames, __fpu__restore_sig() prepares the FPU state within
fpu->state. To ensure that a context switch (switch_fpu_prepare() in
particular) does not modify fpu->state it uses fpu__drop() which sets
fpu->initialized to 0.

After fpu->initialized is cleared, the CPU's FPU state is not saved
to fpu->state during a context switch. The new state is loaded via
fpu__restore(). It gets loaded into fpu->state from userland and
ensured it is sane. fpu->initialized is then set to 1 in order to avoid
fpu__initialize() doing anything (overwrite the new state) which is part
of fpu__restore().

A context switch between step A and B above would save CPU's current FPU
registers to fpu->state and overwrite the newly prepared state. This
looks like a tiny race window but the Kernel Test Robot reported this
back in 2016 while we had lazy FPU support. Borislav Petkov made the
link between that report and another patch that has been posted. Since
the removal of the lazy FPU support, this race goes unnoticed because
the warning has been removed.

Disable bottom halves around the restore sequence to avoid the race. BH
need to be disabled because BH is allowed to run (even with preemption
disabled) and might invoke kernel_fpu_begin() by doing IPsec.

 [ bp: massage commit message a bit. ]

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: "Jason A. Donenfeld" <Jason@zx2c4.com>
Cc: kvm ML <kvm@vger.kernel.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim KrÄmÃ¡Å™ <rkrcmar@redhat.com>
Cc: Rik van Riel <riel@surriel.com>
Cc: stable@vger.kernel.org
Cc: x86-ml <x86@kernel.org>
Link: http://lkml.kernel.org/r/20181120102635.ddv3fvavxajjlfqk@linutronix.de
Link: https://lkml.kernel.org/r/20160226074940.GA28911@pd.tnic
---
 arch/x86/kernel/fpu/signal.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 61a949d84dfa5..d99a8ee9e185e 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -344,10 +344,10 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 			sanitize_restored_xstate(tsk, &env, xfeatures, fx_only);
 		}
 
+		local_bh_disable();
 		fpu->initialized = 1;
-		preempt_disable();
 		fpu__restore(fpu);
-		preempt_enable();
+		local_bh_enable();
 
 		return err;
 	} else {
-- 
GitLab


From 8c4e7c2ee8096b5ca8214418f287b3878d578cc0 Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Sun, 18 Nov 2018 16:38:09 -0600
Subject: [PATCH 1261/1890] ASoC: Intel: Skylake: fix Kconfigs, make HDaudio
 codec optional

The Skylake driver currently has a set of problems supporting
load/unload modules. We need to make the HDaudio codec support
optional to help narrow down the issues.

Support for HDaudio codecs also leads to a Kconfig issue. We want the
hdac_hda codec to be compilable independently of Skylake (e.g. with
ALL_CODECS) but when Skylake is selected as built-in the hdac_hda
codec needs to use the same option due a a code dependency

Solve both problems by adding a user-selectable boolean Kconfig,
select HDAC_HDA as needed and make the HDaudio codec support in the
Skylake driver optional. Tests on a Chell Chromebook device without
HDaudio show no regression for speaker and HDMI playback.

This is submitted as an RFC to allow for comments and more validation.

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/intel/Kconfig        | 26 +++++++++++++++++++++++---
 sound/soc/intel/boards/Kconfig | 24 ++++++++++++++----------
 sound/soc/intel/skylake/skl.c  | 20 ++++++++++++++++++--
 3 files changed, 55 insertions(+), 15 deletions(-)

diff --git a/sound/soc/intel/Kconfig b/sound/soc/intel/Kconfig
index 0caa1f4eb94d7..18e7177036855 100644
--- a/sound/soc/intel/Kconfig
+++ b/sound/soc/intel/Kconfig
@@ -101,22 +101,42 @@ config SND_SST_ATOM_HIFI2_PLATFORM_ACPI
 	  codec, then enable this option by saying Y or m. This is a
 	  recommended option
 
-config SND_SOC_INTEL_SKYLAKE_SSP_CLK
-	tristate
-
 config SND_SOC_INTEL_SKYLAKE
 	tristate "SKL/BXT/KBL/GLK/CNL... Platforms"
 	depends on PCI && ACPI
+	select SND_SOC_INTEL_SKYLAKE_COMMON
+	help
+	  If you have a Intel Skylake/Broxton/ApolloLake/KabyLake/
+	  GeminiLake or CannonLake platform with the DSP enabled in the BIOS
+	  then enable this option by saying Y or m.
+
+if  SND_SOC_INTEL_SKYLAKE
+
+config SND_SOC_INTEL_SKYLAKE_SSP_CLK
+	tristate
+
+config SND_SOC_INTEL_SKYLAKE_HDAUDIO_CODEC
+	bool "HDAudio codec support"
+	help
+	  If you have a Intel Skylake/Broxton/ApolloLake/KabyLake/
+	  GeminiLake or CannonLake platform with an HDaudio codec
+	  then enable this option by saying Y
+
+config SND_SOC_INTEL_SKYLAKE_COMMON
+	tristate
 	select SND_HDA_EXT_CORE
 	select SND_HDA_DSP_LOADER
 	select SND_SOC_TOPOLOGY
 	select SND_SOC_INTEL_SST
+	select SND_SOC_HDAC_HDA if SND_SOC_INTEL_SKYLAKE_HDAUDIO_CODEC
 	select SND_SOC_ACPI_INTEL_MATCH
 	help
 	  If you have a Intel Skylake/Broxton/ApolloLake/KabyLake/
 	  GeminiLake or CannonLake platform with the DSP enabled in the BIOS
 	  then enable this option by saying Y or m.
 
+endif ## SND_SOC_INTEL_SKYLAKE
+
 config SND_SOC_ACPI_INTEL_MATCH
 	tristate
 	select SND_SOC_ACPI if ACPI
diff --git a/sound/soc/intel/boards/Kconfig b/sound/soc/intel/boards/Kconfig
index 73ca1350aa312..b177db2a0dbb2 100644
--- a/sound/soc/intel/boards/Kconfig
+++ b/sound/soc/intel/boards/Kconfig
@@ -293,16 +293,6 @@ config SND_SOC_INTEL_KBL_DA7219_MAX98927_MACH
 	  Say Y if you have such a device.
 	  If unsure select "N".
 
-config SND_SOC_INTEL_SKL_HDA_DSP_GENERIC_MACH
-	tristate "SKL/KBL/BXT/APL with HDA Codecs"
-	select SND_SOC_HDAC_HDMI
-	select SND_SOC_HDAC_HDA
-	help
-	  This adds support for ASoC machine driver for Intel platforms
-	  SKL/KBL/BXT/APL with iDisp, HDA audio codecs.
-          Say Y or m if you have such a device. This is a recommended option.
-	  If unsure select "N".
-
 config SND_SOC_INTEL_GLK_RT5682_MAX98357A_MACH
 	tristate "GLK with RT5682 and MAX98357A in I2S Mode"
 	depends on MFD_INTEL_LPSS && I2C && ACPI
@@ -319,4 +309,18 @@ config SND_SOC_INTEL_GLK_RT5682_MAX98357A_MACH
 
 endif ## SND_SOC_INTEL_SKYLAKE
 
+if SND_SOC_INTEL_SKYLAKE_HDAUDIO_CODEC
+
+config SND_SOC_INTEL_SKL_HDA_DSP_GENERIC_MACH
+	tristate "SKL/KBL/BXT/APL with HDA Codecs"
+	select SND_SOC_HDAC_HDMI
+	# SND_SOC_HDAC_HDA is already selected
+	help
+	  This adds support for ASoC machine driver for Intel platforms
+	  SKL/KBL/BXT/APL with iDisp, HDA audio codecs.
+          Say Y or m if you have such a device. This is a recommended option.
+	  If unsure select "N".
+
+endif ## SND_SOC_INTEL_SKYLAKE_HDAUDIO_CODEC
+
 endif ## SND_SOC_INTEL_MACH
diff --git a/sound/soc/intel/skylake/skl.c b/sound/soc/intel/skylake/skl.c
index 1586c97d94508..7487f388e65d7 100644
--- a/sound/soc/intel/skylake/skl.c
+++ b/sound/soc/intel/skylake/skl.c
@@ -37,7 +37,9 @@
 #include "skl.h"
 #include "skl-sst-dsp.h"
 #include "skl-sst-ipc.h"
+#if IS_ENABLED(CONFIG_SND_SOC_INTEL_SKYLAKE_HDAUDIO_CODEC)
 #include "../../../soc/codecs/hdac_hda.h"
+#endif
 
 /*
  * initialize the PCI registers
@@ -658,6 +660,8 @@ static void skl_clock_device_unregister(struct skl *skl)
 		platform_device_unregister(skl->clk_dev);
 }
 
+#if IS_ENABLED(CONFIG_SND_SOC_INTEL_SKYLAKE_HDAUDIO_CODEC)
+
 #define IDISP_INTEL_VENDOR_ID	0x80860000
 
 /*
@@ -676,6 +680,8 @@ static void load_codec_module(struct hda_codec *codec)
 #endif
 }
 
+#endif /* CONFIG_SND_SOC_INTEL_SKYLAKE_HDAUDIO_CODEC */
+
 /*
  * Probe the given codec address
  */
@@ -685,9 +691,11 @@ static int probe_codec(struct hdac_bus *bus, int addr)
 		(AC_VERB_PARAMETERS << 8) | AC_PAR_VENDOR_ID;
 	unsigned int res = -1;
 	struct skl *skl = bus_to_skl(bus);
+#if IS_ENABLED(CONFIG_SND_SOC_INTEL_SKYLAKE_HDAUDIO_CODEC)
 	struct hdac_hda_priv *hda_codec;
-	struct hdac_device *hdev;
 	int err;
+#endif
+	struct hdac_device *hdev;
 
 	mutex_lock(&bus->cmd_mutex);
 	snd_hdac_bus_send_cmd(bus, cmd);
@@ -697,6 +705,7 @@ static int probe_codec(struct hdac_bus *bus, int addr)
 		return -EIO;
 	dev_dbg(bus->dev, "codec #%d probed OK: %x\n", addr, res);
 
+#if IS_ENABLED(CONFIG_SND_SOC_INTEL_SKYLAKE_HDAUDIO_CODEC)
 	hda_codec = devm_kzalloc(&skl->pci->dev, sizeof(*hda_codec),
 				 GFP_KERNEL);
 	if (!hda_codec)
@@ -715,6 +724,13 @@ static int probe_codec(struct hdac_bus *bus, int addr)
 		load_codec_module(&hda_codec->codec);
 	}
 	return 0;
+#else
+	hdev = devm_kzalloc(&skl->pci->dev, sizeof(*hdev), GFP_KERNEL);
+	if (!hdev)
+		return -ENOMEM;
+
+	return snd_hdac_ext_bus_device_init(bus, addr, hdev);
+#endif /* CONFIG_SND_SOC_INTEL_SKYLAKE_HDAUDIO_CODEC */
 }
 
 /* Codec initialization */
@@ -870,7 +886,7 @@ static int skl_create(struct pci_dev *pci,
 	hbus = skl_to_hbus(skl);
 	bus = skl_to_bus(skl);
 
-#if IS_ENABLED(CONFIG_SND_SOC_HDAC_HDA)
+#if IS_ENABLED(CONFIG_SND_SOC_INTEL_SKYLAKE_HDAUDIO_CODEC)
 	ext_ops = snd_soc_hdac_hda_get_ops();
 #endif
 	snd_hdac_ext_bus_init(bus, &pci->dev, &bus_core_ops, io_ops, ext_ops);
-- 
GitLab


From a3e620f8422832afd832ad5e20aa97d0c72bada8 Mon Sep 17 00:00:00 2001
From: Keyon Jie <yang.jie@linux.intel.com>
Date: Fri, 16 Nov 2018 18:47:04 -0600
Subject: [PATCH 1262/1890] ASoC: acpi: fix: continue searching when machine is
 ignored

The machine_quirk may return NULL which means the acpi entries should be
skipped and search for next matched entry is needed, here add return
check here and continue for NULL case.

Signed-off-by: Keyon Jie <yang.jie@linux.intel.com>
Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/soc-acpi.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/sound/soc/soc-acpi.c b/sound/soc/soc-acpi.c
index b8e72b52db30e..4fb29f0e561ef 100644
--- a/sound/soc/soc-acpi.c
+++ b/sound/soc/soc-acpi.c
@@ -10,11 +10,17 @@ struct snd_soc_acpi_mach *
 snd_soc_acpi_find_machine(struct snd_soc_acpi_mach *machines)
 {
 	struct snd_soc_acpi_mach *mach;
+	struct snd_soc_acpi_mach *mach_alt;
 
 	for (mach = machines; mach->id[0]; mach++) {
 		if (acpi_dev_present(mach->id, NULL, -1)) {
-			if (mach->machine_quirk)
-				mach = mach->machine_quirk(mach);
+			if (mach->machine_quirk) {
+				mach_alt = mach->machine_quirk(mach);
+				if (!mach_alt)
+					continue; /* not full match, ignore */
+				mach = mach_alt;
+			}
+
 			return mach;
 		}
 	}
-- 
GitLab


From cb5d21946d2a2f4687c482ab4604af1d29dac35a Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Thu, 15 Nov 2018 15:03:24 -0800
Subject: [PATCH 1263/1890] MAINTAINERS: Add Sasha as a stable branch
 maintainer

Sasha has somehow been convinced into helping me with the stable kernel
maintenance.  Codify this slip in good judgement before he realizes what
he really signed up for :)

Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Sasha Levin <sashal@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 0abecc528daca..a46c1aa82398e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -14078,6 +14078,7 @@ F:	Documentation/devicetree/bindings/iio/proximity/vl53l0x.txt
 
 STABLE BRANCH
 M:	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+M:	Sasha Levin <sashal@kernel.org>
 L:	stable@vger.kernel.org
 S:	Supported
 F:	Documentation/process/stable-kernel-rules.rst
-- 
GitLab


From 544b03da39e2d7b4961d3163976ed4bfb1fac509 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Mon, 19 Nov 2018 11:07:18 +0000
Subject: [PATCH 1264/1890] Documentation/security-bugs: Postpone fix
 publication in exceptional cases

At the request of the reporter, the Linux kernel security team offers to
postpone the publishing of a fix for up to 5 business days from the date
of a report.

While it is generally undesirable to keep a fix private after it has
been developed, this short window is intended to allow distributions to
package the fix into their kernel builds and permits early inclusion of
the security team in the case of a co-ordinated disclosure with other
parties. Unfortunately, discussions with major Linux distributions and
cloud providers has revealed that 5 business days is not sufficient to
achieve either of these two goals.

As an example, cloud providers need to roll out KVM security fixes to a
global fleet of hosts with sufficient early ramp-up and monitoring. An
end-to-end timeline of less than two weeks dramatically cuts into the
amount of early validation and increases the chance of guest-visible
regressions.

The consequence of this timeline mismatch is that security issues are
commonly fixed without the involvement of the Linux kernel security team
and are instead analysed and addressed by an ad-hoc group of developers
across companies contributing to Linux. In some cases, mainline (and
therefore the official stable kernels) can be left to languish for
extended periods of time. This undermines the Linux kernel security
process and puts upstream developers in a difficult position should they
find themselves involved with an undisclosed security problem that they
are unable to report due to restrictions from their employer.

To accommodate the needs of these users of the Linux kernel and
encourage them to engage with the Linux security team when security
issues are first uncovered, extend the maximum period for which fixes
may be delayed to 7 calendar days, or 14 calendar days in exceptional
cases, where the logistics of QA and large scale rollouts specifically
need to be accommodated. This brings parity with the linux-distros@
maximum embargo period of 14 calendar days.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Amit Shah <aams@amazon.com>
Cc: Laura Abbott <labbott@redhat.com>
Acked-by: Kees Cook <keescook@chromium.org>
Co-developed-by: Thomas Gleixner <tglx@linutronix.de>
Co-developed-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Reviewed-by: Tyler Hicks <tyhicks@canonical.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/admin-guide/security-bugs.rst | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/Documentation/admin-guide/security-bugs.rst b/Documentation/admin-guide/security-bugs.rst
index 164bf71149fdf..30187d49dc2c7 100644
--- a/Documentation/admin-guide/security-bugs.rst
+++ b/Documentation/admin-guide/security-bugs.rst
@@ -32,16 +32,17 @@ Disclosure and embargoed information
 The security list is not a disclosure channel.  For that, see Coordination
 below.
 
-Once a robust fix has been developed, our preference is to release the
-fix in a timely fashion, treating it no differently than any of the other
-thousands of changes and fixes the Linux kernel project releases every
-month.
-
-However, at the request of the reporter, we will postpone releasing the
-fix for up to 5 business days after the date of the report or after the
-embargo has lifted; whichever comes first.  The only exception to that
-rule is if the bug is publicly known, in which case the preference is to
-release the fix as soon as it's available.
+Once a robust fix has been developed, the release process starts.  Fixes
+for publicly known bugs are released immediately.
+
+Although our preference is to release fixes for publicly undisclosed bugs
+as soon as they become available, this may be postponed at the request of
+the reporter or an affected party for up to 7 calendar days from the start
+of the release process, with an exceptional extension to 14 calendar days
+if it is agreed that the criticality of the bug requires more time.  The
+only valid reason for deferring the publication of a fix is to accommodate
+the logistics of QA and large scale rollouts which require release
+coordination.
 
 Whilst embargoed information may be shared with trusted individuals in
 order to develop a fix, such information will not be published alongside
-- 
GitLab


From 4212368750cf94623f5b382293625dbb3d8d041a Mon Sep 17 00:00:00 2001
From: Jasmin Jessich <jasmin@anw.at>
Date: Fri, 9 Nov 2018 16:06:05 -0500
Subject: [PATCH 1265/1890] media: Use wait_queue_head_t for media_request

The portable type for a wait queue is wait_queue_head_t.

Signed-off-by: Jasmin Jessich <jasmin@anw.at>
Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 include/media/media-request.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/media/media-request.h b/include/media/media-request.h
index 0ce75c35131f1..bd36d74316984 100644
--- a/include/media/media-request.h
+++ b/include/media/media-request.h
@@ -68,7 +68,7 @@ struct media_request {
 	unsigned int access_count;
 	struct list_head objects;
 	unsigned int num_incomplete_objects;
-	struct wait_queue_head poll_wait;
+	wait_queue_head_t poll_wait;
 	spinlock_t lock;
 };
 
-- 
GitLab


From f44e361e37eb074df2e2ca204ef31c834ce44af8 Mon Sep 17 00:00:00 2001
From: Jernej Skrabec <jernej.skrabec@siol.net>
Date: Sun, 11 Nov 2018 06:06:21 -0500
Subject: [PATCH 1266/1890] media: media-request: Add compat ioctl

Currently media request ioctl operations fail on 64-bit kernel with
32-bit userspace due to missing .compat_ioctl callback.

Because no ioctl command uses any argument, just reuse existing ioctl
handler for compat_ioctl too.

Signed-off-by: Jernej Skrabec <jernej.skrabec@siol.net>
Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/media-request.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/media/media-request.c b/drivers/media/media-request.c
index 4e9db1fed6971..c71a34ae6383c 100644
--- a/drivers/media/media-request.c
+++ b/drivers/media/media-request.c
@@ -238,6 +238,9 @@ static const struct file_operations request_fops = {
 	.owner = THIS_MODULE,
 	.poll = media_request_poll,
 	.unlocked_ioctl = media_request_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl = media_request_ioctl,
+#endif /* CONFIG_COMPAT */
 	.release = media_request_close,
 };
 
-- 
GitLab


From 0408b205f1ae60c1b99c9888ac0326543d96a091 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Thu, 15 Nov 2018 02:49:24 -0500
Subject: [PATCH 1267/1890] media: cedrus: add action item to the TODO

Mention that the request validation should increment the memory refcount
of reference buffers so we don't forget to do this.

Acked-by: Paul Kocialkowski <paul.kocialkowski@bootlin.com>
Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/staging/media/sunxi/cedrus/TODO | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/staging/media/sunxi/cedrus/TODO b/drivers/staging/media/sunxi/cedrus/TODO
index ec277ece47afd..a951b3fd1ea1d 100644
--- a/drivers/staging/media/sunxi/cedrus/TODO
+++ b/drivers/staging/media/sunxi/cedrus/TODO
@@ -5,3 +5,8 @@ Before this stateless decoder driver can leave the staging area:
 * Userspace support for the Request API needs to be reviewed;
 * Another stateless decoder driver should be submitted;
 * At least one stateless encoder driver should be submitted.
+* When queueing a request containing references to I frames, the
+  refcount of the memory for those I frames needs to be incremented
+  and decremented when the request is completed. This will likely
+  require some help from vb2. The driver should fail the request
+  if the memory/buffer is gone.
-- 
GitLab


From cb3b2ffb757e75fef40fb94bc093cbbf49a6bf6e Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Sat, 17 Nov 2018 06:25:08 -0500
Subject: [PATCH 1268/1890] media: vicodec: fix memchr() kernel oops

The size passed to memchr is too large as it assumes the search
starts at the start of the buffer, but it can start at an offset.

Cc: <stable@vger.kernel.org>      # for v4.19 and up
Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/platform/vicodec/vicodec-core.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/media/platform/vicodec/vicodec-core.c b/drivers/media/platform/vicodec/vicodec-core.c
index b292cff26c866..013cdebecbc49 100644
--- a/drivers/media/platform/vicodec/vicodec-core.c
+++ b/drivers/media/platform/vicodec/vicodec-core.c
@@ -304,7 +304,8 @@ static int job_ready(void *priv)
 		for (; p < p_out + sz; p++) {
 			u32 copy;
 
-			p = memchr(p, magic[ctx->comp_magic_cnt], sz);
+			p = memchr(p, magic[ctx->comp_magic_cnt],
+				   p_out + sz - p);
 			if (!p) {
 				ctx->comp_magic_cnt = 0;
 				break;
-- 
GitLab


From 2a5bf23d5b795d5df33dc284e8f5cf8b6a5b4042 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 20 Nov 2018 18:08:42 +0100
Subject: [PATCH 1269/1890] perf/x86/intel: Fix regression by default disabling
 perfmon v4 interrupt handling

Kyle Huey reported that 'rr', a replay debugger, broke due to the following commit:

  af3bdb991a5c ("perf/x86/intel: Add a separate Arch Perfmon v4 PMI handler")

Rework the 'disable_counter_freezing' __setup() parameter such that we
can explicitly enable/disable it and switch to default disabled.

To this purpose, rename the parameter to "perf_v4_pmi=" which is a much
better description and allows requiring a bool argument.

[ mingo: Improved the changelog some more. ]

Reported-by: Kyle Huey <me@kylehuey.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Robert O'Callahan <robert@ocallahan.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: acme@kernel.org
Link: http://lkml.kernel.org/r/20181120170842.GZ2131@hirez.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 Documentation/admin-guide/kernel-parameters.txt |  3 ++-
 arch/x86/events/intel/core.c                    | 12 ++++++++----
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 81d1d5a747280..5463d5a4d85c1 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -856,7 +856,8 @@
 			causing system reset or hang due to sending
 			INIT from AP to BSP.
 
-	disable_counter_freezing [HW]
+	perf_v4_pmi=	[X86,INTEL]
+			Format: <bool>
 			Disable Intel PMU counter freezing feature.
 			The feature only exists starting from
 			Arch Perfmon v4 (Skylake and newer).
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 273c62e815463..af8bea9d4006a 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2306,14 +2306,18 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
 	return handled;
 }
 
-static bool disable_counter_freezing;
+static bool disable_counter_freezing = true;
 static int __init intel_perf_counter_freezing_setup(char *s)
 {
-	disable_counter_freezing = true;
-	pr_info("Intel PMU Counter freezing feature disabled\n");
+	bool res;
+
+	if (kstrtobool(s, &res))
+		return -EINVAL;
+
+	disable_counter_freezing = !res;
 	return 1;
 }
-__setup("disable_counter_freezing", intel_perf_counter_freezing_setup);
+__setup("perf_v4_pmi=", intel_perf_counter_freezing_setup);
 
 /*
  * Simplified handler for Arch Perfmon v4:
-- 
GitLab


From 0b9301fb632f7111a3293a30cc5b20f1b82ed08d Mon Sep 17 00:00:00 2001
From: Artem Savkov <asavkov@redhat.com>
Date: Tue, 20 Nov 2018 11:52:15 -0600
Subject: [PATCH 1270/1890] objtool: Fix double-free in .cold detection error
 path

If read_symbols() fails during second list traversal (the one dealing
with ".cold" subfunctions) it frees the symbol, but never deletes it
from the list/hash_table resulting in symbol being freed again in
elf_close(). Fix it by just returning an error, leaving cleanup to
elf_close().

Signed-off-by: Artem Savkov <asavkov@redhat.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 13810435b9a7 ("objtool: Support GCC 8's cold subfunctions")
Link: http://lkml.kernel.org/r/beac5a9b7da9e8be90223459dcbe07766ae437dd.1542736240.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/objtool/elf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 6dbb9fae0f9d4..e7a7ac40e0454 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -312,7 +312,7 @@ static int read_symbols(struct elf *elf)
 			if (!pfunc) {
 				WARN("%s(): can't find parent function",
 				     sym->name);
-				goto err;
+				return -1;
 			}
 
 			sym->pfunc = pfunc;
-- 
GitLab


From 22566c1603030f0a036ad564634b064ad1a55db2 Mon Sep 17 00:00:00 2001
From: Artem Savkov <asavkov@redhat.com>
Date: Tue, 20 Nov 2018 11:52:16 -0600
Subject: [PATCH 1271/1890] objtool: Fix segfault in .cold detection with
 -ffunction-sections

Because find_symbol_by_name() traverses the same lists as
read_symbols(), changing sym->name in place without copying it affects
the result of find_symbol_by_name().  In the case where a ".cold"
function precedes its parent in sec->symbol_list, it can result in a
function being considered a parent of itself. This leads to function
length being set to 0 and other consequent side-effects including a
segfault in add_switch_table().  The effects of this bug are only
visible when building with -ffunction-sections in KCFLAGS.

Fix by copying the search string instead of modifying it in place.

Signed-off-by: Artem Savkov <asavkov@redhat.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 13810435b9a7 ("objtool: Support GCC 8's cold subfunctions")
Link: http://lkml.kernel.org/r/910abd6b5a4945130fd44f787c24e07b9e07c8da.1542736240.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/objtool/elf.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index e7a7ac40e0454..b8f3cca8e58b4 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -31,6 +31,8 @@
 #include "elf.h"
 #include "warn.h"
 
+#define MAX_NAME_LEN 128
+
 struct section *find_section_by_name(struct elf *elf, const char *name)
 {
 	struct section *sec;
@@ -298,6 +300,8 @@ static int read_symbols(struct elf *elf)
 	/* Create parent/child links for any cold subfunctions */
 	list_for_each_entry(sec, &elf->sections, list) {
 		list_for_each_entry(sym, &sec->symbol_list, list) {
+			char pname[MAX_NAME_LEN + 1];
+			size_t pnamelen;
 			if (sym->type != STT_FUNC)
 				continue;
 			sym->pfunc = sym->cfunc = sym;
@@ -305,9 +309,16 @@ static int read_symbols(struct elf *elf)
 			if (!coldstr)
 				continue;
 
-			coldstr[0] = '\0';
-			pfunc = find_symbol_by_name(elf, sym->name);
-			coldstr[0] = '.';
+			pnamelen = coldstr - sym->name;
+			if (pnamelen > MAX_NAME_LEN) {
+				WARN("%s(): parent function name exceeds maximum length of %d characters",
+				     sym->name, MAX_NAME_LEN);
+				return -1;
+			}
+
+			strncpy(pname, sym->name, pnamelen);
+			pname[pnamelen] = '\0';
+			pfunc = find_symbol_by_name(elf, pname);
 
 			if (!pfunc) {
 				WARN("%s(): can't find parent function",
-- 
GitLab


From 59663e42199c93d1d7314d1446f6782fc4b1eb81 Mon Sep 17 00:00:00 2001
From: Siva Reddy Kallam <siva.kallam@broadcom.com>
Date: Tue, 20 Nov 2018 10:04:04 +0530
Subject: [PATCH 1272/1890] tg3: Add PHY reset for 5717/5719/5720 in change
 ring and flow control paths

This patch has the fix to avoid PHY lockup with 5717/5719/5720 in change
ring and flow control paths. This patch solves the RX hang while doing
continuous ring or flow control parameters with heavy traffic from peer.

Signed-off-by: Siva Reddy Kallam <siva.kallam@broadcom.com>
Acked-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/tg3.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 89295306f1615..432c3b8670848 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -12422,6 +12422,7 @@ static int tg3_set_ringparam(struct net_device *dev, struct ethtool_ringparam *e
 {
 	struct tg3 *tp = netdev_priv(dev);
 	int i, irq_sync = 0, err = 0;
+	bool reset_phy = false;
 
 	if ((ering->rx_pending > tp->rx_std_ring_mask) ||
 	    (ering->rx_jumbo_pending > tp->rx_jmb_ring_mask) ||
@@ -12453,7 +12454,13 @@ static int tg3_set_ringparam(struct net_device *dev, struct ethtool_ringparam *e
 
 	if (netif_running(dev)) {
 		tg3_halt(tp, RESET_KIND_SHUTDOWN, 1);
-		err = tg3_restart_hw(tp, false);
+		/* Reset PHY to avoid PHY lock up */
+		if (tg3_asic_rev(tp) == ASIC_REV_5717 ||
+		    tg3_asic_rev(tp) == ASIC_REV_5719 ||
+		    tg3_asic_rev(tp) == ASIC_REV_5720)
+			reset_phy = true;
+
+		err = tg3_restart_hw(tp, reset_phy);
 		if (!err)
 			tg3_netif_start(tp);
 	}
@@ -12487,6 +12494,7 @@ static int tg3_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam
 {
 	struct tg3 *tp = netdev_priv(dev);
 	int err = 0;
+	bool reset_phy = false;
 
 	if (tp->link_config.autoneg == AUTONEG_ENABLE)
 		tg3_warn_mgmt_link_flap(tp);
@@ -12556,7 +12564,13 @@ static int tg3_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam
 
 		if (netif_running(dev)) {
 			tg3_halt(tp, RESET_KIND_SHUTDOWN, 1);
-			err = tg3_restart_hw(tp, false);
+			/* Reset PHY to avoid PHY lock up */
+			if (tg3_asic_rev(tp) == ASIC_REV_5717 ||
+			    tg3_asic_rev(tp) == ASIC_REV_5719 ||
+			    tg3_asic_rev(tp) == ASIC_REV_5720)
+				reset_phy = true;
+
+			err = tg3_restart_hw(tp, reset_phy);
 			if (!err)
 				tg3_netif_start(tp);
 		}
-- 
GitLab


From cadf9df27e7cf40e390e060a1c71bb86ecde798b Mon Sep 17 00:00:00 2001
From: Stephen Mallon <stephen.mallon@sydney.edu.au>
Date: Tue, 20 Nov 2018 19:15:02 +1100
Subject: [PATCH 1273/1890] tcp: Fix SOF_TIMESTAMPING_RX_HARDWARE to use the
 latest timestamp during TCP coalescing

During tcp coalescing ensure that the skb hardware timestamp refers to the
highest sequence number data.
Previously only the software timestamp was updated during coalescing.

Signed-off-by: Stephen Mallon <stephen.mallon@sydney.edu.au>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2868ef28ce521..e695584bb33fb 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4363,6 +4363,7 @@ static bool tcp_try_coalesce(struct sock *sk,
 	if (TCP_SKB_CB(from)->has_rxtstamp) {
 		TCP_SKB_CB(to)->has_rxtstamp = true;
 		to->tstamp = from->tstamp;
+		skb_hwtstamps(to)->hwtstamp = skb_hwtstamps(from)->hwtstamp;
 	}
 
 	return true;
-- 
GitLab


From d61fa8cbf3da85ffca6620f261354941c126ee23 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Mon, 19 Nov 2018 13:31:07 -0800
Subject: [PATCH 1274/1890] xfs: uncached buffer tracing needs to print bno

Useless:

xfs_buf_get_uncached: dev 253:32 bno 0xffffffffffffffff nblks 0x1 ...
xfs_buf_unlock:       dev 253:32 bno 0xffffffffffffffff nblks 0x1 ...
xfs_buf_submit:       dev 253:32 bno 0xffffffffffffffff nblks 0x1 ...
xfs_buf_hold:         dev 253:32 bno 0xffffffffffffffff nblks 0x1 ...
xfs_buf_iowait:       dev 253:32 bno 0xffffffffffffffff nblks 0x1 ...
xfs_buf_iodone:       dev 253:32 bno 0xffffffffffffffff nblks 0x1 ...
xfs_buf_iowait_done:  dev 253:32 bno 0xffffffffffffffff nblks 0x1 ...
xfs_buf_rele:         dev 253:32 bno 0xffffffffffffffff nblks 0x1 ...

Useful:


xfs_buf_get_uncached: dev 253:32 bno 0xffffffffffffffff nblks 0x1 ...
xfs_buf_unlock:       dev 253:32 bno 0xffffffffffffffff nblks 0x1 ...
xfs_buf_submit:       dev 253:32 bno 0x200b5 nblks 0x1 ...
xfs_buf_hold:         dev 253:32 bno 0x200b5 nblks 0x1 ...
xfs_buf_iowait:       dev 253:32 bno 0x200b5 nblks 0x1 ...
xfs_buf_iodone:       dev 253:32 bno 0x200b5 nblks 0x1 ...
xfs_buf_iowait_done:  dev 253:32 bno 0x200b5 nblks 0x1 ...
xfs_buf_rele:         dev 253:32 bno 0x200b5 nblks 0x1 ...

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_trace.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 3043e5ed64955..8a6532aae779b 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -280,7 +280,10 @@ DECLARE_EVENT_CLASS(xfs_buf_class,
 	),
 	TP_fast_assign(
 		__entry->dev = bp->b_target->bt_dev;
-		__entry->bno = bp->b_bn;
+		if (bp->b_bn == XFS_BUF_DADDR_NULL)
+			__entry->bno = bp->b_maps[0].bm_bn;
+		else
+			__entry->bno = bp->b_bn;
 		__entry->nblks = bp->b_length;
 		__entry->hold = atomic_read(&bp->b_hold);
 		__entry->pincount = atomic_read(&bp->b_pin_count);
-- 
GitLab


From d43aaf1685aa471f0593685c9f54d53e3af3cf3f Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Mon, 19 Nov 2018 13:31:08 -0800
Subject: [PATCH 1275/1890] xfs: fix transient reference count error in
 xfs_buf_resubmit_failed_buffers

When retrying a failed inode or dquot buffer,
xfs_buf_resubmit_failed_buffers() clears all the failed flags from
the inde/dquot log items. In doing so, it also drops all the
reference counts on the buffer that the failed log items hold. This
means it can drop all the active references on the buffer and hence
free the buffer before it queues it for write again.

Putting the buffer on the delwri queue takes a reference to the
buffer (so that it hangs around until it has been written and
completed), but this goes bang if the buffer has already been freed.

Hence we need to add the buffer to the delwri queue before we remove
the failed flags from the log items attached to the buffer to ensure
it always remains referenced during the resubmit process.

Reported-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_buf_item.c | 28 +++++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 12d8455bfbb29..010db5f8fb00f 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -1233,9 +1233,23 @@ xfs_buf_iodone(
 }
 
 /*
- * Requeue a failed buffer for writeback
+ * Requeue a failed buffer for writeback.
  *
- * Return true if the buffer has been re-queued properly, false otherwise
+ * We clear the log item failed state here as well, but we have to be careful
+ * about reference counts because the only active reference counts on the buffer
+ * may be the failed log items. Hence if we clear the log item failed state
+ * before queuing the buffer for IO we can release all active references to
+ * the buffer and free it, leading to use after free problems in
+ * xfs_buf_delwri_queue. It makes no difference to the buffer or log items which
+ * order we process them in - the buffer is locked, and we own the buffer list
+ * so nothing on them is going to change while we are performing this action.
+ *
+ * Hence we can safely queue the buffer for IO before we clear the failed log
+ * item state, therefore  always having an active reference to the buffer and
+ * avoiding the transient zero-reference state that leads to use-after-free.
+ *
+ * Return true if the buffer was added to the buffer list, false if it was
+ * already on the buffer list.
  */
 bool
 xfs_buf_resubmit_failed_buffers(
@@ -1243,16 +1257,16 @@ xfs_buf_resubmit_failed_buffers(
 	struct list_head	*buffer_list)
 {
 	struct xfs_log_item	*lip;
+	bool			ret;
+
+	ret = xfs_buf_delwri_queue(bp, buffer_list);
 
 	/*
-	 * Clear XFS_LI_FAILED flag from all items before resubmit
-	 *
-	 * XFS_LI_FAILED set/clear is protected by ail_lock, caller  this
+	 * XFS_LI_FAILED set/clear is protected by ail_lock, caller of this
 	 * function already have it acquired
 	 */
 	list_for_each_entry(lip, &bp->b_li_list, li_bio_list)
 		xfs_clear_li_failed(lip);
 
-	/* Add this buffer back to the delayed write list */
-	return xfs_buf_delwri_queue(bp, buffer_list);
+	return ret;
 }
-- 
GitLab


From c08768977b9a65cab9bcfd1ba30ffb686b2b7c69 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Mon, 19 Nov 2018 13:31:08 -0800
Subject: [PATCH 1276/1890] xfs: finobt AG reserves don't consider last AG can
 be a runt

The last AG may be very small comapred to all other AGs, and hence
AG reservations based on the superblock AG size may actually consume
more space than the AG actually has. This results on assert failures
like:

XFS: Assertion failed: xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved + xfs_perag_resv(pag, XFS_AG_RESV_RMAPBT)->ar_reserved <= pag->pagf_freeblks + pag->pagf_flcount, file: fs/xfs/libxfs/xfs_ag_resv.c, line: 319
[   48.932891]  xfs_ag_resv_init+0x1bd/0x1d0
[   48.933853]  xfs_fs_reserve_ag_blocks+0x37/0xb0
[   48.934939]  xfs_mountfs+0x5b3/0x920
[   48.935804]  xfs_fs_fill_super+0x462/0x640
[   48.936784]  ? xfs_test_remount_options+0x60/0x60
[   48.937908]  mount_bdev+0x178/0x1b0
[   48.938751]  mount_fs+0x36/0x170
[   48.939533]  vfs_kern_mount.part.43+0x54/0x130
[   48.940596]  do_mount+0x20e/0xcb0
[   48.941396]  ? memdup_user+0x3e/0x70
[   48.942249]  ksys_mount+0xba/0xd0
[   48.943046]  __x64_sys_mount+0x21/0x30
[   48.943953]  do_syscall_64+0x54/0x170
[   48.944835]  entry_SYSCALL_64_after_hwframe+0x49/0xbe

Hence we need to ensure the finobt per-ag space reservations take
into account the size of the last AG rather than treat it like all
the other full size AGs.

Note that both refcountbt and rmapbt already take the size of the AG
into account via reading the AGF length directly.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_ialloc_btree.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index 86c50208a1437..7fbf8af0b1594 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -538,15 +538,18 @@ xfs_inobt_rec_check_count(
 
 static xfs_extlen_t
 xfs_inobt_max_size(
-	struct xfs_mount	*mp)
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		agno)
 {
+	xfs_agblock_t		agblocks = xfs_ag_block_count(mp, agno);
+
 	/* Bail out if we're uninitialized, which can happen in mkfs. */
 	if (mp->m_inobt_mxr[0] == 0)
 		return 0;
 
 	return xfs_btree_calc_size(mp->m_inobt_mnr,
-		(uint64_t)mp->m_sb.sb_agblocks * mp->m_sb.sb_inopblock /
-				XFS_INODES_PER_CHUNK);
+				(uint64_t)agblocks * mp->m_sb.sb_inopblock /
+					XFS_INODES_PER_CHUNK);
 }
 
 static int
@@ -594,7 +597,7 @@ xfs_finobt_calc_reserves(
 	if (error)
 		return error;
 
-	*ask += xfs_inobt_max_size(mp);
+	*ask += xfs_inobt_max_size(mp, agno);
 	*used += tree_len;
 	return 0;
 }
-- 
GitLab


From 7f9f71be84bcab368e58020a42f6d0dd97adf0ce Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Mon, 19 Nov 2018 13:31:09 -0800
Subject: [PATCH 1277/1890] xfs: extent shifting doesn't fully invalidate page
 cache

The extent shifting code uses a flush and invalidate mechainsm prior
to shifting extents around. This is similar to what
xfs_free_file_space() does, but it doesn't take into account things
like page cache vs block size differences, and it will fail if there
is a page that it currently busy.

xfs_flush_unmap_range() handles all of these cases, so just convert
xfs_prepare_shift() to us that mechanism rather than having it's own
special sauce.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_bmap_util.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 5d263dfdb3bcc..167ff4297e5c7 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1195,13 +1195,7 @@ xfs_prepare_shift(
 	 * Writeback and invalidate cache for the remainder of the file as we're
 	 * about to shift down every extent from offset to EOF.
 	 */
-	error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, offset, -1);
-	if (error)
-		return error;
-	error = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
-					offset >> PAGE_SHIFT, -1);
-	if (error)
-		return error;
+	error = xfs_flush_unmap_range(ip, offset, XFS_ISIZE(ip));
 
 	/*
 	 * Clean out anything hanging around in the cow fork now that
-- 
GitLab


From a5d0f4565996e5595a10cb57b3d1e3d74379c502 Mon Sep 17 00:00:00 2001
From: Kenneth Feng <kenneth.feng@amd.com>
Date: Mon, 19 Nov 2018 14:49:16 +0800
Subject: [PATCH 1278/1890] drm/amdgpu: Enable HDP memory light sleep

Due to the register name and setting change of HDP
memory light sleep on Vega20,change accordingly in
the driver.

Signed-off-by: Kenneth Feng <kenneth.feng@amd.com>
Reviewed-by: Evan Quan <evan.quan@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/soc15.c | 39 ++++++++++++++++++++++++------
 1 file changed, 32 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index bf5e6a413dee6..4cc0dcb1a1875 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -65,6 +65,13 @@
 #define mmMP0_MISC_LIGHT_SLEEP_CTRL                                                             0x01ba
 #define mmMP0_MISC_LIGHT_SLEEP_CTRL_BASE_IDX                                                    0
 
+/* for Vega20 register name change */
+#define mmHDP_MEM_POWER_CTRL	0x00d4
+#define HDP_MEM_POWER_CTRL__IPH_MEM_POWER_CTRL_EN_MASK	0x00000001L
+#define HDP_MEM_POWER_CTRL__IPH_MEM_POWER_LS_EN_MASK	0x00000002L
+#define HDP_MEM_POWER_CTRL__RC_MEM_POWER_CTRL_EN_MASK	0x00010000L
+#define HDP_MEM_POWER_CTRL__RC_MEM_POWER_LS_EN_MASK		0x00020000L
+#define mmHDP_MEM_POWER_CTRL_BASE_IDX	0
 /*
  * Indirect registers accessor
  */
@@ -870,15 +877,33 @@ static void soc15_update_hdp_light_sleep(struct amdgpu_device *adev, bool enable
 {
 	uint32_t def, data;
 
-	def = data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS));
+	if (adev->asic_type == CHIP_VEGA20) {
+		def = data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_CTRL));
 
-	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS))
-		data |= HDP_MEM_POWER_LS__LS_ENABLE_MASK;
-	else
-		data &= ~HDP_MEM_POWER_LS__LS_ENABLE_MASK;
+		if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS))
+			data |= HDP_MEM_POWER_CTRL__IPH_MEM_POWER_CTRL_EN_MASK |
+				HDP_MEM_POWER_CTRL__IPH_MEM_POWER_LS_EN_MASK |
+				HDP_MEM_POWER_CTRL__RC_MEM_POWER_CTRL_EN_MASK |
+				HDP_MEM_POWER_CTRL__RC_MEM_POWER_LS_EN_MASK;
+		else
+			data &= ~(HDP_MEM_POWER_CTRL__IPH_MEM_POWER_CTRL_EN_MASK |
+				HDP_MEM_POWER_CTRL__IPH_MEM_POWER_LS_EN_MASK |
+				HDP_MEM_POWER_CTRL__RC_MEM_POWER_CTRL_EN_MASK |
+				HDP_MEM_POWER_CTRL__RC_MEM_POWER_LS_EN_MASK);
 
-	if (def != data)
-		WREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS), data);
+		if (def != data)
+			WREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_CTRL), data);
+	} else {
+		def = data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS));
+
+		if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS))
+			data |= HDP_MEM_POWER_LS__LS_ENABLE_MASK;
+		else
+			data &= ~HDP_MEM_POWER_LS__LS_ENABLE_MASK;
+
+		if (def != data)
+			WREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS), data);
+	}
 }
 
 static void soc15_update_drm_clock_gating(struct amdgpu_device *adev, bool enable)
-- 
GitLab


From b1d982330134bbba371821bc850da6fa4bfa3271 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Tue, 20 Nov 2018 21:22:50 +0100
Subject: [PATCH 1279/1890] MAINTAINERS: add myself as co-maintainer for r8169

Meanwhile I know the driver quite well and I refactored bigger parts
of it. As a result people contact me already with r8169 questions.
Therefore I'd volunteer to become co-maintainer of the driver also
officially.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index d258515ac1171..9aa47eeb469d4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -180,6 +180,7 @@ F:	drivers/net/hamradio/6pack.c
 
 8169 10/100/1000 GIGABIT ETHERNET DRIVER
 M:	Realtek linux nic maintainers <nic_swsd@realtek.com>
+M:	Heiner Kallweit <hkallweit1@gmail.com>
 L:	netdev@vger.kernel.org
 S:	Maintained
 F:	drivers/net/ethernet/realtek/r8169.c
-- 
GitLab


From f2cbd485282014132851bf37cb2ca624a456275d Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Tue, 20 Nov 2018 22:18:44 +0100
Subject: [PATCH 1280/1890] net/sched: act_police: fix race condition on state
 variables

after 'police' configuration parameters were converted to use RCU instead
of spinlock, the state variables used to compute the traffic rate (namely
'tcfp_toks', 'tcfp_ptoks' and 'tcfp_t_c') are erroneously read/updated in
the traffic path without any protection.

Use a dedicated spinlock to avoid race conditions on these variables, and
ensure proper cache-line alignment. In this way, 'police' is still faster
than what we observed when 'tcf_lock' was used in the traffic path _ i.e.
reverting commit 2d550dbad83c ("net/sched: act_police: don't use spinlock
in the data path"). Moreover, we preserve the throughput improvement that
was obtained after 'police' started using per-cpu counters, when 'avrate'
is used instead of 'rate'.

Changes since v1 (thanks to Eric Dumazet):
- call ktime_get_ns() before acquiring the lock in the traffic path
- use a dedicated spinlock instead of tcf_lock
- improve cache-line usage

Fixes: 2d550dbad83c ("net/sched: act_police: don't use spinlock in the data path")
Reported-and-suggested-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
---
 net/sched/act_police.c | 35 +++++++++++++++++++++--------------
 1 file changed, 21 insertions(+), 14 deletions(-)

diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 052855d473542..ee4665a5a0222 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -27,10 +27,7 @@ struct tcf_police_params {
 	u32			tcfp_ewma_rate;
 	s64			tcfp_burst;
 	u32			tcfp_mtu;
-	s64			tcfp_toks;
-	s64			tcfp_ptoks;
 	s64			tcfp_mtu_ptoks;
-	s64			tcfp_t_c;
 	struct psched_ratecfg	rate;
 	bool			rate_present;
 	struct psched_ratecfg	peak;
@@ -41,6 +38,11 @@ struct tcf_police_params {
 struct tcf_police {
 	struct tc_action	common;
 	struct tcf_police_params __rcu *params;
+
+	spinlock_t		tcfp_lock ____cacheline_aligned_in_smp;
+	s64			tcfp_toks;
+	s64			tcfp_ptoks;
+	s64			tcfp_t_c;
 };
 
 #define to_police(pc) ((struct tcf_police *)pc)
@@ -186,12 +188,9 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
 	}
 
 	new->tcfp_burst = PSCHED_TICKS2NS(parm->burst);
-	new->tcfp_toks = new->tcfp_burst;
-	if (new->peak_present) {
+	if (new->peak_present)
 		new->tcfp_mtu_ptoks = (s64)psched_l2t_ns(&new->peak,
 							 new->tcfp_mtu);
-		new->tcfp_ptoks = new->tcfp_mtu_ptoks;
-	}
 
 	if (tb[TCA_POLICE_AVRATE])
 		new->tcfp_ewma_rate = nla_get_u32(tb[TCA_POLICE_AVRATE]);
@@ -207,7 +206,12 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
 	}
 
 	spin_lock_bh(&police->tcf_lock);
-	new->tcfp_t_c = ktime_get_ns();
+	spin_lock_bh(&police->tcfp_lock);
+	police->tcfp_t_c = ktime_get_ns();
+	police->tcfp_toks = new->tcfp_burst;
+	if (new->peak_present)
+		police->tcfp_ptoks = new->tcfp_mtu_ptoks;
+	spin_unlock_bh(&police->tcfp_lock);
 	police->tcf_action = parm->action;
 	rcu_swap_protected(police->params,
 			   new,
@@ -257,25 +261,28 @@ static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a,
 		}
 
 		now = ktime_get_ns();
-		toks = min_t(s64, now - p->tcfp_t_c, p->tcfp_burst);
+		spin_lock_bh(&police->tcfp_lock);
+		toks = min_t(s64, now - police->tcfp_t_c, p->tcfp_burst);
 		if (p->peak_present) {
-			ptoks = toks + p->tcfp_ptoks;
+			ptoks = toks + police->tcfp_ptoks;
 			if (ptoks > p->tcfp_mtu_ptoks)
 				ptoks = p->tcfp_mtu_ptoks;
 			ptoks -= (s64)psched_l2t_ns(&p->peak,
 						    qdisc_pkt_len(skb));
 		}
-		toks += p->tcfp_toks;
+		toks += police->tcfp_toks;
 		if (toks > p->tcfp_burst)
 			toks = p->tcfp_burst;
 		toks -= (s64)psched_l2t_ns(&p->rate, qdisc_pkt_len(skb));
 		if ((toks|ptoks) >= 0) {
-			p->tcfp_t_c = now;
-			p->tcfp_toks = toks;
-			p->tcfp_ptoks = ptoks;
+			police->tcfp_t_c = now;
+			police->tcfp_toks = toks;
+			police->tcfp_ptoks = ptoks;
+			spin_unlock_bh(&police->tcfp_lock);
 			ret = p->tcfp_result;
 			goto inc_drops;
 		}
+		spin_unlock_bh(&police->tcfp_lock);
 	}
 
 inc_overlimits:
-- 
GitLab


From 31a8b3ee16e3051a0d9abc557ca203e2330621b6 Mon Sep 17 00:00:00 2001
From: Jonathan Davies <jonathan.davies@nutanix.com>
Date: Tue, 20 Nov 2018 17:48:32 +0000
Subject: [PATCH 1281/1890] HID: samples/hidraw: fix typo in printed message

Signed-off-by: Jonathan Davies <jonathan.davies@nutanix.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 samples/hidraw/hid-example.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/samples/hidraw/hid-example.c b/samples/hidraw/hid-example.c
index 9bfd8ff6de82e..37a0ffcb4d63f 100644
--- a/samples/hidraw/hid-example.c
+++ b/samples/hidraw/hid-example.c
@@ -119,7 +119,7 @@ int main(int argc, char **argv)
 	if (res < 0)
 		perror("HIDIOCSFEATURE");
 	else
-		printf("ioctl HIDIOCGFEATURE returned: %d\n", res);
+		printf("ioctl HIDIOCSFEATURE returned: %d\n", res);
 
 	/* Get Feature */
 	buf[0] = 0x9; /* Report Number */
-- 
GitLab


From f8577fb3c273bcae821a5254c8fbcf82016d9a8d Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Mon, 19 Nov 2018 15:41:53 +0000
Subject: [PATCH 1282/1890] drm/i915: Write GPU relocs harder with gen3

Under moderate amounts of GPU stress, we can observe on Bearlake and
Pineview (later gen3 models) that we execute the following batch buffer
before the write into the batch is coherent. Adding extra (tested with
upto 32x) MI_FLUSH to either the invalidation, flush or both phases does
not solve the incoherency issue with the relocations, but emitting the
MI_STORE_DWORD_IMM twice does. So be it.

Fixes: 7dd4f6729f92 ("drm/i915: Async GPU relocation processing")
Testcase: igt/gem_tiled_fence_blits # blb/pnv
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181119154153.15327-1-chris@chris-wilson.co.uk
(cherry picked from commit 7fa28e146994da1e8a4124623d7da97b798ea520)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 1aaccbe7e1deb..d4fac09095f86 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1268,7 +1268,7 @@ relocate_entry(struct i915_vma *vma,
 		else if (gen >= 4)
 			len = 4;
 		else
-			len = 3;
+			len = 6;
 
 		batch = reloc_gpu(eb, vma, len);
 		if (IS_ERR(batch))
@@ -1309,6 +1309,11 @@ relocate_entry(struct i915_vma *vma,
 			*batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
 			*batch++ = addr;
 			*batch++ = target_offset;
+
+			/* And again for good measure (blb/pnv) */
+			*batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
+			*batch++ = addr;
+			*batch++ = target_offset;
 		}
 
 		goto out;
-- 
GitLab


From 6e34d358b24ffc40764eb3681164c79091765429 Mon Sep 17 00:00:00 2001
From: Manu Gautam <mgautam@codeaurora.org>
Date: Tue, 16 Oct 2018 12:52:06 +0530
Subject: [PATCH 1283/1890] phy: qcom-qusb2: Use HSTX_TRIM fused value as is

Fix HSTX_TRIM tuning logic which instead of using fused value
as HSTX_TRIM, incorrectly performs bitwise OR operation with
existing default value.

Fixes: ca04d9d3e1b1 ("phy: qcom-qusb2: New driver for QUSB2 PHY on Qcom chips")
Signed-off-by: Manu Gautam <mgautam@codeaurora.org>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Reviewed-by: Stephen Boyd <swboyd@chromium.org>
Acked-by: Vivek Gautam <vivek.gautam@codeaurora.org>
Signed-off-by: Kishon Vijay Abraham I <kishon@ti.com>
---
 drivers/phy/qualcomm/phy-qcom-qusb2.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qusb2.c b/drivers/phy/qualcomm/phy-qcom-qusb2.c
index 9ce531194f8af..a710118b00a8f 100644
--- a/drivers/phy/qualcomm/phy-qcom-qusb2.c
+++ b/drivers/phy/qualcomm/phy-qcom-qusb2.c
@@ -402,10 +402,10 @@ static void qusb2_phy_set_tune2_param(struct qusb2_phy *qphy)
 
 	/*
 	 * Read efuse register having TUNE2/1 parameter's high nibble.
-	 * If efuse register shows value as 0x0, or if we fail to find
-	 * a valid efuse register settings, then use default value
-	 * as 0xB for high nibble that we have already set while
-	 * configuring phy.
+	 * If efuse register shows value as 0x0 (indicating value is not
+	 * fused), or if we fail to find a valid efuse register setting,
+	 * then use default value for high nibble that we have already
+	 * set while configuring the phy.
 	 */
 	val = nvmem_cell_read(qphy->cell, NULL);
 	if (IS_ERR(val) || !val[0]) {
@@ -415,12 +415,13 @@ static void qusb2_phy_set_tune2_param(struct qusb2_phy *qphy)
 
 	/* Fused TUNE1/2 value is the higher nibble only */
 	if (cfg->update_tune1_with_efuse)
-		qusb2_setbits(qphy->base, cfg->regs[QUSB2PHY_PORT_TUNE1],
-			      val[0] << 0x4);
+		qusb2_write_mask(qphy->base, cfg->regs[QUSB2PHY_PORT_TUNE1],
+				 val[0] << HSTX_TRIM_SHIFT,
+				 HSTX_TRIM_MASK);
 	else
-		qusb2_setbits(qphy->base, cfg->regs[QUSB2PHY_PORT_TUNE2],
-			      val[0] << 0x4);
-
+		qusb2_write_mask(qphy->base, cfg->regs[QUSB2PHY_PORT_TUNE2],
+				 val[0] << HSTX_TRIM_SHIFT,
+				 HSTX_TRIM_MASK);
 }
 
 static int qusb2_phy_set_mode(struct phy *phy, enum phy_mode mode)
-- 
GitLab


From c88520db18ba0b9a41326c3b8680e7c09eb4c381 Mon Sep 17 00:00:00 2001
From: Manu Gautam <mgautam@codeaurora.org>
Date: Tue, 16 Oct 2018 12:52:07 +0530
Subject: [PATCH 1284/1890] phy: qcom-qusb2: Fix HSTX_TRIM tuning with fused
 value for SDM845

Tune1 register on sdm845 is used to update HSTX_TRIM with fused
setting. Enable same by specifying update_tune1_with_efuse flag
for sdm845, otherwise driver ends up programming tune2 register.

Fixes: ef17f6e212ca ("phy: qcom-qusb2: Add QUSB2 PHYs support for sdm845")
Signed-off-by: Manu Gautam <mgautam@codeaurora.org>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Reviewed-by: Stephen Boyd <swboyd@chromium.org>
Acked-by: Vivek Gautam <vivek.gautam@codeaurora.org>
Signed-off-by: Kishon Vijay Abraham I <kishon@ti.com>
---
 drivers/phy/qualcomm/phy-qcom-qusb2.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/phy/qualcomm/phy-qcom-qusb2.c b/drivers/phy/qualcomm/phy-qcom-qusb2.c
index a710118b00a8f..6d4b44b569bc7 100644
--- a/drivers/phy/qualcomm/phy-qcom-qusb2.c
+++ b/drivers/phy/qualcomm/phy-qcom-qusb2.c
@@ -231,6 +231,7 @@ static const struct qusb2_phy_cfg sdm845_phy_cfg = {
 	.mask_core_ready = CORE_READY_STATUS,
 	.has_pll_override = true,
 	.autoresume_en	  = BIT(0),
+	.update_tune1_with_efuse = true,
 };
 
 static const char * const qusb2_phy_vreg_names[] = {
-- 
GitLab


From 48d7f160b10711f014bf07b574c73452646c9fdd Mon Sep 17 00:00:00 2001
From: Stephen Boyd <swboyd@chromium.org>
Date: Mon, 5 Nov 2018 11:40:10 -0800
Subject: [PATCH 1285/1890] dt-bindings: clk: Introduce 'protected-clocks'
 property

Add a generic clk property for clks which are not intended to be used by
the OS due to security restrictions put in place by firmware. For
example, on some Qualcomm firmwares reading or writing certain clk
registers causes the entire system to reboot, but on other firmwares
reading and writing those same registers is required to make devices
like QSPI work. Rather than adding one-off properties each time a new
set of clks appears to be protected, let's add a generic clk property to
describe any set of clks that shouldn't be touched by the OS. This way
we never need to register the clks or use them in certain firmware
configurations.

Cc: Rob Herring <robh+dt@kernel.org>
Cc: Bjorn Andersson <bjorn.andersson@linaro.org>
Cc: Taniya Das <tdas@codeaurora.org>
Signed-off-by: Stephen Boyd <swboyd@chromium.org>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 .../devicetree/bindings/clock/clock-bindings.txt | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/Documentation/devicetree/bindings/clock/clock-bindings.txt b/Documentation/devicetree/bindings/clock/clock-bindings.txt
index 2ec489eebe723..b646bbcf7f924 100644
--- a/Documentation/devicetree/bindings/clock/clock-bindings.txt
+++ b/Documentation/devicetree/bindings/clock/clock-bindings.txt
@@ -168,3 +168,19 @@ a shared clock is forbidden.
 
 Configuration of common clocks, which affect multiple consumer devices can
 be similarly specified in the clock provider node.
+
+==Protected clocks==
+
+Some platforms or firmwares may not fully expose all the clocks to the OS, such
+as in situations where those clks are used by drivers running in ARM secure
+execution levels. Such a configuration can be specified in device tree with the
+protected-clocks property in the form of a clock specifier list. This property should
+only be specified in the node that is providing the clocks being protected:
+
+   clock-controller@a000f000 {
+        compatible = "vendor,clk95;
+        reg = <0xa000f000 0x1000>
+        #clocks-cells = <1>;
+        ...
+        protected-clocks = <UART3_CLK>, <SPI5_CLK>;
+   };
-- 
GitLab


From b181b3b801da8893c8eb706e448dd5111b02de60 Mon Sep 17 00:00:00 2001
From: Stephen Boyd <swboyd@chromium.org>
Date: Mon, 5 Nov 2018 11:40:11 -0800
Subject: [PATCH 1286/1890] clk: qcom: Support 'protected-clocks' property

Certain firmware configurations "protect" clks and cause the entire
system to reboot when a non-secure OS such as Linux tries to read or
write protected clk registers. But other firmware configurations allow
reading or writing the same registers, and they may actually require
that the OS use the otherwise locked down clks. Support the
'protected-clocks' property by never registering these protected clks
with the common clk framework. This way, when firmware is protecting
these clks we won't have the chance to ever read or write these
registers and take down the entire system.

Cc: Taniya Das <tdas@codeaurora.org>
Cc: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Stephen Boyd <swboyd@chromium.org>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 drivers/clk/qcom/common.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/drivers/clk/qcom/common.c b/drivers/clk/qcom/common.c
index db9b2471ac401..0a48ed56833b4 100644
--- a/drivers/clk/qcom/common.c
+++ b/drivers/clk/qcom/common.c
@@ -191,6 +191,22 @@ int qcom_cc_register_sleep_clk(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(qcom_cc_register_sleep_clk);
 
+/* Drop 'protected-clocks' from the list of clocks to register */
+static void qcom_cc_drop_protected(struct device *dev, struct qcom_cc *cc)
+{
+	struct device_node *np = dev->of_node;
+	struct property *prop;
+	const __be32 *p;
+	u32 i;
+
+	of_property_for_each_u32(np, "protected-clocks", prop, p, i) {
+		if (i >= cc->num_rclks)
+			continue;
+
+		cc->rclks[i] = NULL;
+	}
+}
+
 static struct clk_hw *qcom_cc_clk_hw_get(struct of_phandle_args *clkspec,
 					 void *data)
 {
@@ -251,6 +267,8 @@ int qcom_cc_really_probe(struct platform_device *pdev,
 	cc->rclks = rclks;
 	cc->num_rclks = num_clks;
 
+	qcom_cc_drop_protected(dev, cc);
+
 	for (i = 0; i < num_clks; i++) {
 		if (!rclks[i])
 			continue;
-- 
GitLab


From 7513edbc096a006f967eaf39088091442e623b83 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue, 20 Nov 2018 20:24:38 +0000
Subject: [PATCH 1287/1890] drm/i915/gvt: Avoid use-after-free iterating the
 gtt list

Found by smatch:

drivers/gpu/drm/i915/gvt/gtt.c:2452 intel_vgpu_destroy_ggtt_mm() error: dereferencing freed memory 'pos'

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
Reviewed-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/gtt.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
index 919de5a1bafb7..3b32e69beac67 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -2443,10 +2443,11 @@ static void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu)
 
 static void intel_vgpu_destroy_ggtt_mm(struct intel_vgpu *vgpu)
 {
-	struct intel_gvt_partial_pte *pos;
+	struct intel_gvt_partial_pte *pos, *next;
 
-	list_for_each_entry(pos,
-			&vgpu->gtt.ggtt_mm->ggtt_mm.partial_pte_list, list) {
+	list_for_each_entry_safe(pos, next,
+				 &vgpu->gtt.ggtt_mm->ggtt_mm.partial_pte_list,
+				 list) {
 		gvt_dbg_mm("partial PTE update on hold 0x%lx : 0x%llx\n",
 			pos->offset, pos->data);
 		kfree(pos);
-- 
GitLab


From 8fd3b90300bec541806dac271de2fd44e2e4e2d2 Mon Sep 17 00:00:00 2001
From: Paul Kocialkowski <paul.kocialkowski@bootlin.com>
Date: Thu, 15 Nov 2018 17:32:48 +0100
Subject: [PATCH 1288/1890] drm/fb-helper: Blacklist writeback when adding
 connectors to fbdev

Writeback connectors do not produce any on-screen output and require
special care for use. Such connectors are hidden from enumeration in
DRM resources by default, but they are still picked-up by fbdev.
This makes rather little sense since fbdev is not really adapted for
dealing with writeback.

Moreover, this is also a source of issues when userspace disables the
CRTC (and associated plane) without detaching the CRTC from the
connector (which is hidden by default). In this case, the connector is
still using the CRTC, leading to am "enabled/connectors mismatch" and
eventually the failure of the associated atomic commit. This situation
happens with VC4 testing under IGT GPU Tools.

Filter out writeback connectors in the fbdev helper to solve this.

Signed-off-by: Paul Kocialkowski <paul.kocialkowski@bootlin.com>
Reviewed-by: Boris Brezillon <boris.brezillon@bootlin.com>
Reviewed-by: Maxime Ripard <maxime.ripard@bootlin.com>
Tested-by: Maxime Ripard <maxime.ripard@bootlin.com>
Fixes: 935774cd71fe ("drm: Add writeback connector type")
Cc: <stable@vger.kernel.org> # v4.19+
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20181115163248.21168-1-paul.kocialkowski@bootlin.com
---
 drivers/gpu/drm/drm_fb_helper.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index a502f3e519fdb..dd852a25d3754 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -219,6 +219,9 @@ int drm_fb_helper_single_add_all_connectors(struct drm_fb_helper *fb_helper)
 	mutex_lock(&fb_helper->lock);
 	drm_connector_list_iter_begin(dev, &conn_iter);
 	drm_for_each_connector_iter(connector, &conn_iter) {
+		if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+			continue;
+
 		ret = __drm_fb_helper_add_one_connector(fb_helper, connector);
 		if (ret)
 			goto fail;
-- 
GitLab


From 4ab7ca092c3c7ac8b16aa28eba723a8868f82f14 Mon Sep 17 00:00:00 2001
From: Romain Izard <romain.izard.pro@gmail.com>
Date: Tue, 20 Nov 2018 17:57:37 +0100
Subject: [PATCH 1289/1890] ARM: dts: at91: sama5d2: use the divided clock for
 SMC

The SAMA5D2 is different from SAMA5D3 and SAMA5D4, as there are two
different clocks for the peripherals in the SoC. The Static Memory
controller is connected to the divided master clock.

Unfortunately, the device tree does not correctly show this and uses the
master clock directly. This clock is then used by the code for the NAND
controller to calculate the timings for the controller, and we end up with
slow NAND Flash access.

Fix the device tree, and the performance of Flash access is improved.

Signed-off-by: Romain Izard <romain.izard.pro@gmail.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 arch/arm/boot/dts/sama5d2.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi
index 843052f14f1cf..dd0dda6ed44b5 100644
--- a/arch/arm/boot/dts/sama5d2.dtsi
+++ b/arch/arm/boot/dts/sama5d2.dtsi
@@ -314,7 +314,7 @@ ebi: ebi@10000000 {
 				  0x1 0x0 0x60000000 0x10000000
 				  0x2 0x0 0x70000000 0x10000000
 				  0x3 0x0 0x80000000 0x10000000>;
-			clocks = <&mck>;
+			clocks = <&h32ck>;
 			status = "disabled";
 
 			nand_controller: nand-controller {
-- 
GitLab


From 2bbb5fa37475d7aa5fa62f34db1623f3da2dfdfa Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Mon, 19 Nov 2018 19:06:01 +0100
Subject: [PATCH 1290/1890] ACPI / platform: Add SMB0001 HID to
 forbidden_id_list

Many HP AMD based laptops contain an SMB0001 device like this:

Device (SMBD)
{
    Name (_HID, "SMB0001")  // _HID: Hardware ID
    Name (_CRS, ResourceTemplate ()  // _CRS: Current Resource Settings
    {
        IO (Decode16,
            0x0B20,             // Range Minimum
            0x0B20,             // Range Maximum
            0x20,               // Alignment
            0x20,               // Length
            )
        IRQ (Level, ActiveLow, Shared, )
            {7}
    })
}

The legacy style IRQ resource here causes acpi_dev_get_irqresource() to
be called with legacy=true and this message to show in dmesg:
ACPI: IRQ 7 override to edge, high

This causes issues when later on the AMD0030 GPIO device gets enumerated:

Device (GPIO)
{
    Name (_HID, "AMDI0030")  // _HID: Hardware ID
    Name (_CID, "AMDI0030")  // _CID: Compatible ID
    Name (_UID, Zero)  // _UID: Unique ID
    Method (_CRS, 0, NotSerialized)  // _CRS: Current Resource Settings
    {
	Name (RBUF, ResourceTemplate ()
	{
	    Interrupt (ResourceConsumer, Level, ActiveLow, Shared, ,, )
	    {
		0x00000007,
	    }
	    Memory32Fixed (ReadWrite,
		0xFED81500,         // Address Base
		0x00000400,         // Address Length
		)
	})
	Return (RBUF) /* \_SB_.GPIO._CRS.RBUF */
    }
}

Now acpi_dev_get_irqresource() gets called with legacy=false, but because
of the earlier override of the trigger-type acpi_register_gsi() returns
-EBUSY (because we try to register the same interrupt with a different
trigger-type) and we end up setting IORESOURCE_DISABLED in the flags.

The setting of IORESOURCE_DISABLED causes platform_get_irq() to call
acpi_irq_get() which is not implemented on x86 and returns -EINVAL.
resulting in the following in dmesg:

amd_gpio AMDI0030:00: Failed to get gpio IRQ: -22
amd_gpio: probe of AMDI0030:00 failed with error -22

The SMB0001 is a "virtual" device in the sense that the only way the OS
interacts with it is through calling a couple of methods to do SMBus
transfers. As such it is weird that it has IO and IRQ resources at all,
because the driver for it is not expected to ever access the hardware
directly.

The Linux driver for the SMB0001 device directly binds to the acpi_device
through the acpi_bus, so we do not need to instantiate a platform_device
for this ACPI device. This commit adds the SMB0001 HID to the
forbidden_id_list, avoiding the instantiating of a platform_device for it.
Not instantiating a platform_device means we will no longer call
acpi_dev_get_irqresource() for the legacy IRQ resource fixing the probe of
the AMDI0030 device failing.

BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1644013
BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=198715
BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=199523
Reported-by: Lukas Kahnert <openproggerfreak@gmail.com>
Tested-by: Marc <suaefar@googlemail.com>
Cc: All applicable <stable@vger.kernel.org>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpi_platform.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c
index eaa60c94205a8..1f32caa87686e 100644
--- a/drivers/acpi/acpi_platform.c
+++ b/drivers/acpi/acpi_platform.c
@@ -30,6 +30,7 @@ static const struct acpi_device_id forbidden_id_list[] = {
 	{"PNP0200",  0},	/* AT DMA Controller */
 	{"ACPI0009", 0},	/* IOxAPIC */
 	{"ACPI000A", 0},	/* IOAPIC */
+	{"SMB0001",  0},	/* ACPI SMBUS virtual device */
 	{"", 0},
 };
 
-- 
GitLab


From c773058dde9a4f919a8069f3828d9f4adb1fce1e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Tue, 20 Nov 2018 15:54:49 +0200
Subject: [PATCH 1291/1890] drm/i915: Force a LUT update in
 intel_initial_commit()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If we force a plane update to fix up our half populated plane state
we'll also force on the pipe gamma for the plane (since we always
enable pipe gamma currently). If the BIOS hasn't programmed a sensible
LUT into the hardware this will cause the image to become corrupted.
Typical symptoms are a purple/yellow/etc. flash when the driver loads.

To avoid this let's program something sensible into the LUT when
we do the plane update. In the future I plan to add proper plane
gamma enable readout so this is just a temporary measure.

Cc: Hans de Goede <hdegoede@redhat.com>
Fixes: 516a49cc1946 ("drm/i915: Fix assert_plane() warning on bootup with external display")
Signed-off-by: Ville SyrjÃ¤lÃ¤ <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181120135450.3634-1-ville.syrjala@linux.intel.com
Tested-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
(cherry picked from commit fa6af5145b4e87a30a530be0d80734a9dd40da77)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_display.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index a54843fdeb2f0..fa6c1bad5ef7f 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -15267,6 +15267,14 @@ static int intel_initial_commit(struct drm_device *dev)
 			ret = drm_atomic_add_affected_planes(state, crtc);
 			if (ret)
 				goto out;
+
+			/*
+			 * FIXME hack to force a LUT update to avoid the
+			 * plane update forcing the pipe gamma on without
+			 * having a proper LUT loaded. Remove once we
+			 * have readout for pipe gamma enable.
+			 */
+			crtc_state->color_mgmt_changed = true;
 		}
 	}
 
-- 
GitLab


From f559156c399cfb11d53a128d210118fbea36816e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Tue, 20 Nov 2018 15:54:50 +0200
Subject: [PATCH 1292/1890] drm/i915: Add rotation readout for plane initial
 config
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If we need to force a full plane update before userspace/fbdev
have given us a proper plane state we should try to maintain the
current plane state as much as possible (apart from the parts
of the state we're trying to fix up with the plane update).
To that end add basic readout for the plane rotation and
maintain it during the initial fb takeover.

Cc: Hans de Goede <hdegoede@redhat.com>
Fixes: 516a49cc1946 ("drm/i915: Fix assert_plane() warning on bootup with external display")
Signed-off-by: Ville SyrjÃ¤lÃ¤ <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181120135450.3634-2-ville.syrjala@linux.intel.com
Tested-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
(cherry picked from commit f43348a3db89305bb1935da9fe4499fdcdde9796)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_display.c | 31 ++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_drv.h     |  1 +
 2 files changed, 32 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index fa6c1bad5ef7f..c9878dd1f7cd0 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2890,6 +2890,7 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc,
 	return;
 
 valid_fb:
+	intel_state->base.rotation = plane_config->rotation;
 	intel_fill_fb_ggtt_view(&intel_state->view, fb,
 				intel_state->base.rotation);
 	intel_state->color_plane[0].stride =
@@ -7882,8 +7883,15 @@ i9xx_get_initial_plane_config(struct intel_crtc *crtc,
 			plane_config->tiling = I915_TILING_X;
 			fb->modifier = I915_FORMAT_MOD_X_TILED;
 		}
+
+		if (val & DISPPLANE_ROTATE_180)
+			plane_config->rotation = DRM_MODE_ROTATE_180;
 	}
 
+	if (IS_CHERRYVIEW(dev_priv) && pipe == PIPE_B &&
+	    val & DISPPLANE_MIRROR)
+		plane_config->rotation |= DRM_MODE_REFLECT_X;
+
 	pixel_format = val & DISPPLANE_PIXFORMAT_MASK;
 	fourcc = i9xx_format_to_fourcc(pixel_format);
 	fb->format = drm_format_info(fourcc);
@@ -8952,6 +8960,29 @@ skylake_get_initial_plane_config(struct intel_crtc *crtc,
 		goto error;
 	}
 
+	/*
+	 * DRM_MODE_ROTATE_ is counter clockwise to stay compatible with Xrandr
+	 * while i915 HW rotation is clockwise, thats why this swapping.
+	 */
+	switch (val & PLANE_CTL_ROTATE_MASK) {
+	case PLANE_CTL_ROTATE_0:
+		plane_config->rotation = DRM_MODE_ROTATE_0;
+		break;
+	case PLANE_CTL_ROTATE_90:
+		plane_config->rotation = DRM_MODE_ROTATE_270;
+		break;
+	case PLANE_CTL_ROTATE_180:
+		plane_config->rotation = DRM_MODE_ROTATE_180;
+		break;
+	case PLANE_CTL_ROTATE_270:
+		plane_config->rotation = DRM_MODE_ROTATE_90;
+		break;
+	}
+
+	if (INTEL_GEN(dev_priv) >= 10 &&
+	    val & PLANE_CTL_FLIP_HORIZONTAL)
+		plane_config->rotation |= DRM_MODE_REFLECT_X;
+
 	base = I915_READ(PLANE_SURF(pipe, plane_id)) & 0xfffff000;
 	plane_config->base = base;
 
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 8b298e5f012da..db6fa1d0cbdae 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -547,6 +547,7 @@ struct intel_initial_plane_config {
 	unsigned int tiling;
 	int size;
 	u32 base;
+	u8 rotation;
 };
 
 #define SKL_MIN_SRC_W 8
-- 
GitLab


From aeed35faee5261e6e93d51192923b7a7c3c4dab5 Mon Sep 17 00:00:00 2001
From: "Daniel M. Lambea" <dmlambea@gmail.com>
Date: Sun, 18 Nov 2018 14:42:55 +0000
Subject: [PATCH 1293/1890] HID: cougar: Add support for Cougar 700K Gaming
 Keyboard

Add USB ID 060b:700a to the list of valid USB IDS for the
cougar hid driver.

Signed-off-by: Daniel M. Lambea <dmlambea@gmail.com>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
---
 drivers/hid/hid-cougar.c | 2 ++
 drivers/hid/hid-ids.h    | 1 +
 2 files changed, 3 insertions(+)

diff --git a/drivers/hid/hid-cougar.c b/drivers/hid/hid-cougar.c
index 3f0916b64c60e..e0bb7b34f3a4d 100644
--- a/drivers/hid/hid-cougar.c
+++ b/drivers/hid/hid-cougar.c
@@ -326,6 +326,8 @@ module_param_cb(g6_is_space, &cougar_g6_is_space_ops, &g6_is_space, 0644);
 static struct hid_device_id cougar_id_table[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_SOLID_YEAR,
 			 USB_DEVICE_ID_COUGAR_500K_GAMING_KEYBOARD) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_SOLID_YEAR,
+			 USB_DEVICE_ID_COUGAR_700K_GAMING_KEYBOARD) },
 	{}
 };
 MODULE_DEVICE_TABLE(hid, cougar_id_table);
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index c0d668944dbe8..e79c42d124b34 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -1013,6 +1013,7 @@
 
 #define USB_VENDOR_ID_SOLID_YEAR			0x060b
 #define USB_DEVICE_ID_COUGAR_500K_GAMING_KEYBOARD	0x500a
+#define USB_DEVICE_ID_COUGAR_700K_GAMING_KEYBOARD	0x700a
 
 #define USB_VENDOR_ID_SOUNDGRAPH	0x15c2
 #define USB_DEVICE_ID_SOUNDGRAPH_IMON_FIRST	0x0034
-- 
GitLab


From a4390aee72713d9e73f1132bcdeb17d72fbbf974 Mon Sep 17 00:00:00 2001
From: Robbie Ko <robbieko@synology.com>
Date: Wed, 14 Nov 2018 18:32:37 +0000
Subject: [PATCH 1294/1890] Btrfs: send, fix infinite loop due to directory
 rename dependencies

When doing an incremental send, due to the need of delaying directory move
(rename) operations we can end up in infinite loop at
apply_children_dir_moves().

An example scenario that triggers this problem is described below, where
directory names correspond to the numbers of their respective inodes.

Parent snapshot:

 .
 |--- 261/
       |--- 271/
             |--- 266/
                   |--- 259/
                   |--- 260/
                   |     |--- 267
                   |
                   |--- 264/
                   |     |--- 258/
                   |           |--- 257/
                   |
                   |--- 265/
                   |--- 268/
                   |--- 269/
                   |     |--- 262/
                   |
                   |--- 270/
                   |--- 272/
                   |     |--- 263/
                   |     |--- 275/
                   |
                   |--- 274/
                         |--- 273/

Send snapshot:

 .
 |-- 275/
      |-- 274/
           |-- 273/
                |-- 262/
                     |-- 269/
                          |-- 258/
                               |-- 271/
                                    |-- 268/
                                         |-- 267/
                                              |-- 270/
                                                   |-- 259/
                                                   |    |-- 265/
                                                   |
                                                   |-- 272/
                                                        |-- 257/
                                                             |-- 260/
                                                             |-- 264/
                                                                  |-- 263/
                                                                       |-- 261/
                                                                            |-- 266/

When processing inode 257 we delay its move (rename) operation because its
new parent in the send snapshot, inode 272, was not yet processed. Then
when processing inode 272, we delay the move operation for that inode
because inode 274 is its ancestor in the send snapshot. Finally we delay
the move operation for inode 274 when processing it because inode 275 is
its new parent in the send snapshot and was not yet moved.

When finishing processing inode 275, we start to do the move operations
that were previously delayed (at apply_children_dir_moves()), resulting in
the following iterations:

1) We issue the move operation for inode 274;

2) Because inode 262 depended on the move operation of inode 274 (it was
   delayed because 274 is its ancestor in the send snapshot), we issue the
   move operation for inode 262;

3) We issue the move operation for inode 272, because it was delayed by
   inode 274 too (ancestor of 272 in the send snapshot);

4) We issue the move operation for inode 269 (it was delayed by 262);

5) We issue the move operation for inode 257 (it was delayed by 272);

6) We issue the move operation for inode 260 (it was delayed by 272);

7) We issue the move operation for inode 258 (it was delayed by 269);

8) We issue the move operation for inode 264 (it was delayed by 257);

9) We issue the move operation for inode 271 (it was delayed by 258);

10) We issue the move operation for inode 263 (it was delayed by 264);

11) We issue the move operation for inode 268 (it was delayed by 271);

12) We verify if we can issue the move operation for inode 270 (it was
    delayed by 271). We detect a path loop in the current state, because
    inode 267 needs to be moved first before we can issue the move
    operation for inode 270. So we delay again the move operation for
    inode 270, this time we will attempt to do it after inode 267 is
    moved;

13) We issue the move operation for inode 261 (it was delayed by 263);

14) We verify if we can issue the move operation for inode 266 (it was
    delayed by 263). We detect a path loop in the current state, because
    inode 270 needs to be moved first before we can issue the move
    operation for inode 266. So we delay again the move operation for
    inode 266, this time we will attempt to do it after inode 270 is
    moved (its move operation was delayed in step 12);

15) We issue the move operation for inode 267 (it was delayed by 268);

16) We verify if we can issue the move operation for inode 266 (it was
    delayed by 270). We detect a path loop in the current state, because
    inode 270 needs to be moved first before we can issue the move
    operation for inode 266. So we delay again the move operation for
    inode 266, this time we will attempt to do it after inode 270 is
    moved (its move operation was delayed in step 12). So here we added
    again the same delayed move operation that we added in step 14;

17) We attempt again to see if we can issue the move operation for inode
    266, and as in step 16, we realize we can not due to a path loop in
    the current state due to a dependency on inode 270. Again we delay
    inode's 266 rename to happen after inode's 270 move operation, adding
    the same dependency to the empty stack that we did in steps 14 and 16.
    The next iteration will pick the same move dependency on the stack
    (the only entry) and realize again there is still a path loop and then
    again the same dependency to the stack, over and over, resulting in
    an infinite loop.

So fix this by preventing adding the same move dependency entries to the
stack by removing each pending move record from the red black tree of
pending moves. This way the next call to get_pending_dir_moves() will
not return anything for the current parent inode.

A test case for fstests, with this reproducer, follows soon.

Signed-off-by: Robbie Ko <robbieko@synology.com>
Reviewed-by: Filipe Manana <fdmanana@suse.com>
[Wrote changelog with example and more clear explanation]
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/send.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 094cc1444a90c..5be83b5a1b431 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -3340,7 +3340,8 @@ static void free_pending_move(struct send_ctx *sctx, struct pending_dir_move *m)
 	kfree(m);
 }
 
-static void tail_append_pending_moves(struct pending_dir_move *moves,
+static void tail_append_pending_moves(struct send_ctx *sctx,
+				      struct pending_dir_move *moves,
 				      struct list_head *stack)
 {
 	if (list_empty(&moves->list)) {
@@ -3351,6 +3352,10 @@ static void tail_append_pending_moves(struct pending_dir_move *moves,
 		list_add_tail(&moves->list, stack);
 		list_splice_tail(&list, stack);
 	}
+	if (!RB_EMPTY_NODE(&moves->node)) {
+		rb_erase(&moves->node, &sctx->pending_dir_moves);
+		RB_CLEAR_NODE(&moves->node);
+	}
 }
 
 static int apply_children_dir_moves(struct send_ctx *sctx)
@@ -3365,7 +3370,7 @@ static int apply_children_dir_moves(struct send_ctx *sctx)
 		return 0;
 
 	INIT_LIST_HEAD(&stack);
-	tail_append_pending_moves(pm, &stack);
+	tail_append_pending_moves(sctx, pm, &stack);
 
 	while (!list_empty(&stack)) {
 		pm = list_first_entry(&stack, struct pending_dir_move, list);
@@ -3376,7 +3381,7 @@ static int apply_children_dir_moves(struct send_ctx *sctx)
 			goto out;
 		pm = get_pending_dir_moves(sctx, parent_ino);
 		if (pm)
-			tail_append_pending_moves(pm, &stack);
+			tail_append_pending_moves(sctx, pm, &stack);
 	}
 	return 0;
 
-- 
GitLab


From b34087157dd76e8d96e5e52808134a791ac61e57 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Wed, 21 Nov 2018 16:00:50 +0000
Subject: [PATCH 1295/1890] dma-direct: Make DIRECT_MAPPING_ERROR viable for
 SWIOTLB

With the overflow buffer removed, we no longer have a unique address
which is guaranteed not to be a valid DMA target to use as an error
token. The DIRECT_MAPPING_ERROR value of 0 tries to at least represent
an unlikely DMA target, but unfortunately there are already SWIOTLB
users with DMA-able memory at physical address 0 which now gets falsely
treated as a mapping failure and leads to all manner of misbehaviour.

The best we can do to mitigate that is flip DIRECT_MAPPING_ERROR to the
other commonly-used error value of all-bits-set, since the last single
byte of memory is by far the least-likely-valid DMA target.

Fixes: dff8d6c1ed58 ("swiotlb: remove the overflow buffer")
Reported-by: John Stultz <john.stultz@linaro.org>
Tested-by: John Stultz <john.stultz@linaro.org>
Acked-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/dma-direct.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
index bd73e7a914107..9e66bfe369aa0 100644
--- a/include/linux/dma-direct.h
+++ b/include/linux/dma-direct.h
@@ -5,7 +5,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/mem_encrypt.h>
 
-#define DIRECT_MAPPING_ERROR		0
+#define DIRECT_MAPPING_ERROR		(~(dma_addr_t)0)
 
 #ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA
 #include <asm/dma-direct.h>
-- 
GitLab


From cb216b84d6ea24fa10f1e7aac35de77246841041 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Wed, 21 Nov 2018 16:00:51 +0000
Subject: [PATCH 1296/1890] swiotlb: Skip cache maintenance on map error

If swiotlb_bounce_page() failed, calling arch_sync_dma_for_device() may
lead to such delights as performing cache maintenance on whatever
address phys_to_virt(SWIOTLB_MAP_ERROR) looks like, which is typically
outside the kernel memory map and goes about as well as expected.

Don't do that.

Fixes: a4a4330db46a ("swiotlb: add support for non-coherent DMA")
Tested-by: John Stultz <john.stultz@linaro.org>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 kernel/dma/swiotlb.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 5731daa09a32e..045930e32c0e9 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -679,7 +679,8 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
 	}
 
 	if (!dev_is_dma_coherent(dev) &&
-	    (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
+	    (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0 &&
+	    dev_addr != DIRECT_MAPPING_ERROR)
 		arch_sync_dma_for_device(dev, phys, size, dir);
 
 	return dev_addr;
-- 
GitLab


From 2c307174ab77e34645e75e12827646e044d273c3 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Mon, 19 Nov 2018 13:31:10 -0800
Subject: [PATCH 1297/1890] xfs: flush removing page cache in
 xfs_reflink_remap_prep

On a sub-page block size filesystem, fsx is failing with a data
corruption after a series of operations involving copying a file
with the destination offset beyond EOF of the destination of the file:

8093(157 mod 256): TRUNCATE DOWN        from 0x7a120 to 0x50000 ******WWWW
8094(158 mod 256): INSERT 0x25000 thru 0x25fff  (0x1000 bytes)
8095(159 mod 256): COPY 0x18000 thru 0x1afff    (0x3000 bytes) to 0x2f400
8096(160 mod 256): WRITE    0x5da00 thru 0x651ff        (0x7800 bytes) HOLE
8097(161 mod 256): COPY 0x2000 thru 0x5fff      (0x4000 bytes) to 0x6fc00

The second copy here is beyond EOF, and it is to sub-page (4k) but
block aligned (1k) offset. The clone runs the EOF zeroing, landing
in a pre-existing post-eof delalloc extent. This zeroes the post-eof
extents in the page cache just fine, dirtying the pages correctly.

The problem is that xfs_reflink_remap_prep() now truncates the page
cache over the range that it is copying it to, and rounds that down
to cover the entire start page. This removes the dirty page over the
delalloc extent from the page cache without having written it back.
Hence later, when the page cache is flushed, the page at offset
0x6f000 has not been written back and hence exposes stale data,
which fsx trips over less than 10 operations later.

Fix this by changing xfs_reflink_remap_prep() to use
xfs_flush_unmap_range().

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_bmap_util.c |  2 +-
 fs/xfs/xfs_bmap_util.h |  3 +++
 fs/xfs/xfs_reflink.c   | 17 +++++++++++++----
 3 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 167ff4297e5c7..404e581f1ea1e 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1042,7 +1042,7 @@ xfs_unmap_extent(
 	goto out_unlock;
 }
 
-static int
+int
 xfs_flush_unmap_range(
 	struct xfs_inode	*ip,
 	xfs_off_t		offset,
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 87363d136bb61..7a78229cf1a79 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -80,4 +80,7 @@ int xfs_bmap_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip,
 			  int whichfork, xfs_extnum_t *nextents,
 			  xfs_filblks_t *count);
 
+int	xfs_flush_unmap_range(struct xfs_inode *ip, xfs_off_t offset,
+			      xfs_off_t len);
+
 #endif	/* __XFS_BMAP_UTIL_H__ */
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index c56bdbfcf7ae5..322a852ce284a 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1352,10 +1352,19 @@ xfs_reflink_remap_prep(
 	if (ret)
 		goto out_unlock;
 
-	/* Zap any page cache for the destination file's range. */
-	truncate_inode_pages_range(&inode_out->i_data,
-			round_down(pos_out, PAGE_SIZE),
-			round_up(pos_out + *len, PAGE_SIZE) - 1);
+	/*
+	 * If pos_out > EOF, we may have dirtied blocks between EOF and
+	 * pos_out. In that case, we need to extend the flush and unmap to cover
+	 * from EOF to the end of the copy length.
+	 */
+	if (pos_out > XFS_ISIZE(dest)) {
+		loff_t	flen = *len + (pos_out - XFS_ISIZE(dest));
+		ret = xfs_flush_unmap_range(dest, XFS_ISIZE(dest), flen);
+	} else {
+		ret = xfs_flush_unmap_range(dest, pos_out, *len);
+	}
+	if (ret)
+		goto out_unlock;
 
 	return 1;
 out_unlock:
-- 
GitLab


From 9230a0b65b47fe6856c4468ec0175c4987e5bede Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Mon, 19 Nov 2018 22:50:08 -0800
Subject: [PATCH 1298/1890] xfs: delalloc -> unwritten COW fork allocation can
 go wrong

Long saga. There have been days spent following this through dead end
after dead end in multi-GB event traces. This morning, after writing
a trace-cmd wrapper that enabled me to be more selective about XFS
trace points, I discovered that I could get just enough essential
tracepoints enabled that there was a 50:50 chance the fsx config
would fail at ~115k ops. If it didn't fail at op 115547, I stopped
fsx at op 115548 anyway.

That gave me two traces - one where the problem manifested, and one
where it didn't. After refining the traces to have the necessary
information, I found that in the failing case there was a real
extent in the COW fork compared to an unwritten extent in the
working case.

Walking back through the two traces to the point where the CWO fork
extents actually diverged, I found that the bad case had an extra
unwritten extent in it. This is likely because the bug it led me to
had triggered multiple times in those 115k ops, leaving stray
COW extents around. What I saw was a COW delalloc conversion to an
unwritten extent (as they should always be through
xfs_iomap_write_allocate()) resulted in a /written extent/:

xfs_writepage:        dev 259:0 ino 0x83 pgoff 0x17000 size 0x79a00 offset 0 length 0
xfs_iext_remove:      dev 259:0 ino 0x83 state RC|LF|RF|COW cur 0xffff888247b899c0/2 offset 32 block 152 count 20 flag 1 caller xfs_bmap_add_extent_delay_real
xfs_bmap_pre_update:  dev 259:0 ino 0x83 state RC|LF|RF|COW cur 0xffff888247b899c0/1 offset 1 block 4503599627239429 count 31 flag 0 caller xfs_bmap_add_extent_delay_real
xfs_bmap_post_update: dev 259:0 ino 0x83 state RC|LF|RF|COW cur 0xffff888247b899c0/1 offset 1 block 121 count 51 flag 0 caller xfs_bmap_add_ex

Basically, Cow fork before:

	0 1            32          52
	+H+DDDDDDDDDDDD+UUUUUUUUUUU+
	   PREV		RIGHT

COW delalloc conversion allocates:

	  1	       32
	  +uuuuuuuuuuuu+
	  NEW

And the result according to the xfs_bmap_post_update trace was:

	0 1            32          52
	+H+wwwwwwwwwwwwwwwwwwwwwwww+
	   PREV

Which is clearly wrong - it should be a merged unwritten extent,
not an unwritten extent.

That lead me to look at the LEFT_FILLING|RIGHT_FILLING|RIGHT_CONTIG
case in xfs_bmap_add_extent_delay_real(), and sure enough, there's
the bug.

It takes the old delalloc extent (PREV) and adds the length of the
RIGHT extent to it, takes the start block from NEW, removes the
RIGHT extent and then updates PREV with the new extent.

What it fails to do is update PREV.br_state. For delalloc, this is
always XFS_EXT_NORM, while in this case we are converting the
delayed allocation to unwritten, so it needs to be updated to
XFS_EXT_UNWRITTEN. This LF|RF|RC case does not do this, and so
the resultant extent is always written.

And that's the bug I've been chasing for a week - a bmap btree bug,
not a reflink/dedupe/copy_file_range bug, but a BMBT bug introduced
with the recent in core extent tree scalability enhancements.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_bmap.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 74d7228e755b3..19e921d1586f2 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -1694,10 +1694,13 @@ xfs_bmap_add_extent_delay_real(
 	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
 		/*
 		 * Filling in all of a previously delayed allocation extent.
-		 * The right neighbor is contiguous, the left is not.
+		 * The right neighbor is contiguous, the left is not. Take care
+		 * with delay -> unwritten extent allocation here because the
+		 * delalloc record we are overwriting is always written.
 		 */
 		PREV.br_startblock = new->br_startblock;
 		PREV.br_blockcount += RIGHT.br_blockcount;
+		PREV.br_state = new->br_state;
 
 		xfs_iext_next(ifp, &bma->icur);
 		xfs_iext_remove(bma->ip, &bma->icur, state);
-- 
GitLab


From 0929d8580071c6a1cec1a7916a8f674c243ceee1 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Mon, 19 Nov 2018 13:31:10 -0800
Subject: [PATCH 1299/1890] iomap: FUA is wrong for DIO O_DSYNC writes into
 unwritten extents

When we write into an unwritten extent via direct IO, we dirty
metadata on IO completion to convert the unwritten extent to
written. However, when we do the FUA optimisation checks, the inode
may be clean and so we issue a FUA write into the unwritten extent.
This means we then bypass the generic_write_sync() call after
unwritten extent conversion has ben done and we don't force the
modified metadata to stable storage.

This violates O_DSYNC semantics. The window of exposure is a single
IO, as the next DIO write will see the inode has dirty metadata and
hence will not use the FUA optimisation. Calling
generic_write_sync() after completion of the second IO will also
sync the first write and it's metadata.

Fix this by avoiding the FUA optimisation when writing to unwritten
extents.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/iomap.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/fs/iomap.c b/fs/iomap.c
index 64ce240217a18..72f3864a2e6b9 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -1596,12 +1596,13 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
 
 	if (iomap->flags & IOMAP_F_NEW) {
 		need_zeroout = true;
-	} else {
+	} else if (iomap->type == IOMAP_MAPPED) {
 		/*
-		 * Use a FUA write if we need datasync semantics, this
-		 * is a pure data IO that doesn't require any metadata
-		 * updates and the underlying device supports FUA. This
-		 * allows us to avoid cache flushes on IO completion.
+		 * Use a FUA write if we need datasync semantics, this is a pure
+		 * data IO that doesn't require any metadata updates (including
+		 * after IO completion such as unwritten extent conversion) and
+		 * the underlying device supports FUA. This allows us to avoid
+		 * cache flushes on IO completion.
 		 */
 		if (!(iomap->flags & (IOMAP_F_SHARED|IOMAP_F_DIRTY)) &&
 		    (dio->flags & IOMAP_DIO_WRITE_FUA) &&
-- 
GitLab


From b450672fb66b4a991a5b55ee24209ac7ae7690ce Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Mon, 19 Nov 2018 13:31:10 -0800
Subject: [PATCH 1300/1890] iomap: sub-block dio needs to zeroout beyond EOF

If we are doing sub-block dio that extends EOF, we need to zero
the unused tail of the block to initialise the data in it it. If we
do not zero the tail of the block, then an immediate mmap read of
the EOF block will expose stale data beyond EOF to userspace. Found
with fsx running sub-block DIO sizes vs MAPREAD/MAPWRITE operations.

Fix this by detecting if the end of the DIO write is beyond EOF
and zeroing the tail if necessary.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/iomap.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/fs/iomap.c b/fs/iomap.c
index 72f3864a2e6b9..77c214194edf6 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -1677,7 +1677,14 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
 		dio->submit.cookie = submit_bio(bio);
 	} while (nr_pages);
 
-	if (need_zeroout) {
+	/*
+	 * We need to zeroout the tail of a sub-block write if the extent type
+	 * requires zeroing or the write extends beyond EOF. If we don't zero
+	 * the block tail in the latter case, we can expose stale data via mmap
+	 * reads of the EOF block.
+	 */
+	if (need_zeroout ||
+	    ((dio->flags & IOMAP_DIO_WRITE) && pos >= i_size_read(inode))) {
 		/* zero out from the end of the write to the end of the block */
 		pad = pos & (fs_block_size - 1);
 		if (pad)
-- 
GitLab


From 4721a6010990971440b4ffefbdf014976b8eda2f Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Mon, 19 Nov 2018 13:31:11 -0800
Subject: [PATCH 1301/1890] iomap: dio data corruption and spurious errors when
 pipes fill

When doing direct IO to a pipe for do_splice_direct(), then pipe is
trivial to fill up and overflow as it can only hold 16 pages. At
this point bio_iov_iter_get_pages() then returns -EFAULT, and we
abort the IO submission process. Unfortunately, iomap_dio_rw()
propagates the error back up the stack.

The error is converted from the EFAULT to EAGAIN in
generic_file_splice_read() to tell the splice layers that the pipe
is full. do_splice_direct() completely fails to handle EAGAIN errors
(it aborts on error) and returns EAGAIN to the caller.

copy_file_write() then completely fails to handle EAGAIN as well,
and so returns EAGAIN to userspace, having failed to copy the data
it was asked to.

Avoid this whole steaming pile of fail by having iomap_dio_rw()
silently swallow EFAULT errors and so do short reads.

To make matters worse, iomap_dio_actor() has a stale data exposure
bug bio_iov_iter_get_pages() fails - it does not zero the tail block
that it may have been left uncovered by partial IO. Fix the error
handling case to drop to the sub-block zeroing rather than
immmediately returning the -EFAULT error.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/iomap.c | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/fs/iomap.c b/fs/iomap.c
index 77c214194edf6..d51e7a2ae641c 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -1580,7 +1580,7 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
 	struct bio *bio;
 	bool need_zeroout = false;
 	bool use_fua = false;
-	int nr_pages, ret;
+	int nr_pages, ret = 0;
 	size_t copied = 0;
 
 	if ((pos | length | align) & ((1 << blkbits) - 1))
@@ -1645,8 +1645,14 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
 
 		ret = bio_iov_iter_get_pages(bio, &iter);
 		if (unlikely(ret)) {
+			/*
+			 * We have to stop part way through an IO. We must fall
+			 * through to the sub-block tail zeroing here, otherwise
+			 * this short IO may expose stale data in the tail of
+			 * the block we haven't written data to.
+			 */
 			bio_put(bio);
-			return copied ? copied : ret;
+			goto zero_tail;
 		}
 
 		n = bio->bi_iter.bi_size;
@@ -1683,6 +1689,7 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
 	 * the block tail in the latter case, we can expose stale data via mmap
 	 * reads of the EOF block.
 	 */
+zero_tail:
 	if (need_zeroout ||
 	    ((dio->flags & IOMAP_DIO_WRITE) && pos >= i_size_read(inode))) {
 		/* zero out from the end of the write to the end of the block */
@@ -1690,7 +1697,7 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
 		if (pad)
 			iomap_dio_zero(dio, iomap, pos, fs_block_size - pad);
 	}
-	return copied;
+	return copied ? copied : ret;
 }
 
 static loff_t
@@ -1865,6 +1872,15 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 				dio->wait_for_completion = true;
 				ret = 0;
 			}
+
+			/*
+			 * Splicing to pipes can fail on a full pipe. We have to
+			 * swallow this to make it look like a short IO
+			 * otherwise the higher splice layers will completely
+			 * mishandle the error and stop moving data.
+			 */
+			if (ret == -EFAULT)
+				ret = 0;
 			break;
 		}
 		pos += ret;
-- 
GitLab


From 494633fac7896afc2bce6f83fe7319946270540b Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Mon, 19 Nov 2018 13:31:12 -0800
Subject: [PATCH 1302/1890] vfs: vfs_dedupe_file_range() doesn't return
 EOPNOTSUPP

It returns EINVAL when the operation is not supported by the
filesystem. Fix it to return EOPNOTSUPP to be consistent with
the man page and clone_file_range().

Clean up the inconsistent error return handling while I'm there.
(I know, lipstick on a pig, but every little bit helps...)

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/read_write.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/fs/read_write.c b/fs/read_write.c
index bfcb4ced5664c..4dae0399c75a7 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -2094,17 +2094,18 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
 	off = same->src_offset;
 	len = same->src_length;
 
-	ret = -EISDIR;
 	if (S_ISDIR(src->i_mode))
-		goto out;
+		return -EISDIR;
 
-	ret = -EINVAL;
 	if (!S_ISREG(src->i_mode))
-		goto out;
+		return -EINVAL;
+
+	if (!file->f_op->remap_file_range)
+		return -EOPNOTSUPP;
 
 	ret = remap_verify_area(file, off, len, false);
 	if (ret < 0)
-		goto out;
+		return ret;
 	ret = 0;
 
 	if (off + len > i_size_read(src))
@@ -2147,10 +2148,8 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
 		fdput(dst_fd);
 next_loop:
 		if (fatal_signal_pending(current))
-			goto out;
+			break;
 	}
-
-out:
 	return ret;
 }
 EXPORT_SYMBOL(vfs_dedupe_file_range);
-- 
GitLab


From 8c110d43c6bca4b24dd13272a9d4e0ba6f2ec957 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Wed, 21 Nov 2018 08:06:37 -0800
Subject: [PATCH 1303/1890] iomap: readpages doesn't zero page tail beyond EOF

When we read the EOF page of the file via readpages, we need
to zero the region beyond EOF that we either do not read or
should not contain data so that mmap does not expose stale data to
user applications.

However, iomap_adjust_read_range() fails to detect EOF correctly,
and so fsx on 1k block size filesystems fails very quickly with
mapreads exposing data beyond EOF. There are two problems here.

Firstly, when calculating the end block of the EOF byte, we have
to round the size by one to avoid a block aligned EOF from reporting
a block too large. i.e. a size of 1024 bytes is 1 block, which in
index terms is block 0. Therefore we have to calculate the end block
from (isize - 1), not isize.

The second bug is determining if the current page spans EOF, and so
whether we need split it into two half, one for the IO, and the
other for zeroing. Unfortunately, the code that checks whether
we should split the block doesn't actually check if we span EOF, it
just checks if the read spans the /offset in the page/ that EOF
sits on. So it splits every read into two if EOF is not page
aligned, regardless of whether we are reading the EOF block or not.

Hence we need to restrict the "does the read span EOF" check to
just the page that spans EOF, not every page we read.

This patch results in correct EOF detection through readpages:

xfs_vm_readpages:     dev 259:0 ino 0x43 nr_pages 24
xfs_iomap_found:      dev 259:0 ino 0x43 size 0x66c00 offset 0x4f000 count 98304 type hole startoff 0x13c startblock 1368 blockcount 0x4
iomap_readpage_actor: orig pos 323584 pos 323584, length 4096, poff 0 plen 4096, isize 420864
xfs_iomap_found:      dev 259:0 ino 0x43 size 0x66c00 offset 0x50000 count 94208 type hole startoff 0x140 startblock 1497 blockcount 0x5c
iomap_readpage_actor: orig pos 327680 pos 327680, length 94208, poff 0 plen 4096, isize 420864
iomap_readpage_actor: orig pos 331776 pos 331776, length 90112, poff 0 plen 4096, isize 420864
iomap_readpage_actor: orig pos 335872 pos 335872, length 86016, poff 0 plen 4096, isize 420864
iomap_readpage_actor: orig pos 339968 pos 339968, length 81920, poff 0 plen 4096, isize 420864
iomap_readpage_actor: orig pos 344064 pos 344064, length 77824, poff 0 plen 4096, isize 420864
iomap_readpage_actor: orig pos 348160 pos 348160, length 73728, poff 0 plen 4096, isize 420864
iomap_readpage_actor: orig pos 352256 pos 352256, length 69632, poff 0 plen 4096, isize 420864
iomap_readpage_actor: orig pos 356352 pos 356352, length 65536, poff 0 plen 4096, isize 420864
iomap_readpage_actor: orig pos 360448 pos 360448, length 61440, poff 0 plen 4096, isize 420864
iomap_readpage_actor: orig pos 364544 pos 364544, length 57344, poff 0 plen 4096, isize 420864
iomap_readpage_actor: orig pos 368640 pos 368640, length 53248, poff 0 plen 4096, isize 420864
iomap_readpage_actor: orig pos 372736 pos 372736, length 49152, poff 0 plen 4096, isize 420864
iomap_readpage_actor: orig pos 376832 pos 376832, length 45056, poff 0 plen 4096, isize 420864
iomap_readpage_actor: orig pos 380928 pos 380928, length 40960, poff 0 plen 4096, isize 420864
iomap_readpage_actor: orig pos 385024 pos 385024, length 36864, poff 0 plen 4096, isize 420864
iomap_readpage_actor: orig pos 389120 pos 389120, length 32768, poff 0 plen 4096, isize 420864
iomap_readpage_actor: orig pos 393216 pos 393216, length 28672, poff 0 plen 4096, isize 420864
iomap_readpage_actor: orig pos 397312 pos 397312, length 24576, poff 0 plen 4096, isize 420864
iomap_readpage_actor: orig pos 401408 pos 401408, length 20480, poff 0 plen 4096, isize 420864
iomap_readpage_actor: orig pos 405504 pos 405504, length 16384, poff 0 plen 4096, isize 420864
iomap_readpage_actor: orig pos 409600 pos 409600, length 12288, poff 0 plen 4096, isize 420864
iomap_readpage_actor: orig pos 413696 pos 413696, length 8192, poff 0 plen 4096, isize 420864
iomap_readpage_actor: orig pos 417792 pos 417792, length 4096, poff 0 plen 3072, isize 420864
iomap_readpage_actor: orig pos 420864 pos 420864, length 1024, poff 3072 plen 1024, isize 420864

As you can see, it now does full page reads until the last one which
is split correctly at the block aligned EOF, reading 3072 bytes and
zeroing the last 1024 bytes. The original version of the patch got
this right, but it got another case wrong.

The EOF detection crossing really needs to the the original length
as plen, while it starts at the end of the block, will be shortened
as up-to-date blocks are found on the page. This means "orig_pos +
plen" no longer points to the end of the page, and so will not
correctly detect EOF crossing. Hence we have to use the length
passed in to detect this partial page case:

xfs_filemap_fault:    dev 259:1 ino 0x43  write_fault 0
xfs_vm_readpage:      dev 259:1 ino 0x43 nr_pages 1
xfs_iomap_found:      dev 259:1 ino 0x43 size 0x2cc00 offset 0x2c000 count 4096 type hole startoff 0xb0 startblock 282 blockcount 0x4
iomap_readpage_actor: orig pos 180224 pos 181248, length 4096, poff 1024 plen 2048, isize 183296
xfs_iomap_found:      dev 259:1 ino 0x43 size 0x2cc00 offset 0x2cc00 count 1024 type hole startoff 0xb3 startblock 285 blockcount 0x1
iomap_readpage_actor: orig pos 183296 pos 183296, length 1024, poff 3072 plen 1024, isize 183296

Heere we see a trace where the first block on the EOF page is up to
date, hence poff = 1024 bytes. The offset into the page of EOF is
3072, so the range we want to read is 1024 - 3071, and the range we
want to zero is 3072 - 4095. You can see this is split correctly
now.

This fixes the stale data beyond EOF problem that fsx quickly
uncovers on 1k block size filesystems.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/iomap.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/fs/iomap.c b/fs/iomap.c
index d51e7a2ae641c..3ffb776fbebe3 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -142,13 +142,14 @@ static void
 iomap_adjust_read_range(struct inode *inode, struct iomap_page *iop,
 		loff_t *pos, loff_t length, unsigned *offp, unsigned *lenp)
 {
+	loff_t orig_pos = *pos;
+	loff_t isize = i_size_read(inode);
 	unsigned block_bits = inode->i_blkbits;
 	unsigned block_size = (1 << block_bits);
 	unsigned poff = offset_in_page(*pos);
 	unsigned plen = min_t(loff_t, PAGE_SIZE - poff, length);
 	unsigned first = poff >> block_bits;
 	unsigned last = (poff + plen - 1) >> block_bits;
-	unsigned end = offset_in_page(i_size_read(inode)) >> block_bits;
 
 	/*
 	 * If the block size is smaller than the page size we need to check the
@@ -183,8 +184,12 @@ iomap_adjust_read_range(struct inode *inode, struct iomap_page *iop,
 	 * handle both halves separately so that we properly zero data in the
 	 * page cache for blocks that are entirely outside of i_size.
 	 */
-	if (first <= end && last > end)
-		plen -= (last - end) * block_size;
+	if (orig_pos <= isize && orig_pos + length > isize) {
+		unsigned end = offset_in_page(isize - 1) >> block_bits;
+
+		if (first <= end && last > end)
+			plen -= (last - end) * block_size;
+	}
 
 	*offp = poff;
 	*lenp = plen;
-- 
GitLab


From c50cbd85cd7027d32ac5945bb60217936b4f7eaf Mon Sep 17 00:00:00 2001
From: "Dmitry V. Levin" <ldv@altlinux.org>
Date: Wed, 21 Nov 2018 22:14:39 +0300
Subject: [PATCH 1304/1890] mips: fix mips_get_syscall_arg o32 check

When checking for TIF_32BIT_REGS flag, mips_get_syscall_arg() should
use the task specified as its argument instead of the current task.

This potentially affects all syscall_get_arguments() users
who specify tasks different from the current.

Fixes: c0ff3c53d4f99 ("MIPS: Enable HAVE_ARCH_TRACEHOOK.")
Signed-off-by: Dmitry V. Levin <ldv@altlinux.org>
Signed-off-by: Paul Burton <paul.burton@mips.com>
Patchwork: https://patchwork.linux-mips.org/patch/21185/
Cc: Elvira Khabirova <lineprinter@altlinux.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: James Hogan <jhogan@kernel.org>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Cc: stable@vger.kernel.org # v3.13+
---
 arch/mips/include/asm/syscall.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/include/asm/syscall.h b/arch/mips/include/asm/syscall.h
index 0170602a1e4e3..6cf8ffb5367ec 100644
--- a/arch/mips/include/asm/syscall.h
+++ b/arch/mips/include/asm/syscall.h
@@ -73,7 +73,7 @@ static inline unsigned long mips_get_syscall_arg(unsigned long *arg,
 #ifdef CONFIG_64BIT
 	case 4: case 5: case 6: case 7:
 #ifdef CONFIG_MIPS32_O32
-		if (test_thread_flag(TIF_32BIT_REGS))
+		if (test_tsk_thread_flag(task, TIF_32BIT_REGS))
 			return get_user(*arg, (int *)usp + n);
 		else
 #endif
-- 
GitLab


From 6b04114f6fae5e84d33404c2970b1949c032546e Mon Sep 17 00:00:00 2001
From: Alexey Brodkin <abrodkin@synopsys.com>
Date: Tue, 20 Nov 2018 13:30:19 +0300
Subject: [PATCH 1305/1890] arc: [devboards] Add support of NFSv3 ACL

By default NFSv3 doesn't support ACL (Access Control Lists)
which might be quite convenient to have so that
mounted NFS behaves exactly as any other local file-system.

In particular missing support of ACL makes umask useless.
This among other thigs fixes Glibc's "nptl/tst-umask1".

Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
Cc: Cupertino Miranda <cmiranda@synopsys.com>
Cc: stable@vger.kernel.org	#4.14+
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/configs/axs101_defconfig          | 1 +
 arch/arc/configs/axs103_defconfig          | 1 +
 arch/arc/configs/axs103_smp_defconfig      | 1 +
 arch/arc/configs/hsdk_defconfig            | 1 +
 arch/arc/configs/nps_defconfig             | 1 +
 arch/arc/configs/nsimosci_defconfig        | 1 +
 arch/arc/configs/nsimosci_hs_defconfig     | 1 +
 arch/arc/configs/nsimosci_hs_smp_defconfig | 1 +
 arch/arc/configs/vdk_hs38_defconfig        | 1 +
 arch/arc/configs/vdk_hs38_smp_defconfig    | 1 +
 10 files changed, 10 insertions(+)

diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig
index 41bc08be6a3b4..8c23bd086cd07 100644
--- a/arch/arc/configs/axs101_defconfig
+++ b/arch/arc/configs/axs101_defconfig
@@ -95,6 +95,7 @@ CONFIG_VFAT_FS=y
 CONFIG_NTFS_FS=y
 CONFIG_TMPFS=y
 CONFIG_NFS_FS=y
+CONFIG_NFS_V3_ACL=y
 CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_ISO8859_1=y
 # CONFIG_ENABLE_WARN_DEPRECATED is not set
diff --git a/arch/arc/configs/axs103_defconfig b/arch/arc/configs/axs103_defconfig
index 1e1c4a8011b52..666314fffc601 100644
--- a/arch/arc/configs/axs103_defconfig
+++ b/arch/arc/configs/axs103_defconfig
@@ -94,6 +94,7 @@ CONFIG_VFAT_FS=y
 CONFIG_NTFS_FS=y
 CONFIG_TMPFS=y
 CONFIG_NFS_FS=y
+CONFIG_NFS_V3_ACL=y
 CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_ISO8859_1=y
 # CONFIG_ENABLE_WARN_DEPRECATED is not set
diff --git a/arch/arc/configs/axs103_smp_defconfig b/arch/arc/configs/axs103_smp_defconfig
index 6b0c0cfd5c304..429832b8560b8 100644
--- a/arch/arc/configs/axs103_smp_defconfig
+++ b/arch/arc/configs/axs103_smp_defconfig
@@ -97,6 +97,7 @@ CONFIG_VFAT_FS=y
 CONFIG_NTFS_FS=y
 CONFIG_TMPFS=y
 CONFIG_NFS_FS=y
+CONFIG_NFS_V3_ACL=y
 CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_ISO8859_1=y
 # CONFIG_ENABLE_WARN_DEPRECATED is not set
diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig
index eca10b8baea5a..87b23b7fb7814 100644
--- a/arch/arc/configs/hsdk_defconfig
+++ b/arch/arc/configs/hsdk_defconfig
@@ -68,6 +68,7 @@ CONFIG_EXT3_FS=y
 CONFIG_VFAT_FS=y
 CONFIG_TMPFS=y
 CONFIG_NFS_FS=y
+CONFIG_NFS_V3_ACL=y
 CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_ISO8859_1=y
 # CONFIG_ENABLE_WARN_DEPRECATED is not set
diff --git a/arch/arc/configs/nps_defconfig b/arch/arc/configs/nps_defconfig
index 31ba224bbfb47..ae7a0d8be98d5 100644
--- a/arch/arc/configs/nps_defconfig
+++ b/arch/arc/configs/nps_defconfig
@@ -73,6 +73,7 @@ CONFIG_PROC_KCORE=y
 CONFIG_TMPFS=y
 # CONFIG_MISC_FILESYSTEMS is not set
 CONFIG_NFS_FS=y
+CONFIG_NFS_V3_ACL=y
 CONFIG_ROOT_NFS=y
 CONFIG_DEBUG_INFO=y
 # CONFIG_ENABLE_WARN_DEPRECATED is not set
diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig
index f14eeff7d3084..ad77f20e5aa6e 100644
--- a/arch/arc/configs/nsimosci_defconfig
+++ b/arch/arc/configs/nsimosci_defconfig
@@ -66,5 +66,6 @@ CONFIG_EXT2_FS_XATTR=y
 CONFIG_TMPFS=y
 # CONFIG_MISC_FILESYSTEMS is not set
 CONFIG_NFS_FS=y
+CONFIG_NFS_V3_ACL=y
 # CONFIG_ENABLE_WARN_DEPRECATED is not set
 # CONFIG_ENABLE_MUST_CHECK is not set
diff --git a/arch/arc/configs/nsimosci_hs_defconfig b/arch/arc/configs/nsimosci_hs_defconfig
index 025298a483056..1638e5bc96724 100644
--- a/arch/arc/configs/nsimosci_hs_defconfig
+++ b/arch/arc/configs/nsimosci_hs_defconfig
@@ -65,5 +65,6 @@ CONFIG_EXT2_FS_XATTR=y
 CONFIG_TMPFS=y
 # CONFIG_MISC_FILESYSTEMS is not set
 CONFIG_NFS_FS=y
+CONFIG_NFS_V3_ACL=y
 # CONFIG_ENABLE_WARN_DEPRECATED is not set
 # CONFIG_ENABLE_MUST_CHECK is not set
diff --git a/arch/arc/configs/nsimosci_hs_smp_defconfig b/arch/arc/configs/nsimosci_hs_smp_defconfig
index df7b77b13b823..11cfbdb0f4415 100644
--- a/arch/arc/configs/nsimosci_hs_smp_defconfig
+++ b/arch/arc/configs/nsimosci_hs_smp_defconfig
@@ -76,6 +76,7 @@ CONFIG_EXT2_FS_XATTR=y
 CONFIG_TMPFS=y
 # CONFIG_MISC_FILESYSTEMS is not set
 CONFIG_NFS_FS=y
+CONFIG_NFS_V3_ACL=y
 # CONFIG_ENABLE_WARN_DEPRECATED is not set
 # CONFIG_ENABLE_MUST_CHECK is not set
 CONFIG_FTRACE=y
diff --git a/arch/arc/configs/vdk_hs38_defconfig b/arch/arc/configs/vdk_hs38_defconfig
index db47c3541f159..1e59a2e9c602f 100644
--- a/arch/arc/configs/vdk_hs38_defconfig
+++ b/arch/arc/configs/vdk_hs38_defconfig
@@ -85,6 +85,7 @@ CONFIG_NTFS_FS=y
 CONFIG_TMPFS=y
 CONFIG_JFFS2_FS=y
 CONFIG_NFS_FS=y
+CONFIG_NFS_V3_ACL=y
 CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_ISO8859_1=y
 # CONFIG_ENABLE_WARN_DEPRECATED is not set
diff --git a/arch/arc/configs/vdk_hs38_smp_defconfig b/arch/arc/configs/vdk_hs38_smp_defconfig
index a8ac5e917d9a5..b5c3f6c54b032 100644
--- a/arch/arc/configs/vdk_hs38_smp_defconfig
+++ b/arch/arc/configs/vdk_hs38_smp_defconfig
@@ -90,6 +90,7 @@ CONFIG_NTFS_FS=y
 CONFIG_TMPFS=y
 CONFIG_JFFS2_FS=y
 CONFIG_NFS_FS=y
+CONFIG_NFS_V3_ACL=y
 CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_ISO8859_1=y
 # CONFIG_ENABLE_WARN_DEPRECATED is not set
-- 
GitLab


From c9e0ab86b2e03154bb898cd2f851827783224727 Mon Sep 17 00:00:00 2001
From: Lyude Paul <lyude@redhat.com>
Date: Mon, 19 Nov 2018 15:00:10 +0000
Subject: [PATCH 1306/1890] drm/amd/dm: Don't forget to attach MST encoders
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The change fixed huge delay in SST daisy chain and S3 soft hang
observed in 4.19 kernel rebase.

Regression point in drm:
drm/fb-helper: Eliminate the .best_encoder() usage

The aux sequence is altered due to the failure in
drm_connector_for_each_possible_encoder(). The failure is
caused by missing attached encoder in the process of adding
MST connector.
Â 
drm_dp_send_enum_path_resources() aux transaction is pushed after
mode probe, which causes conflict to drm_dp_mst_i2c_xfer(),
leading to the transaction timeout.

Signed-off-by: Lyude Paul <lyude@redhat.com>
Reviewed-by: Jerry (Fangzhi) Zuo <Jerry.Zuo@amd.com>
Cc: Stable <stable@vger.kernel.org>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index d02c32a1039c0..0cca1809fdcd3 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -342,6 +342,8 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
 		master->connector_id);
 
 	aconnector->mst_encoder = dm_dp_create_fake_mst_encoder(master);
+	drm_connector_attach_encoder(&aconnector->base,
+				     &aconnector->mst_encoder->base);
 
 	/*
 	 * TODO: understand why this one is needed
-- 
GitLab


From 04ac4b0ed412f65230b456fcd9aa07e13befff89 Mon Sep 17 00:00:00 2001
From: Lyude Paul <lyude@redhat.com>
Date: Mon, 19 Nov 2018 19:44:55 +0000
Subject: [PATCH 1307/1890] drm/amd/dm: Understand why attaching path/tile
 properties are needed

Path property is used for userspace to know what MST connector goes to what actual DRM DisplayPort connector, the tiling property is for tiling configurations. Not sure what else there is to figure out.

Signed-off-by: Lyude Paul <lyude@redhat.com>
Reviewed-by: Jerry (Fangzhi) Zuo <Jerry.Zuo@amd.com>
Cc: Stable <stable@vger.kernel.org>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index 0cca1809fdcd3..1b0d209d83676 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -345,9 +345,6 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
 	drm_connector_attach_encoder(&aconnector->base,
 				     &aconnector->mst_encoder->base);
 
-	/*
-	 * TODO: understand why this one is needed
-	 */
 	drm_object_attach_property(
 		&connector->base,
 		dev->mode_config.path_property,
-- 
GitLab


From 074fca3a18e7e1e0d4d7dcc9d7badc43b90232f4 Mon Sep 17 00:00:00 2001
From: Majd Dibbiny <majd@mellanox.com>
Date: Mon, 5 Nov 2018 08:07:37 +0200
Subject: [PATCH 1308/1890] RDMA/mlx5: Fix fence type for IB_WR_LOCAL_INV WR

Currently, for IB_WR_LOCAL_INV WR, when the next fence is None, the
current fence will be SMALL instead of Normal Fence.

Without this patch krping doesn't work on CX-5 devices and throws
following error:

The error messages are from CX5 driver are: (from server side)
[ 710.434014] mlx5_0:dump_cqe:278:(pid 2712): dump error cqe
[ 710.434016] 00000000 00000000 00000000 00000000
[ 710.434016] 00000000 00000000 00000000 00000000
[ 710.434017] 00000000 00000000 00000000 00000000
[ 710.434018] 00000000 93003204 100000b8 000524d2
[ 710.434019] krping: cq completion failed with wr_id 0 status 4 opcode 128 vender_err 32

Fixed the logic to set the correct fence type.

Fixes: 6e8484c5cf07 ("RDMA/mlx5: set UMR wqe fence according to HCA cap")
Signed-off-by: Majd Dibbiny <majd@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/hw/mlx5/qp.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 6841c0f9237fb..8c74afc91a479 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -4678,17 +4678,18 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 			goto out;
 		}
 
-		if (wr->opcode == IB_WR_LOCAL_INV ||
-		    wr->opcode == IB_WR_REG_MR) {
+		if (wr->opcode == IB_WR_REG_MR) {
 			fence = dev->umr_fence;
 			next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
-		} else if (wr->send_flags & IB_SEND_FENCE) {
-			if (qp->next_fence)
-				fence = MLX5_FENCE_MODE_SMALL_AND_FENCE;
-			else
-				fence = MLX5_FENCE_MODE_FENCE;
-		} else {
-			fence = qp->next_fence;
+		} else  {
+			if (wr->send_flags & IB_SEND_FENCE) {
+				if (qp->next_fence)
+					fence = MLX5_FENCE_MODE_SMALL_AND_FENCE;
+				else
+					fence = MLX5_FENCE_MODE_FENCE;
+			} else {
+				fence = qp->next_fence;
+			}
 		}
 
 		switch (ibqp->qp_type) {
-- 
GitLab


From d52ef88a9f4be523425730da3239cf87bee936da Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@mellanox.com>
Date: Mon, 19 Nov 2018 09:58:24 +0200
Subject: [PATCH 1309/1890] RDMA/core: Add GIDs while changing MAC addr only
 for registered ndev

Currently when MAC address is changed, regardless of the netdev reg_state,
GID entries are removed and added to reflect the new MAC address and new
default GID entries.

When a bonding device is used and the underlying PCI device is removed
several netdevice events are generated. Two events of the interest are
CHANGEADDR and UNREGISTER event on lower(slave) netdevice of the bond
netdevice.

Sometimes CHANGEADDR event is generated when netdev state is
UNREGISTERING (after UNREGISTER event is generated). In this scenario, GID
entries for default GIDs are added and never deleted because GID entries
are deleted only when netdev state is < UNREGISTERED.

This leads to non zero reference count on the netdevice. Due to this, PCI
device unbind operation is getting stuck.

To avoid it, when changing mac address, add GID entries only if netdev is
in REGISTERED state.

Fixes: 03db3a2d81e6 ("IB/core: Add RoCE GID table management")
Signed-off-by: Parav Pandit <parav@mellanox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/roce_gid_mgmt.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c
index ee366199b169c..25d43c8f1c2a8 100644
--- a/drivers/infiniband/core/roce_gid_mgmt.c
+++ b/drivers/infiniband/core/roce_gid_mgmt.c
@@ -767,8 +767,10 @@ static int netdevice_event(struct notifier_block *this, unsigned long event,
 
 	case NETDEV_CHANGEADDR:
 		cmds[0] = netdev_del_cmd;
-		cmds[1] = add_default_gid_cmd;
-		cmds[2] = add_cmd;
+		if (ndev->reg_state == NETREG_REGISTERED) {
+			cmds[1] = add_default_gid_cmd;
+			cmds[2] = add_cmd;
+		}
 		break;
 
 	case NETDEV_CHANGEUPPER:
-- 
GitLab


From 3c4b1419c33c2417836a63f8126834ee36968321 Mon Sep 17 00:00:00 2001
From: Selvin Xavier <selvin.xavier@broadcom.com>
Date: Wed, 21 Nov 2018 00:05:00 -0800
Subject: [PATCH 1310/1890] RDMA/bnxt_re: Fix system hang when registration
 with L2 driver fails

Driver doesn't release rtnl lock if registration with
L2 driver (bnxt_re_register_netdev) fais and this causes
hang while requesting for the next lock.

[  371.635416] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[  371.635417] kworker/u48:1   D    0   634      2 0x80000000
[  371.635423] Workqueue: bnxt_re bnxt_re_task [bnxt_re]
[  371.635424] Call Trace:
[  371.635426]  ? __schedule+0x36b/0xbd0
[  371.635429]  schedule+0x39/0x90
[  371.635430]  schedule_preempt_disabled+0x11/0x20
[  371.635431]  __mutex_lock+0x45b/0x9c0
[  371.635433]  ? __mutex_lock+0x16d/0x9c0
[  371.635435]  ? bnxt_re_ib_reg+0x2b/0xb30 [bnxt_re]
[  371.635438]  ? wake_up_klogd+0x37/0x40
[  371.635442]  bnxt_re_ib_reg+0x2b/0xb30 [bnxt_re]
[  371.635447]  bnxt_re_task+0xfd/0x180 [bnxt_re]
[  371.635449]  process_one_work+0x216/0x5b0
[  371.635450]  ? process_one_work+0x189/0x5b0
[  371.635453]  worker_thread+0x4e/0x3d0
[  371.635455]  kthread+0x10e/0x140
[  371.635456]  ? process_one_work+0x5b0/0x5b0
[  371.635458]  ? kthread_stop+0x220/0x220
[  371.635460]  ret_from_fork+0x3a/0x50
[  371.635477] INFO: task NetworkManager:1228 blocked for more than 120 seconds.
[  371.635478]       Tainted: G    B      OE     4.20.0-rc1+ #42
[  371.635479] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.

Release the rtnl_lock correctly in the failure path.

Fixes: de5c95d0f518 ("RDMA/bnxt_re: Fix system crash during RDMA resource initialization")
Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/hw/bnxt_re/main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index cf22826542100..bd5ded5809f5b 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -1268,6 +1268,7 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
 	/* Registered a new RoCE device instance to netdev */
 	rc = bnxt_re_register_netdev(rdev);
 	if (rc) {
+		rtnl_unlock();
 		pr_err("Failed to register with netedev: %#x\n", rc);
 		return -EINVAL;
 	}
-- 
GitLab


From a6c66d6a08b88cc10aca9d3f65cfae31e7652a99 Mon Sep 17 00:00:00 2001
From: Selvin Xavier <selvin.xavier@broadcom.com>
Date: Wed, 21 Nov 2018 00:05:01 -0800
Subject: [PATCH 1311/1890] RDMA/bnxt_re: Avoid accessing the device structure
 after it is freed

When bnxt_re_ib_reg returns failure, the device structure gets
freed. Driver tries to access the device pointer
after it is freed.

[ 4871.034744] Failed to register with netedev: 0xffffffa1
[ 4871.034765] infiniband (null): Failed to register with IB: 0xffffffea
[ 4871.046430] ==================================================================
[ 4871.046437] BUG: KASAN: use-after-free in bnxt_re_task+0x63/0x180 [bnxt_re]
[ 4871.046439] Write of size 4 at addr ffff880fa8406f48 by task kworker/u48:2/17813

[ 4871.046443] CPU: 20 PID: 17813 Comm: kworker/u48:2 Kdump: loaded Tainted: G B OE  4.20.0-rc1+ #42
[ 4871.046444] Hardware name: Dell Inc. PowerEdge R730/0599V5, BIOS 1.0.4 08/28/2014
[ 4871.046447] Workqueue: bnxt_re bnxt_re_task [bnxt_re]
[ 4871.046449] Call Trace:
[ 4871.046454]  dump_stack+0x91/0xeb
[ 4871.046458]  print_address_description+0x6a/0x2a0
[ 4871.046461]  kasan_report+0x176/0x2d0
[ 4871.046463]  ? bnxt_re_task+0x63/0x180 [bnxt_re]
[ 4871.046466]  bnxt_re_task+0x63/0x180 [bnxt_re]
[ 4871.046470]  process_one_work+0x216/0x5b0
[ 4871.046471]  ? process_one_work+0x189/0x5b0
[ 4871.046475]  worker_thread+0x4e/0x3d0
[ 4871.046479]  kthread+0x10e/0x140
[ 4871.046480]  ? process_one_work+0x5b0/0x5b0
[ 4871.046482]  ? kthread_stop+0x220/0x220
[ 4871.046486]  ret_from_fork+0x3a/0x50

[ 4871.046492] The buggy address belongs to the page:
[ 4871.046494] page:ffffea003ea10180 count:0 mapcount:0 mapping:0000000000000000 index:0x0
[ 4871.046495] flags: 0x57ffffc0000000()
[ 4871.046498] raw: 0057ffffc0000000 0000000000000000 ffffea003ea10188 0000000000000000
[ 4871.046500] raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000
[ 4871.046501] page dumped because: kasan: bad access detected

Avoid accessing the device structure once it is freed.

Fixes: 497158aa5f52 ("RDMA/bnxt_re: Fix the ib_reg failure cleanup")
Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/hw/bnxt_re/main.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index bd5ded5809f5b..77f095e5fbe38 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -1467,6 +1467,7 @@ static void bnxt_re_task(struct work_struct *work)
 				"Failed to register with IB: %#x", rc);
 			bnxt_re_remove_one(rdev);
 			bnxt_re_dev_unreg(rdev);
+			goto exit;
 		}
 		break;
 	case NETDEV_UP:
@@ -1490,6 +1491,7 @@ static void bnxt_re_task(struct work_struct *work)
 	}
 	smp_mb__before_atomic();
 	atomic_dec(&rdev->sched_count);
+exit:
 	kfree(re_work);
 }
 
-- 
GitLab


From 13f8d9c16693afb908ead3d2a758adbe6a79eccd Mon Sep 17 00:00:00 2001
From: Yonatan Cohen <yonatanc@mellanox.com>
Date: Wed, 21 Nov 2018 13:48:39 +0200
Subject: [PATCH 1312/1890] IB/mlx5: Fix XRC QP support after introducing
 extended atomic

Extended atomics are supported with RC and XRC QP types, but the commit
citied in the Fixes line added an unneeded check to
to_mlx5_access_flags. This broke XRC QPs.

The following ib_atomic_bw invocation over XRC reproduces the issue:
   ib_atomic_bw -d mlx5_1 --connection=XRC --atomic_type=FETCH_AND_ADD

It is safe to remove such checks because the QP type was already checked
in ib_modify_qp_is_ok(), which was previously called from
mlx5_ib_modify_qp.

Fixes: a60109dc9a95 ("IB/mlx5: Add support for extended atomic operations")
Signed-off-by: Yonatan Cohen <yonatanc@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/hw/mlx5/qp.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 8c74afc91a479..3747cc681b18a 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -2633,8 +2633,7 @@ static int to_mlx5_access_flags(struct mlx5_ib_qp *qp,
 
 	if (access_flags & IB_ACCESS_REMOTE_READ)
 		*hw_access_flags |= MLX5_QP_BIT_RRE;
-	if ((access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
-	    qp->ibqp.qp_type == IB_QPT_RC) {
+	if (access_flags & IB_ACCESS_REMOTE_ATOMIC) {
 		int atomic_mode;
 
 		atomic_mode = get_atomic_mode(dev, qp->ibqp.qp_type);
-- 
GitLab


From db7a691a1551a748cb92d9c89c6b190ea87e28d5 Mon Sep 17 00:00:00 2001
From: Michael Guralnik <michaelgur@mellanox.com>
Date: Wed, 21 Nov 2018 15:03:54 +0200
Subject: [PATCH 1313/1890] IB/mlx5: Avoid load failure due to unknown link
 width

If the firmware reports a connection width that is not 1x, 4x, 8x or 12x
it causes the driver to fail during initialization.

To prevent this failure every time a new width is introduced to the RDMA
stack, we will set a default 4x width for these widths which ar unknown to
the driver.

This is needed to allow to run old kernels with new firmware.

Cc: <stable@vger.kernel.org> # 4.1
Fixes: 1b5daf11b015 ("IB/mlx5: Avoid using the MAD_IFC command under ISSI > 0 mode")
Signed-off-by: Michael Guralnik <michaelgur@mellanox.com>
Reviewed-by: Majd Dibbiny <majd@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/hw/mlx5/main.c | 29 +++++++++++------------------
 1 file changed, 11 insertions(+), 18 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index e9c428071df31..3569fda07e07f 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1094,31 +1094,26 @@ enum mlx5_ib_width {
 	MLX5_IB_WIDTH_12X	= 1 << 4
 };
 
-static int translate_active_width(struct ib_device *ibdev, u8 active_width,
+static void translate_active_width(struct ib_device *ibdev, u8 active_width,
 				  u8 *ib_width)
 {
 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
-	int err = 0;
 
-	if (active_width & MLX5_IB_WIDTH_1X) {
+	if (active_width & MLX5_IB_WIDTH_1X)
 		*ib_width = IB_WIDTH_1X;
-	} else if (active_width & MLX5_IB_WIDTH_2X) {
-		mlx5_ib_dbg(dev, "active_width %d is not supported by IB spec\n",
-			    (int)active_width);
-		err = -EINVAL;
-	} else if (active_width & MLX5_IB_WIDTH_4X) {
+	else if (active_width & MLX5_IB_WIDTH_4X)
 		*ib_width = IB_WIDTH_4X;
-	} else if (active_width & MLX5_IB_WIDTH_8X) {
+	else if (active_width & MLX5_IB_WIDTH_8X)
 		*ib_width = IB_WIDTH_8X;
-	} else if (active_width & MLX5_IB_WIDTH_12X) {
+	else if (active_width & MLX5_IB_WIDTH_12X)
 		*ib_width = IB_WIDTH_12X;
-	} else {
-		mlx5_ib_dbg(dev, "Invalid active_width %d\n",
+	else {
+		mlx5_ib_dbg(dev, "Invalid active_width %d, setting width to default value: 4x\n",
 			    (int)active_width);
-		err = -EINVAL;
+		*ib_width = IB_WIDTH_4X;
 	}
 
-	return err;
+	return;
 }
 
 static int mlx5_mtu_to_ib_mtu(int mtu)
@@ -1225,10 +1220,8 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port,
 	if (err)
 		goto out;
 
-	err = translate_active_width(ibdev, ib_link_width_oper,
-				     &props->active_width);
-	if (err)
-		goto out;
+	translate_active_width(ibdev, ib_link_width_oper, &props->active_width);
+
 	err = mlx5_query_port_ib_proto_oper(mdev, &props->active_speed, port);
 	if (err)
 		goto out;
-- 
GitLab


From 4f32fb921b153ae9ea280e02a3e91509fffc03d3 Mon Sep 17 00:00:00 2001
From: Kamal Heib <kamalheib1@gmail.com>
Date: Thu, 15 Nov 2018 09:49:38 -0800
Subject: [PATCH 1314/1890] RDMA/rdmavt: Fix rvt_create_ah function signature

rdmavt uses a crazy system that looses the type checking when assinging
functions to struct ib_device function pointers. Because of this the
signature to this function was not changed when the below commit revised
things.

Fix the signature so we are not calling a function pointer with a
mismatched signature.

Fixes: 477864c8fcd9 ("IB/core: Let create_ah return extended response to user")
Signed-off-by: Kamal Heib <kamalheib1@gmail.com>
Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/sw/rdmavt/ah.c | 4 +++-
 drivers/infiniband/sw/rdmavt/ah.h | 3 ++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c
index 89ec0f64abfc3..084bb4baebb50 100644
--- a/drivers/infiniband/sw/rdmavt/ah.c
+++ b/drivers/infiniband/sw/rdmavt/ah.c
@@ -91,13 +91,15 @@ EXPORT_SYMBOL(rvt_check_ah);
  * rvt_create_ah - create an address handle
  * @pd: the protection domain
  * @ah_attr: the attributes of the AH
+ * @udata: pointer to user's input output buffer information.
  *
  * This may be called from interrupt context.
  *
  * Return: newly allocated ah
  */
 struct ib_ah *rvt_create_ah(struct ib_pd *pd,
-			    struct rdma_ah_attr *ah_attr)
+			    struct rdma_ah_attr *ah_attr,
+			    struct ib_udata *udata)
 {
 	struct rvt_ah *ah;
 	struct rvt_dev_info *dev = ib_to_rvt(pd->device);
diff --git a/drivers/infiniband/sw/rdmavt/ah.h b/drivers/infiniband/sw/rdmavt/ah.h
index 16105af991890..25271b48a6830 100644
--- a/drivers/infiniband/sw/rdmavt/ah.h
+++ b/drivers/infiniband/sw/rdmavt/ah.h
@@ -51,7 +51,8 @@
 #include <rdma/rdma_vt.h>
 
 struct ib_ah *rvt_create_ah(struct ib_pd *pd,
-			    struct rdma_ah_attr *ah_attr);
+			    struct rdma_ah_attr *ah_attr,
+			    struct ib_udata *udata);
 int rvt_destroy_ah(struct ib_ah *ibah);
 int rvt_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
 int rvt_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
-- 
GitLab


From 24c3456c8d5ee6fc1933ca40f7b4406130682668 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Wed, 14 Nov 2018 10:17:01 -0800
Subject: [PATCH 1315/1890] iser: set sector for ambiguous mr status errors

If for some reason we failed to query the mr status, we need to make sure
to provide sufficient information for an ambiguous error (guard error on
sector 0).

Fixes: 0a7a08ad6f5f ("IB/iser: Implement check_protection")
Cc: <stable@vger.kernel.org>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/ulp/iser/iser_verbs.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 946b623ba5eb7..4ff3d98fa6a4e 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -1124,7 +1124,9 @@ u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
 					 IB_MR_CHECK_SIG_STATUS, &mr_status);
 		if (ret) {
 			pr_err("ib_check_mr_status failed, ret %d\n", ret);
-			goto err;
+			/* Not a lot we can do, return ambiguous guard error */
+			*sector = 0;
+			return 0x1;
 		}
 
 		if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) {
@@ -1152,9 +1154,6 @@ u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
 	}
 
 	return 0;
-err:
-	/* Not alot we can do here, return ambiguous guard error */
-	return 0x1;
 }
 
 void iser_err_comp(struct ib_wc *wc, const char *type)
-- 
GitLab


From b5dd186d10ba59e6b5ba60e42b3b083df56df6f3 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Tue, 20 Nov 2018 11:39:56 +0000
Subject: [PATCH 1316/1890] net: skb_scrub_packet(): Scrub offload_fwd_mark

When a packet is trapped and the corresponding SKB marked as
already-forwarded, it retains this marking even after it is forwarded
across veth links into another bridge. There, since it ingresses the
bridge over veth, which doesn't have offload_fwd_mark, it triggers a
warning in nbp_switchdev_frame_mark().

Then nbp_switchdev_allowed_egress() decides not to allow egress from
this bridge through another veth, because the SKB is already marked, and
the mark (of 0) of course matches. Thus the packet is incorrectly
blocked.

Solve by resetting offload_fwd_mark() in skb_scrub_packet(). That
function is called from tunnels and also from veth, and thus catches the
cases where traffic is forwarded between bridges and transformed in a
way that invalidates the marking.

Fixes: 6bc506b4fb06 ("bridge: switchdev: Add forward mark support for stacked devices")
Fixes: abf4bb6b63d0 ("skbuff: Add the offload_mr_fwd_mark field")
Signed-off-by: Petr Machata <petrm@mellanox.com>
Suggested-by: Ido Schimmel <idosch@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b4ee5c8b928f0..a8217e221e195 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4854,6 +4854,11 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
 	nf_reset(skb);
 	nf_reset_trace(skb);
 
+#ifdef CONFIG_NET_SWITCHDEV
+	skb->offload_fwd_mark = 0;
+	skb->offload_mr_fwd_mark = 0;
+#endif
+
 	if (!xnet)
 		return;
 
-- 
GitLab


From dde7011a824cfa815b03f853ec985ff46b740939 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Wed, 21 Nov 2018 13:53:17 -0800
Subject: [PATCH 1317/1890] tools: bpftool: fix potential NULL pointer
 dereference in do_load

This patch fixes a possible null pointer dereference in
do_load, detected by the semantic patch deref_null.cocci,
with the following warning:

./tools/bpf/bpftool/prog.c:1021:23-25: ERROR: map_replace is NULL but dereferenced.

The following code has potential null pointer references:
881             map_replace = reallocarray(map_replace, old_map_fds + 1,
882                                        sizeof(*map_replace));
883             if (!map_replace) {
884                     p_err("mem alloc failed");
885                     goto err_free_reuse_maps;
886             }

...
1019 err_free_reuse_maps:
1020         for (i = 0; i < old_map_fds; i++)
1021                 close(map_replace[i].fd);
1022         free(map_replace);

Fixes: 3ff5a4dc5d89 ("tools: bpftool: allow reuse of maps with bpftool prog load")
Co-developed-by: Wen Yang <wen.yang99@zte.com.cn>
Signed-off-by: Wen Yang <wen.yang99@zte.com.cn>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/prog.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 9785244acc7be..ccee180dfb761 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -844,6 +844,7 @@ static int do_load(int argc, char **argv)
 			}
 			NEXT_ARG();
 		} else if (is_prefix(*argv, "map")) {
+			void *new_map_replace;
 			char *endptr, *name;
 			int fd;
 
@@ -877,12 +878,15 @@ static int do_load(int argc, char **argv)
 			if (fd < 0)
 				goto err_free_reuse_maps;
 
-			map_replace = reallocarray(map_replace, old_map_fds + 1,
-						   sizeof(*map_replace));
-			if (!map_replace) {
+			new_map_replace = reallocarray(map_replace,
+						       old_map_fds + 1,
+						       sizeof(*map_replace));
+			if (!new_map_replace) {
 				p_err("mem alloc failed");
 				goto err_free_reuse_maps;
 			}
+			map_replace = new_map_replace;
+
 			map_replace[old_map_fds].idx = idx;
 			map_replace[old_map_fds].name = name;
 			map_replace[old_map_fds].fd = fd;
-- 
GitLab


From 86de5921a3d5dd246df661e09bdd0a6131b39ae3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 20 Nov 2018 05:53:59 -0800
Subject: [PATCH 1318/1890] tcp: defer SACK compression after DupThresh

Jean-Louis reported a TCP regression and bisected to recent SACK
compression.

After a loss episode (receiver not able to keep up and dropping
packets because its backlog is full), linux TCP stack is sending
a single SACK (DUPACK).

Sender waits a full RTO timer before recovering losses.

While RFC 6675 says in section 5, "Algorithm Details",

   (2) If DupAcks < DupThresh but IsLost (HighACK + 1) returns true --
       indicating at least three segments have arrived above the current
       cumulative acknowledgment point, which is taken to indicate loss
       -- go to step (4).
...
   (4) Invoke fast retransmit and enter loss recovery as follows:

there are old TCP stacks not implementing this strategy, and
still counting the dupacks before starting fast retransmit.

While these stacks probably perform poorly when receivers implement
LRO/GRO, we should be a little more gentle to them.

This patch makes sure we do not enable SACK compression unless
3 dupacks have been sent since last rcv_nxt update.

Ideally we should even rearm the timer to send one or two
more DUPACK if no more packets are coming, but that will
be work aiming for linux-4.21.

Many thanks to Jean-Louis for bisecting the issue, providing
packet captures and testing this patch.

Fixes: 5d9f4262b7ea ("tcp: add SACK compression")
Reported-by: Jean-Louis Dupond <jean-louis@dupond.be>
Tested-by: Jean-Louis Dupond <jean-louis@dupond.be>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h   |  1 +
 net/ipv4/tcp_input.c  | 14 ++++++++++++--
 net/ipv4/tcp_output.c |  6 +++---
 net/ipv4/tcp_timer.c  |  2 +-
 4 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 8ed77bb4ed863..a9b0280687d52 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -196,6 +196,7 @@ struct tcp_sock {
 	u32	rcv_tstamp;	/* timestamp of last received ACK (for keepalives) */
 	u32	lsndtime;	/* timestamp of last sent data packet (for restart window) */
 	u32	last_oow_ack_time;  /* timestamp of last out-of-window ACK */
+	u32	compressed_ack_rcv_nxt;
 
 	u32	tsoffset;	/* timestamp offset */
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e695584bb33fb..1e37c13881893 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4268,7 +4268,7 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
 	 * If the sack array is full, forget about the last one.
 	 */
 	if (this_sack >= TCP_NUM_SACKS) {
-		if (tp->compressed_ack)
+		if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
 			tcp_send_ack(sk);
 		this_sack--;
 		tp->rx_opt.num_sacks--;
@@ -5189,7 +5189,17 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
 	if (!tcp_is_sack(tp) ||
 	    tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)
 		goto send_now;
-	tp->compressed_ack++;
+
+	if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) {
+		tp->compressed_ack_rcv_nxt = tp->rcv_nxt;
+		if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
+			NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
+				      tp->compressed_ack - TCP_FASTRETRANS_THRESH);
+		tp->compressed_ack = 0;
+	}
+
+	if (++tp->compressed_ack <= TCP_FASTRETRANS_THRESH)
+		goto send_now;
 
 	if (hrtimer_is_queued(&tp->compressed_ack_timer))
 		return;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 9c34b97d365d7..3f510cad0b3ec 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -180,10 +180,10 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (unlikely(tp->compressed_ack)) {
+	if (unlikely(tp->compressed_ack > TCP_FASTRETRANS_THRESH)) {
 		NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
-			      tp->compressed_ack);
-		tp->compressed_ack = 0;
+			      tp->compressed_ack - TCP_FASTRETRANS_THRESH);
+		tp->compressed_ack = TCP_FASTRETRANS_THRESH;
 		if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
 			__sock_put(sk);
 	}
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 676020663ce80..5f8b6d3cd855d 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -740,7 +740,7 @@ static enum hrtimer_restart tcp_compressed_ack_kick(struct hrtimer *timer)
 
 	bh_lock_sock(sk);
 	if (!sock_owned_by_user(sk)) {
-		if (tp->compressed_ack)
+		if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
 			tcp_send_ack(sk);
 	} else {
 		if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED,
-- 
GitLab


From f07920ad9c6f5781c90ac4915f8254d999d8c1cc Mon Sep 17 00:00:00 2001
From: Hans Wippel <hwippel@linux.ibm.com>
Date: Tue, 20 Nov 2018 16:46:39 +0100
Subject: [PATCH 1319/1890] net/smc: abort CLC connection in smc_release

In case of a non-blocking SMC socket, the initial CLC handshake is
performed over a blocking TCP connection in a worker. If the SMC socket
is released, smc_release has to wait for the blocking CLC socket
operations (e.g., kernel_connect) inside the worker.

This patch aborts a CLC connection when the respective non-blocking SMC
socket is released to avoid waiting on socket operations or timeouts.

Signed-off-by: Hans Wippel <hwippel@linux.ibm.com>
Signed-off-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/af_smc.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 80e2119f1c701..84f67f601838e 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -127,6 +127,8 @@ static int smc_release(struct socket *sock)
 	smc = smc_sk(sk);
 
 	/* cleanup for a dangling non-blocking connect */
+	if (smc->connect_info && sk->sk_state == SMC_INIT)
+		tcp_abort(smc->clcsock->sk, ECONNABORTED);
 	flush_work(&smc->connect_work);
 	kfree(smc->connect_info);
 	smc->connect_info = NULL;
-- 
GitLab


From ee05ff7af26509f39360534a5225ee714416cdfd Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Tue, 20 Nov 2018 16:46:40 +0100
Subject: [PATCH 1320/1890] net/smc: use queue pair number when matching link
 group

When searching for an existing link group the queue pair number is also
to be taken into consideration. When the SMC server sends a new number
in a CLC packet (keeping all other values equal) then a new link group
is to be created on the SMC client side.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Signed-off-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/af_smc.c   |  9 +++++----
 net/smc/smc_core.c | 10 ++++++----
 net/smc/smc_core.h |  2 +-
 3 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 84f67f601838e..5fbaf1901571c 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -549,7 +549,8 @@ static int smc_connect_rdma(struct smc_sock *smc,
 
 	mutex_lock(&smc_create_lgr_pending);
 	local_contact = smc_conn_create(smc, false, aclc->hdr.flag, ibdev,
-					ibport, &aclc->lcl, NULL, 0);
+					ibport, ntoh24(aclc->qpn), &aclc->lcl,
+					NULL, 0);
 	if (local_contact < 0) {
 		if (local_contact == -ENOMEM)
 			reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/
@@ -620,7 +621,7 @@ static int smc_connect_ism(struct smc_sock *smc,
 	int rc = 0;
 
 	mutex_lock(&smc_create_lgr_pending);
-	local_contact = smc_conn_create(smc, true, aclc->hdr.flag, NULL, 0,
+	local_contact = smc_conn_create(smc, true, aclc->hdr.flag, NULL, 0, 0,
 					NULL, ismdev, aclc->gid);
 	if (local_contact < 0)
 		return smc_connect_abort(smc, SMC_CLC_DECL_MEM, 0);
@@ -1085,7 +1086,7 @@ static int smc_listen_rdma_init(struct smc_sock *new_smc,
 				int *local_contact)
 {
 	/* allocate connection / link group */
-	*local_contact = smc_conn_create(new_smc, false, 0, ibdev, ibport,
+	*local_contact = smc_conn_create(new_smc, false, 0, ibdev, ibport, 0,
 					 &pclc->lcl, NULL, 0);
 	if (*local_contact < 0) {
 		if (*local_contact == -ENOMEM)
@@ -1109,7 +1110,7 @@ static int smc_listen_ism_init(struct smc_sock *new_smc,
 	struct smc_clc_msg_smcd *pclc_smcd;
 
 	pclc_smcd = smc_get_clc_msg_smcd(pclc);
-	*local_contact = smc_conn_create(new_smc, true, 0, NULL, 0, NULL,
+	*local_contact = smc_conn_create(new_smc, true, 0, NULL, 0, 0, NULL,
 					 ismdev, pclc_smcd->gid);
 	if (*local_contact < 0) {
 		if (*local_contact == -ENOMEM)
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 18daebcef1813..3c023de58afde 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -559,7 +559,7 @@ int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id)
 
 static bool smcr_lgr_match(struct smc_link_group *lgr,
 			   struct smc_clc_msg_local *lcl,
-			   enum smc_lgr_role role)
+			   enum smc_lgr_role role, u32 clcqpn)
 {
 	return !memcmp(lgr->peer_systemid, lcl->id_for_peer,
 		       SMC_SYSTEMID_LEN) &&
@@ -567,7 +567,9 @@ static bool smcr_lgr_match(struct smc_link_group *lgr,
 			SMC_GID_SIZE) &&
 		!memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac,
 			sizeof(lcl->mac)) &&
-		lgr->role == role;
+		lgr->role == role &&
+		(lgr->role == SMC_SERV ||
+		 lgr->lnk[SMC_SINGLE_LINK].peer_qpn == clcqpn);
 }
 
 static bool smcd_lgr_match(struct smc_link_group *lgr,
@@ -578,7 +580,7 @@ static bool smcd_lgr_match(struct smc_link_group *lgr,
 
 /* create a new SMC connection (and a new link group if necessary) */
 int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,
-		    struct smc_ib_device *smcibdev, u8 ibport,
+		    struct smc_ib_device *smcibdev, u8 ibport, u32 clcqpn,
 		    struct smc_clc_msg_local *lcl, struct smcd_dev *smcd,
 		    u64 peer_gid)
 {
@@ -603,7 +605,7 @@ int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,
 	list_for_each_entry(lgr, &smc_lgr_list.list, list) {
 		write_lock_bh(&lgr->conns_lock);
 		if ((is_smcd ? smcd_lgr_match(lgr, smcd, peer_gid) :
-		     smcr_lgr_match(lgr, lcl, role)) &&
+		     smcr_lgr_match(lgr, lcl, role, clcqpn)) &&
 		    !lgr->sync_err &&
 		    lgr->vlan_id == vlan_id &&
 		    (role == SMC_CLNT ||
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index c156674733c9d..5bc6cbaf0ed5a 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -262,7 +262,7 @@ int smc_vlan_by_tcpsk(struct socket *clcsock, unsigned short *vlan_id);
 
 void smc_conn_free(struct smc_connection *conn);
 int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,
-		    struct smc_ib_device *smcibdev, u8 ibport,
+		    struct smc_ib_device *smcibdev, u8 ibport, u32 clcqpn,
 		    struct smc_clc_msg_local *lcl, struct smcd_dev *smcd,
 		    u64 peer_gid);
 void smcd_conn_free(struct smc_connection *conn);
-- 
GitLab


From 0512f69e388c963dbe955d4bd9ae0f7d88d2dc54 Mon Sep 17 00:00:00 2001
From: Hans Wippel <hwippel@linux.ibm.com>
Date: Tue, 20 Nov 2018 16:46:41 +0100
Subject: [PATCH 1321/1890] net/smc: add SMC-D shutdown signal

When a SMC-D link group is freed, a shutdown signal should be sent to
the peer to indicate that the link group is invalid. This patch adds the
shutdown signal to the SMC code.

Signed-off-by: Hans Wippel <hwippel@linux.ibm.com>
Signed-off-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_core.c | 10 ++++++++--
 net/smc/smc_core.h |  3 ++-
 net/smc/smc_ism.c  | 43 ++++++++++++++++++++++++++++++++-----------
 net/smc/smc_ism.h  |  1 +
 4 files changed, 43 insertions(+), 14 deletions(-)

diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 3c023de58afde..1c9fa7f0261a3 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -184,6 +184,8 @@ static void smc_lgr_free_work(struct work_struct *work)
 
 		if (!lgr->is_smcd && lnk->state != SMC_LNK_INACTIVE)
 			smc_llc_link_inactive(lnk);
+		if (lgr->is_smcd)
+			smc_ism_signal_shutdown(lgr);
 		smc_lgr_free(lgr);
 	}
 }
@@ -485,7 +487,7 @@ void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
 }
 
 /* Called when SMC-D device is terminated or peer is lost */
-void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid)
+void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
 {
 	struct smc_link_group *lgr, *l;
 	LIST_HEAD(lgr_free_list);
@@ -495,7 +497,7 @@ void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid)
 	list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
 		if (lgr->is_smcd && lgr->smcd == dev &&
 		    (!peer_gid || lgr->peer_gid == peer_gid) &&
-		    !list_empty(&lgr->list)) {
+		    (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
 			__smc_lgr_terminate(lgr);
 			list_move(&lgr->list, &lgr_free_list);
 		}
@@ -506,6 +508,8 @@ void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid)
 	list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
 		list_del_init(&lgr->list);
 		cancel_delayed_work_sync(&lgr->free_work);
+		if (!peer_gid && vlan == VLAN_VID_MASK) /* dev terminated? */
+			smc_ism_signal_shutdown(lgr);
 		smc_lgr_free(lgr);
 	}
 }
@@ -1026,6 +1030,8 @@ void smc_core_exit(void)
 			smc_llc_link_inactive(lnk);
 		}
 		cancel_delayed_work_sync(&lgr->free_work);
+		if (lgr->is_smcd)
+			smc_ism_signal_shutdown(lgr);
 		smc_lgr_free(lgr); /* free link group */
 	}
 }
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 5bc6cbaf0ed5a..cf98f4d6093e9 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -247,7 +247,8 @@ void smc_lgr_free(struct smc_link_group *lgr);
 void smc_lgr_forget(struct smc_link_group *lgr);
 void smc_lgr_terminate(struct smc_link_group *lgr);
 void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport);
-void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid);
+void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid,
+			unsigned short vlan);
 int smc_buf_create(struct smc_sock *smc, bool is_smcd);
 int smc_uncompress_bufsize(u8 compressed);
 int smc_rmb_rtoken_handling(struct smc_connection *conn,
diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c
index e36f21ce72520..2fff79db1a59c 100644
--- a/net/smc/smc_ism.c
+++ b/net/smc/smc_ism.c
@@ -187,22 +187,28 @@ struct smc_ism_event_work {
 #define ISM_EVENT_REQUEST		0x0001
 #define ISM_EVENT_RESPONSE		0x0002
 #define ISM_EVENT_REQUEST_IR		0x00000001
+#define ISM_EVENT_CODE_SHUTDOWN		0x80
 #define ISM_EVENT_CODE_TESTLINK		0x83
 
+union smcd_sw_event_info {
+	u64	info;
+	struct {
+		u8		uid[SMC_LGR_ID_SIZE];
+		unsigned short	vlan_id;
+		u16		code;
+	};
+};
+
 static void smcd_handle_sw_event(struct smc_ism_event_work *wrk)
 {
-	union {
-		u64	info;
-		struct {
-			u32		uid;
-			unsigned short	vlanid;
-			u16		code;
-		};
-	} ev_info;
+	union smcd_sw_event_info ev_info;
 
+	ev_info.info = wrk->event.info;
 	switch (wrk->event.code) {
+	case ISM_EVENT_CODE_SHUTDOWN:	/* Peer shut down DMBs */
+		smc_smcd_terminate(wrk->smcd, wrk->event.tok, ev_info.vlan_id);
+		break;
 	case ISM_EVENT_CODE_TESTLINK:	/* Activity timer */
-		ev_info.info = wrk->event.info;
 		if (ev_info.code == ISM_EVENT_REQUEST) {
 			ev_info.code = ISM_EVENT_RESPONSE;
 			wrk->smcd->ops->signal_event(wrk->smcd,
@@ -215,6 +221,21 @@ static void smcd_handle_sw_event(struct smc_ism_event_work *wrk)
 	}
 }
 
+int smc_ism_signal_shutdown(struct smc_link_group *lgr)
+{
+	int rc;
+	union smcd_sw_event_info ev_info;
+
+	memcpy(ev_info.uid, lgr->id, SMC_LGR_ID_SIZE);
+	ev_info.vlan_id = lgr->vlan_id;
+	ev_info.code = ISM_EVENT_REQUEST;
+	rc = lgr->smcd->ops->signal_event(lgr->smcd, lgr->peer_gid,
+					  ISM_EVENT_REQUEST_IR,
+					  ISM_EVENT_CODE_SHUTDOWN,
+					  ev_info.info);
+	return rc;
+}
+
 /* worker for SMC-D events */
 static void smc_ism_event_work(struct work_struct *work)
 {
@@ -223,7 +244,7 @@ static void smc_ism_event_work(struct work_struct *work)
 
 	switch (wrk->event.type) {
 	case ISM_EVENT_GID:	/* GID event, token is peer GID */
-		smc_smcd_terminate(wrk->smcd, wrk->event.tok);
+		smc_smcd_terminate(wrk->smcd, wrk->event.tok, VLAN_VID_MASK);
 		break;
 	case ISM_EVENT_DMB:
 		break;
@@ -289,7 +310,7 @@ void smcd_unregister_dev(struct smcd_dev *smcd)
 	spin_unlock(&smcd_dev_list.lock);
 	flush_workqueue(smcd->event_wq);
 	destroy_workqueue(smcd->event_wq);
-	smc_smcd_terminate(smcd, 0);
+	smc_smcd_terminate(smcd, 0, VLAN_VID_MASK);
 
 	device_del(&smcd->dev);
 }
diff --git a/net/smc/smc_ism.h b/net/smc/smc_ism.h
index aee45b860b799..4da946cbfa29c 100644
--- a/net/smc/smc_ism.h
+++ b/net/smc/smc_ism.h
@@ -45,4 +45,5 @@ int smc_ism_register_dmb(struct smc_link_group *lgr, int buf_size,
 int smc_ism_unregister_dmb(struct smcd_dev *dev, struct smc_buf_desc *dmb_desc);
 int smc_ism_write(struct smcd_dev *dev, const struct smc_ism_position *pos,
 		  void *data, size_t len);
+int smc_ism_signal_shutdown(struct smc_link_group *lgr);
 #endif
-- 
GitLab


From b9a22dd9811dbcddb5623c499e5b736400059df6 Mon Sep 17 00:00:00 2001
From: Ursula Braun <ubraun@linux.ibm.com>
Date: Tue, 20 Nov 2018 16:46:42 +0100
Subject: [PATCH 1322/1890] net/smc: atomic SMCD cursor handling

Running uperf tests with SMCD on LPARs results in corrupted cursors.
SMCD cursors should be treated atomically to fix cursor corruption.

Signed-off-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_cdc.c | 26 +++++++++++---------
 net/smc/smc_cdc.h | 60 +++++++++++++++++++++++++++++++++++------------
 2 files changed, 60 insertions(+), 26 deletions(-)

diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index ed5dcf03fe0b6..db83332ac1c8c 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -81,7 +81,7 @@ static inline void smc_cdc_add_pending_send(struct smc_connection *conn,
 		sizeof(struct smc_cdc_msg) > SMC_WR_BUF_SIZE,
 		"must increase SMC_WR_BUF_SIZE to at least sizeof(struct smc_cdc_msg)");
 	BUILD_BUG_ON_MSG(
-		sizeof(struct smc_cdc_msg) != SMC_WR_TX_SIZE,
+		offsetofend(struct smc_cdc_msg, reserved) > SMC_WR_TX_SIZE,
 		"must adapt SMC_WR_TX_SIZE to sizeof(struct smc_cdc_msg); if not all smc_wr upper layer protocols use the same message size any more, must start to set link->wr_tx_sges[i].length on each individual smc_wr_tx_send()");
 	BUILD_BUG_ON_MSG(
 		sizeof(struct smc_cdc_tx_pend) > SMC_WR_TX_PEND_PRIV_SIZE,
@@ -177,23 +177,24 @@ void smc_cdc_tx_dismiss_slots(struct smc_connection *conn)
 int smcd_cdc_msg_send(struct smc_connection *conn)
 {
 	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
+	union smc_host_cursor curs;
 	struct smcd_cdc_msg cdc;
 	int rc, diff;
 
 	memset(&cdc, 0, sizeof(cdc));
 	cdc.common.type = SMC_CDC_MSG_TYPE;
-	cdc.prod_wrap = conn->local_tx_ctrl.prod.wrap;
-	cdc.prod_count = conn->local_tx_ctrl.prod.count;
-
-	cdc.cons_wrap = conn->local_tx_ctrl.cons.wrap;
-	cdc.cons_count = conn->local_tx_ctrl.cons.count;
-	cdc.prod_flags = conn->local_tx_ctrl.prod_flags;
-	cdc.conn_state_flags = conn->local_tx_ctrl.conn_state_flags;
+	curs.acurs.counter = atomic64_read(&conn->local_tx_ctrl.prod.acurs);
+	cdc.prod.wrap = curs.wrap;
+	cdc.prod.count = curs.count;
+	curs.acurs.counter = atomic64_read(&conn->local_tx_ctrl.cons.acurs);
+	cdc.cons.wrap = curs.wrap;
+	cdc.cons.count = curs.count;
+	cdc.cons.prod_flags = conn->local_tx_ctrl.prod_flags;
+	cdc.cons.conn_state_flags = conn->local_tx_ctrl.conn_state_flags;
 	rc = smcd_tx_ism_write(conn, &cdc, sizeof(cdc), 0, 1);
 	if (rc)
 		return rc;
-	smc_curs_copy(&conn->rx_curs_confirmed, &conn->local_tx_ctrl.cons,
-		      conn);
+	smc_curs_copy(&conn->rx_curs_confirmed, &curs, conn);
 	/* Calculate transmitted data and increment free send buffer space */
 	diff = smc_curs_diff(conn->sndbuf_desc->len, &conn->tx_curs_fin,
 			     &conn->tx_curs_sent);
@@ -331,13 +332,16 @@ static void smc_cdc_msg_recv(struct smc_sock *smc, struct smc_cdc_msg *cdc)
 static void smcd_cdc_rx_tsklet(unsigned long data)
 {
 	struct smc_connection *conn = (struct smc_connection *)data;
+	struct smcd_cdc_msg *data_cdc;
 	struct smcd_cdc_msg cdc;
 	struct smc_sock *smc;
 
 	if (!conn)
 		return;
 
-	memcpy(&cdc, conn->rmb_desc->cpu_addr, sizeof(cdc));
+	data_cdc = (struct smcd_cdc_msg *)conn->rmb_desc->cpu_addr;
+	smcd_curs_copy(&cdc.prod, &data_cdc->prod, conn);
+	smcd_curs_copy(&cdc.cons, &data_cdc->cons, conn);
 	smc = container_of(conn, struct smc_sock, conn);
 	smc_cdc_msg_recv(smc, (struct smc_cdc_msg *)&cdc);
 }
diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h
index 934df4473a7ce..b5bfe38c7f9b6 100644
--- a/net/smc/smc_cdc.h
+++ b/net/smc/smc_cdc.h
@@ -48,21 +48,31 @@ struct smc_cdc_msg {
 	struct smc_cdc_producer_flags	prod_flags;
 	struct smc_cdc_conn_state_flags	conn_state_flags;
 	u8				reserved[18];
-} __packed;					/* format defined in RFC7609 */
+};
+
+/* SMC-D cursor format */
+union smcd_cdc_cursor {
+	struct {
+		u16	wrap;
+		u32	count;
+		struct smc_cdc_producer_flags	prod_flags;
+		struct smc_cdc_conn_state_flags	conn_state_flags;
+	} __packed;
+#ifdef KERNEL_HAS_ATOMIC64
+	atomic64_t		acurs;		/* for atomic processing */
+#else
+	u64			acurs;		/* for atomic processing */
+#endif
+} __aligned(8);
 
 /* CDC message for SMC-D */
 struct smcd_cdc_msg {
 	struct smc_wr_rx_hdr common;	/* Type = 0xFE */
 	u8 res1[7];
-	u16 prod_wrap;
-	u32 prod_count;
-	u8 res2[2];
-	u16 cons_wrap;
-	u32 cons_count;
-	struct smc_cdc_producer_flags	prod_flags;
-	struct smc_cdc_conn_state_flags conn_state_flags;
+	union smcd_cdc_cursor	prod;
+	union smcd_cdc_cursor	cons;
 	u8 res3[8];
-} __packed;
+} __aligned(8);
 
 static inline bool smc_cdc_rxed_any_close(struct smc_connection *conn)
 {
@@ -135,6 +145,21 @@ static inline void smc_curs_copy_net(union smc_cdc_cursor *tgt,
 #endif
 }
 
+static inline void smcd_curs_copy(union smcd_cdc_cursor *tgt,
+				  union smcd_cdc_cursor *src,
+				  struct smc_connection *conn)
+{
+#ifndef KERNEL_HAS_ATOMIC64
+	unsigned long flags;
+
+	spin_lock_irqsave(&conn->acurs_lock, flags);
+	tgt->acurs = src->acurs;
+	spin_unlock_irqrestore(&conn->acurs_lock, flags);
+#else
+	atomic64_set(&tgt->acurs, atomic64_read(&src->acurs));
+#endif
+}
+
 /* calculate cursor difference between old and new, where old <= new */
 static inline int smc_curs_diff(unsigned int size,
 				union smc_host_cursor *old,
@@ -222,12 +247,17 @@ static inline void smcr_cdc_msg_to_host(struct smc_host_cdc_msg *local,
 static inline void smcd_cdc_msg_to_host(struct smc_host_cdc_msg *local,
 					struct smcd_cdc_msg *peer)
 {
-	local->prod.wrap = peer->prod_wrap;
-	local->prod.count = peer->prod_count;
-	local->cons.wrap = peer->cons_wrap;
-	local->cons.count = peer->cons_count;
-	local->prod_flags = peer->prod_flags;
-	local->conn_state_flags = peer->conn_state_flags;
+	union smc_host_cursor temp;
+
+	temp.wrap = peer->prod.wrap;
+	temp.count = peer->prod.count;
+	atomic64_set(&local->prod.acurs, atomic64_read(&temp.acurs));
+
+	temp.wrap = peer->cons.wrap;
+	temp.count = peer->cons.count;
+	atomic64_set(&local->cons.acurs, atomic64_read(&temp.acurs));
+	local->prod_flags = peer->cons.prod_flags;
+	local->conn_state_flags = peer->cons.conn_state_flags;
 }
 
 static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local,
-- 
GitLab


From e438bae43c1e08e688c09c410407b59fc1c173b4 Mon Sep 17 00:00:00 2001
From: Ursula Braun <ursula.braun@linux.ibm.com>
Date: Tue, 20 Nov 2018 16:46:43 +0100
Subject: [PATCH 1323/1890] net/smc: use after free fix in smc_wr_tx_put_slot()

In smc_wr_tx_put_slot() field pend->idx is used after being
cleared. That means always idx 0 is cleared in the wr_tx_mask.
This results in a broken administration of available WR send
payload buffers.

Signed-off-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_wr.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index 3c458d2798557..c2694750a6a8a 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -215,12 +215,14 @@ int smc_wr_tx_put_slot(struct smc_link *link,
 
 	pend = container_of(wr_pend_priv, struct smc_wr_tx_pend, priv);
 	if (pend->idx < link->wr_tx_cnt) {
+		u32 idx = pend->idx;
+
 		/* clear the full struct smc_wr_tx_pend including .priv */
 		memset(&link->wr_tx_pends[pend->idx], 0,
 		       sizeof(link->wr_tx_pends[pend->idx]));
 		memset(&link->wr_tx_bufs[pend->idx], 0,
 		       sizeof(link->wr_tx_bufs[pend->idx]));
-		test_and_clear_bit(pend->idx, link->wr_tx_mask);
+		test_and_clear_bit(idx, link->wr_tx_mask);
 		return 1;
 	}
 
-- 
GitLab


From 1a37bd823891568f8721989aed0615835632d81a Mon Sep 17 00:00:00 2001
From: "Y.C. Chen" <yc_chen@aspeedtech.com>
Date: Wed, 3 Oct 2018 14:57:47 +0800
Subject: [PATCH 1324/1890] drm/ast: change resolution may cause screen blurred

The value of pitches is not correct while calling mode_set.
The issue we found so far on following system:
- Debian8 with XFCE Desktop
- Ubuntu with KDE Desktop
- SUSE15 with KDE Desktop

Signed-off-by: Y.C. Chen <yc_chen@aspeedtech.com>
Cc: <stable@vger.kernel.org>
Tested-by: Jean Delvare <jdelvare@suse.de>
Reviewed-by: Jean Delvare <jdelvare@suse.de>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/ast/ast_mode.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c
index 5e77d456d9bb9..f06aae7701edf 100644
--- a/drivers/gpu/drm/ast/ast_mode.c
+++ b/drivers/gpu/drm/ast/ast_mode.c
@@ -568,6 +568,7 @@ static int ast_crtc_do_set_base(struct drm_crtc *crtc,
 	}
 	ast_bo_unreserve(bo);
 
+	ast_set_offset_reg(crtc);
 	ast_set_start_address_crt1(crtc, (u32)gpu_addr);
 
 	return 0;
-- 
GitLab


From 426a593e641ebf0d9288f0a2fcab644a86820220 Mon Sep 17 00:00:00 2001
From: Vincent Chen <vincentc@andestech.com>
Date: Wed, 21 Nov 2018 09:38:11 +0800
Subject: [PATCH 1325/1890] net: faraday: ftmac100: remove
 netif_running(netdev) check before disabling interrupts

In the original ftmac100_interrupt(), the interrupts are only disabled when
the condition "netif_running(netdev)" is true. However, this condition
causes kerenl hang in the following case. When the user requests to
disable the network device, kernel will clear the bit __LINK_STATE_START
from the dev->state and then call the driver's ndo_stop function. Network
device interrupts are not blocked during this process. If an interrupt
occurs between clearing __LINK_STATE_START and stopping network device,
kernel cannot disable the interrupts due to the condition
"netif_running(netdev)" in the ISR. Hence, kernel will hang due to the
continuous interruption of the network device.

In order to solve the above problem, the interrupts of the network device
should always be disabled in the ISR without being restricted by the
condition "netif_running(netdev)".

[V2]
Remove unnecessary curly braces.

Signed-off-by: Vincent Chen <vincentc@andestech.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/faraday/ftmac100.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/faraday/ftmac100.c b/drivers/net/ethernet/faraday/ftmac100.c
index 570caeb8ee9ed..084f24daf2b5a 100644
--- a/drivers/net/ethernet/faraday/ftmac100.c
+++ b/drivers/net/ethernet/faraday/ftmac100.c
@@ -872,11 +872,10 @@ static irqreturn_t ftmac100_interrupt(int irq, void *dev_id)
 	struct net_device *netdev = dev_id;
 	struct ftmac100 *priv = netdev_priv(netdev);
 
-	if (likely(netif_running(netdev))) {
-		/* Disable interrupts for polling */
-		ftmac100_disable_all_int(priv);
+	/* Disable interrupts for polling */
+	ftmac100_disable_all_int(priv);
+	if (likely(netif_running(netdev)))
 		napi_schedule(&priv->napi);
-	}
 
 	return IRQ_HANDLED;
 }
-- 
GitLab


From 7989b9ee8bafe5cc625381dd0c3c4586de27ca26 Mon Sep 17 00:00:00 2001
From: "Y.C. Chen" <yc_chen@aspeedtech.com>
Date: Tue, 30 Oct 2018 11:34:46 +0800
Subject: [PATCH 1326/1890] drm/ast: fixed cursor may disappear sometimes

Signed-off-by: Y.C. Chen <yc_chen@aspeedtech.com>
Cc: <stable@vger.kernel.org>
Reviewed-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/ast/ast_mode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c
index f06aae7701edf..7c6ac3cadb6b7 100644
--- a/drivers/gpu/drm/ast/ast_mode.c
+++ b/drivers/gpu/drm/ast/ast_mode.c
@@ -1255,7 +1255,7 @@ static int ast_cursor_move(struct drm_crtc *crtc,
 	ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0xc7, ((y >> 8) & 0x07));
 
 	/* dummy write to fire HWC */
-	ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xCB, 0xFF, 0x00);
+	ast_show_cursor(crtc);
 
 	return 0;
 }
-- 
GitLab


From bf21c6e455539a495ce6d2877da2f9e364a89062 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Thu, 22 Nov 2018 03:41:07 +0000
Subject: [PATCH 1327/1890] opp: ti-opp-supply: Fix platform_no_drv_owner.cocci
 warnings

Remove .owner field if calls are used which set it automatically
Generated by: scripts/coccinelle/api/platform_no_drv_owner.cocci

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/opp/ti-opp-supply.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/opp/ti-opp-supply.c b/drivers/opp/ti-opp-supply.c
index 3f4fb4dbbe33b..1c69c404df114 100644
--- a/drivers/opp/ti-opp-supply.c
+++ b/drivers/opp/ti-opp-supply.c
@@ -417,7 +417,6 @@ static struct platform_driver ti_opp_supply_driver = {
 	.probe = ti_opp_supply_probe,
 	.driver = {
 		   .name = "ti_opp_supply",
-		   .owner = THIS_MODULE,
 		   .of_match_table = of_match_ptr(ti_opp_supply_of_match),
 		   },
 };
-- 
GitLab


From ed6101bbf6266ee83e620b19faa7c6ad56bb41ab Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 21 Nov 2018 11:16:10 +0100
Subject: [PATCH 1328/1890] perf/x86/intel: Move branch tracing setup to the
 Intel-specific source file

Moving branch tracing setup to Intel core object into separate
intel_pmu_bts_config function, because it's Intel specific.

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: <stable@vger.kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Link: http://lkml.kernel.org/r/20181121101612.16272-1-jolsa@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/events/core.c       | 20 ------------------
 arch/x86/events/intel/core.c | 41 +++++++++++++++++++++++++++++++++++-
 2 files changed, 40 insertions(+), 21 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 106911b603bd9..374a19712e200 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -438,26 +438,6 @@ int x86_setup_perfctr(struct perf_event *event)
 	if (config == -1LL)
 		return -EINVAL;
 
-	/*
-	 * Branch tracing:
-	 */
-	if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
-	    !attr->freq && hwc->sample_period == 1) {
-		/* BTS is not supported by this architecture. */
-		if (!x86_pmu.bts_active)
-			return -EOPNOTSUPP;
-
-		/* BTS is currently only allowed for user-mode. */
-		if (!attr->exclude_kernel)
-			return -EOPNOTSUPP;
-
-		/* disallow bts if conflicting events are present */
-		if (x86_add_exclusive(x86_lbr_exclusive_lbr))
-			return -EBUSY;
-
-		event->destroy = hw_perf_lbr_event_destroy;
-	}
-
 	hwc->config |= config;
 
 	return 0;
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index af8bea9d4006a..a95079abeb034 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3102,10 +3102,49 @@ static unsigned long intel_pmu_large_pebs_flags(struct perf_event *event)
 	return flags;
 }
 
+static int intel_pmu_bts_config(struct perf_event *event)
+{
+	struct perf_event_attr *attr = &event->attr;
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
+	    !attr->freq && hwc->sample_period == 1) {
+		/* BTS is not supported by this architecture. */
+		if (!x86_pmu.bts_active)
+			return -EOPNOTSUPP;
+
+		/* BTS is currently only allowed for user-mode. */
+		if (!attr->exclude_kernel)
+			return -EOPNOTSUPP;
+
+		/* disallow bts if conflicting events are present */
+		if (x86_add_exclusive(x86_lbr_exclusive_lbr))
+			return -EBUSY;
+
+		event->destroy = hw_perf_lbr_event_destroy;
+	}
+
+	return 0;
+}
+
+static int core_pmu_hw_config(struct perf_event *event)
+{
+	int ret = x86_pmu_hw_config(event);
+
+	if (ret)
+		return ret;
+
+	return intel_pmu_bts_config(event);
+}
+
 static int intel_pmu_hw_config(struct perf_event *event)
 {
 	int ret = x86_pmu_hw_config(event);
 
+	if (ret)
+		return ret;
+
+	ret = intel_pmu_bts_config(event);
 	if (ret)
 		return ret;
 
@@ -3600,7 +3639,7 @@ static __initconst const struct x86_pmu core_pmu = {
 	.enable_all		= core_pmu_enable_all,
 	.enable			= core_pmu_enable_event,
 	.disable		= x86_pmu_disable_event,
-	.hw_config		= x86_pmu_hw_config,
+	.hw_config		= core_pmu_hw_config,
 	.schedule_events	= x86_schedule_events,
 	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
 	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
-- 
GitLab


From 67266c1080ad56c31af72b9c18355fde8ccc124a Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 21 Nov 2018 11:16:11 +0100
Subject: [PATCH 1329/1890] perf/x86/intel: Add generic branch tracing check to
 intel_pmu_has_bts()

Currently we check the branch tracing only by checking for the
PERF_COUNT_HW_BRANCH_INSTRUCTIONS event of PERF_TYPE_HARDWARE
type. But we can define the same event with the PERF_TYPE_RAW
type.

Changing the intel_pmu_has_bts() code to check on event's final
hw config value, so both HW types are covered.

Adding unlikely to intel_pmu_has_bts() condition calls, because
it was used in the original code in intel_bts_constraints.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: <stable@vger.kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Link: http://lkml.kernel.org/r/20181121101612.16272-2-jolsa@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/events/intel/core.c | 17 +++--------------
 arch/x86/events/perf_event.h | 13 +++++++++----
 2 files changed, 12 insertions(+), 18 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index a95079abeb034..a15a737886933 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2474,16 +2474,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 static struct event_constraint *
 intel_bts_constraints(struct perf_event *event)
 {
-	struct hw_perf_event *hwc = &event->hw;
-	unsigned int hw_event, bts_event;
-
-	if (event->attr.freq)
-		return NULL;
-
-	hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
-	bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
-
-	if (unlikely(hw_event == bts_event && hwc->sample_period == 1))
+	if (unlikely(intel_pmu_has_bts(event)))
 		return &bts_constraint;
 
 	return NULL;
@@ -3105,10 +3096,8 @@ static unsigned long intel_pmu_large_pebs_flags(struct perf_event *event)
 static int intel_pmu_bts_config(struct perf_event *event)
 {
 	struct perf_event_attr *attr = &event->attr;
-	struct hw_perf_event *hwc = &event->hw;
 
-	if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
-	    !attr->freq && hwc->sample_period == 1) {
+	if (unlikely(intel_pmu_has_bts(event))) {
 		/* BTS is not supported by this architecture. */
 		if (!x86_pmu.bts_active)
 			return -EOPNOTSUPP;
@@ -3170,7 +3159,7 @@ static int intel_pmu_hw_config(struct perf_event *event)
 		/*
 		 * BTS is set up earlier in this path, so don't account twice
 		 */
-		if (!intel_pmu_has_bts(event)) {
+		if (!unlikely(intel_pmu_has_bts(event))) {
 			/* disallow lbr if conflicting events are present */
 			if (x86_add_exclusive(x86_lbr_exclusive_lbr))
 				return -EBUSY;
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index adae087cecdda..78d7b7031bfcc 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -859,11 +859,16 @@ static inline int amd_pmu_init(void)
 
 static inline bool intel_pmu_has_bts(struct perf_event *event)
 {
-	if (event->attr.config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
-	    !event->attr.freq && event->hw.sample_period == 1)
-		return true;
+	struct hw_perf_event *hwc = &event->hw;
+	unsigned int hw_event, bts_event;
+
+	if (event->attr.freq)
+		return false;
+
+	hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
+	bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
 
-	return false;
+	return hw_event == bts_event && hwc->sample_period == 1;
 }
 
 int intel_pmu_save_and_restart(struct perf_event *event);
-- 
GitLab


From 472de49fdc53365c880ab81ae2b5cfdd83db0b06 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 21 Nov 2018 11:16:12 +0100
Subject: [PATCH 1330/1890] perf/x86/intel: Disallow precise_ip on BTS events

Vince reported a crash in the BTS flush code when touching the callchain
data, which was supposed to be initialized as an 'early' callchain,
but intel_pmu_drain_bts_buffer() does not do that:

  BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
  ...
  Call Trace:
   <IRQ>
   intel_pmu_drain_bts_buffer+0x151/0x220
   ? intel_get_event_constraints+0x219/0x360
   ? perf_assign_events+0xe2/0x2a0
   ? select_idle_sibling+0x22/0x3a0
   ? __update_load_avg_se+0x1ec/0x270
   ? enqueue_task_fair+0x377/0xdd0
   ? cpumask_next_and+0x19/0x20
   ? load_balance+0x134/0x950
   ? check_preempt_curr+0x7a/0x90
   ? ttwu_do_wakeup+0x19/0x140
   x86_pmu_stop+0x3b/0x90
   x86_pmu_del+0x57/0x160
   event_sched_out.isra.106+0x81/0x170
   group_sched_out.part.108+0x51/0xc0
   __perf_event_disable+0x7f/0x160
   event_function+0x8c/0xd0
   remote_function+0x3c/0x50
   flush_smp_call_function_queue+0x35/0xe0
   smp_call_function_single_interrupt+0x3a/0xd0
   call_function_single_interrupt+0xf/0x20
   </IRQ>

It was triggered by fuzzer but can be easily reproduced by:

  # perf record -e cpu/branch-instructions/pu -g -c 1

Peter suggested not to allow branch tracing for precise events:

 > Now arguably, this is really stupid behaviour. Who in his right mind
 > wants callchain output on BTS entries. And even if they do, BTS +
 > precise_ip is nonsensical.
 >
 > So in my mind disallowing precise_ip on BTS would be the simplest fix.

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Reported-by: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: <stable@vger.kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 6cbc304f2f36 ("perf/x86/intel: Fix unwind errors from PEBS entries (mk-II)")
Link: http://lkml.kernel.org/r/20181121101612.16272-3-jolsa@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/events/intel/core.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index a15a737886933..ecc3e34ca955f 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3106,6 +3106,10 @@ static int intel_pmu_bts_config(struct perf_event *event)
 		if (!attr->exclude_kernel)
 			return -EOPNOTSUPP;
 
+		/* BTS is not allowed for precise events. */
+		if (attr->precise_ip)
+			return -EOPNOTSUPP;
+
 		/* disallow bts if conflicting events are present */
 		if (x86_add_exclusive(x86_lbr_exclusive_lbr))
 			return -EBUSY;
-- 
GitLab


From f2a5fef1248beccacec0deecb67c1be693d72ae6 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Mon, 19 Nov 2018 14:59:45 +0100
Subject: [PATCH 1331/1890] x86/xen: cleanup includes in
 arch/x86/xen/spinlock.c

arch/x86/xen/spinlock.c includes several headers which are not needed.
Remove the #includes.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 arch/x86/xen/spinlock.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 1c8a8816a402a..3776122c87cce 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -3,22 +3,17 @@
  * Split spinlock implementation out into its own file, so it can be
  * compiled in a FTRACE-compatible way.
  */
-#include <linux/kernel_stat.h>
+#include <linux/kernel.h>
 #include <linux/spinlock.h>
-#include <linux/debugfs.h>
-#include <linux/log2.h>
-#include <linux/gfp.h>
 #include <linux/slab.h>
 #include <linux/atomic.h>
 
 #include <asm/paravirt.h>
 #include <asm/qspinlock.h>
 
-#include <xen/interface/xen.h>
 #include <xen/events.h>
 
 #include "xen-ops.h"
-#include "debugfs.h"
 
 static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
 static DEFINE_PER_CPU(char *, irq_name);
-- 
GitLab


From 829383e183728dec7ed9150b949cd6de64127809 Mon Sep 17 00:00:00 2001
From: Pan Bian <bianpan2016@163.com>
Date: Wed, 21 Nov 2018 17:53:47 +0800
Subject: [PATCH 1332/1890] iommu/vt-d: Use memunmap to free memremap

memunmap() should be used to free the return of memremap(), not
iounmap().

Fixes: dfddb969edf0 ('iommu/vt-d: Switch from ioremap_cache to memremap')
Signed-off-by: Pan Bian <bianpan2016@163.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel-iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index f3ccf025108b4..41a4b8808802b 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -3075,7 +3075,7 @@ static int copy_context_table(struct intel_iommu *iommu,
 			}
 
 			if (old_ce)
-				iounmap(old_ce);
+				memunmap(old_ce);
 
 			ret = 0;
 			if (devfn < 0x80)
-- 
GitLab


From 552f0329c75b3e1d7f9bb8c9e421d37403f192cd Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Mon, 19 Nov 2018 16:20:34 +0000
Subject: [PATCH 1333/1890] Btrfs: fix race between enabling quotas and
 subvolume creation

We have a race between enabling quotas end subvolume creation that cause
subvolume creation to fail with -EINVAL, and the following diagram shows
how it happens:

              CPU 0                                          CPU 1

 btrfs_ioctl()
  btrfs_ioctl_quota_ctl()
   btrfs_quota_enable()
    mutex_lock(fs_info->qgroup_ioctl_lock)

                                                  btrfs_ioctl()
                                                   create_subvol()
                                                    btrfs_qgroup_inherit()
                                                     -> save fs_info->quota_root
                                                        into quota_root
                                                     -> stores a NULL value
                                                     -> tries to lock the mutex
                                                        qgroup_ioctl_lock
                                                        -> blocks waiting for
                                                           the task at CPU0

   -> sets BTRFS_FS_QUOTA_ENABLED in fs_info
   -> sets quota_root in fs_info->quota_root
      (non-NULL value)

   mutex_unlock(fs_info->qgroup_ioctl_lock)

                                                     -> checks quota enabled
                                                        flag is set
                                                     -> returns -EINVAL because
                                                        fs_info->quota_root was
                                                        NULL before it acquired
                                                        the mutex
                                                        qgroup_ioctl_lock
                                                   -> ioctl returns -EINVAL

Returning -EINVAL to user space will be confusing if all the arguments
passed to the subvolume creation ioctl were valid.

Fix it by grabbing the value from fs_info->quota_root after acquiring
the mutex.

CC: stable@vger.kernel.org # 4.4+
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/qgroup.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 45868fd762090..f70825af6438e 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -2659,7 +2659,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
 	int i;
 	u64 *i_qgroups;
 	struct btrfs_fs_info *fs_info = trans->fs_info;
-	struct btrfs_root *quota_root = fs_info->quota_root;
+	struct btrfs_root *quota_root;
 	struct btrfs_qgroup *srcgroup;
 	struct btrfs_qgroup *dstgroup;
 	u32 level_size = 0;
@@ -2669,6 +2669,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
 	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
 		goto out;
 
+	quota_root = fs_info->quota_root;
 	if (!quota_root) {
 		ret = -EINVAL;
 		goto out;
-- 
GitLab


From f96d84488f7d5f9123428c700cea82a292bca53e Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Tue, 20 Nov 2018 05:13:04 -0500
Subject: [PATCH 1334/1890] media: gspca: fix frame overflow error

When converting gspca to vb2 I missed that fact that the buffer sizes
were rounded up to the next page size. As a result some gspca drivers
(spca561 being one of them) reported frame overflows.

Modify the code to align the buffer sizes to the next page size, just
as the original code did.

Fixes: 1f5965c4dfd7 ("media: gspca: convert to vb2")
Tested-off-by: Hans Verkuil <hans.verkuil@cisco.com>

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Reported-by: softwarebugs <softwarebugs@protonmail.com>
Cc: <stable@vger.kernel.org>      # for v4.18 and up
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/usb/gspca/gspca.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/media/usb/gspca/gspca.c b/drivers/media/usb/gspca/gspca.c
index fce9d6f4b7c92..3137f5d89d803 100644
--- a/drivers/media/usb/gspca/gspca.c
+++ b/drivers/media/usb/gspca/gspca.c
@@ -426,10 +426,10 @@ void gspca_frame_add(struct gspca_dev *gspca_dev,
 
 	/* append the packet to the frame buffer */
 	if (len > 0) {
-		if (gspca_dev->image_len + len > gspca_dev->pixfmt.sizeimage) {
+		if (gspca_dev->image_len + len > PAGE_ALIGN(gspca_dev->pixfmt.sizeimage)) {
 			gspca_err(gspca_dev, "frame overflow %d > %d\n",
 				  gspca_dev->image_len + len,
-				  gspca_dev->pixfmt.sizeimage);
+				  PAGE_ALIGN(gspca_dev->pixfmt.sizeimage));
 			packet_type = DISCARD_PACKET;
 		} else {
 /* !! image is NULL only when last pkt is LAST or DISCARD
@@ -1297,18 +1297,19 @@ static int gspca_queue_setup(struct vb2_queue *vq,
 			     unsigned int sizes[], struct device *alloc_devs[])
 {
 	struct gspca_dev *gspca_dev = vb2_get_drv_priv(vq);
+	unsigned int size = PAGE_ALIGN(gspca_dev->pixfmt.sizeimage);
 
 	if (*nplanes)
-		return sizes[0] < gspca_dev->pixfmt.sizeimage ? -EINVAL : 0;
+		return sizes[0] < size ? -EINVAL : 0;
 	*nplanes = 1;
-	sizes[0] = gspca_dev->pixfmt.sizeimage;
+	sizes[0] = size;
 	return 0;
 }
 
 static int gspca_buffer_prepare(struct vb2_buffer *vb)
 {
 	struct gspca_dev *gspca_dev = vb2_get_drv_priv(vb->vb2_queue);
-	unsigned long size = gspca_dev->pixfmt.sizeimage;
+	unsigned long size = PAGE_ALIGN(gspca_dev->pixfmt.sizeimage);
 
 	if (vb2_plane_size(vb, 0) < size) {
 		gspca_err(gspca_dev, "buffer too small (%lu < %lu)\n",
-- 
GitLab


From ea2fc769719f9cc8e26845e8ffa2a719e2724645 Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel@collabora.com>
Date: Fri, 9 Nov 2018 10:16:41 -0500
Subject: [PATCH 1335/1890] media: Revert "media: dt-bindings: Document the
 Rockchip VPU bindings"

This reverts commit e4183d3256e3cd668e899d06af66da5aac3a51af.

The commit was picked by mistake, as the Rockchip VPU driver
is not ready for inclusion yet, and it's still under discussion.

Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 .../bindings/media/rockchip-vpu.txt           | 29 -------------------
 1 file changed, 29 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/media/rockchip-vpu.txt

diff --git a/Documentation/devicetree/bindings/media/rockchip-vpu.txt b/Documentation/devicetree/bindings/media/rockchip-vpu.txt
deleted file mode 100644
index 35dc464ad7c86..0000000000000
--- a/Documentation/devicetree/bindings/media/rockchip-vpu.txt
+++ /dev/null
@@ -1,29 +0,0 @@
-device-tree bindings for rockchip VPU codec
-
-Rockchip (Video Processing Unit) present in various Rockchip platforms,
-such as RK3288 and RK3399.
-
-Required properties:
-- compatible: value should be one of the following
-		"rockchip,rk3288-vpu";
-		"rockchip,rk3399-vpu";
-- interrupts: encoding and decoding interrupt specifiers
-- interrupt-names: should be "vepu" and "vdpu"
-- clocks: phandle to VPU aclk, hclk clocks
-- clock-names: should be "aclk" and "hclk"
-- power-domains: phandle to power domain node
-- iommus: phandle to a iommu node
-
-Example:
-SoC-specific DT entry:
-	vpu: video-codec@ff9a0000 {
-		compatible = "rockchip,rk3288-vpu";
-		reg = <0x0 0xff9a0000 0x0 0x800>;
-		interrupts = <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>,
-			     <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>;
-		interrupt-names = "vepu", "vdpu";
-		clocks = <&cru ACLK_VCODEC>, <&cru HCLK_VCODEC>;
-		clock-names = "aclk", "hclk";
-		power-domains = <&power RK3288_PD_VIDEO>;
-		iommus = <&vpu_mmu>;
-	};
-- 
GitLab


From 99f2c55591fb5c1b536263970d98c2ebc2089906 Mon Sep 17 00:00:00 2001
From: Olga Kornievskaia <kolga@netapp.com>
Date: Wed, 21 Nov 2018 11:24:22 -0500
Subject: [PATCH 1336/1890] NFSv4.2 copy do not allocate memory under the lock

Bruce pointed out that we shouldn't allocate memory while holding
a lock in the nfs4_callback_offload() and handle_async_copy()
that deal with a racing CB_OFFLOAD and reply to COPY case.

Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/callback_proc.c | 22 +++++++++++-----------
 fs/nfs/nfs42proc.c     | 19 ++++++++++---------
 2 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 7b861bbc0b43f..3159673549540 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -686,20 +686,24 @@ __be32 nfs4_callback_offload(void *data, void *dummy,
 {
 	struct cb_offloadargs *args = data;
 	struct nfs_server *server;
-	struct nfs4_copy_state *copy;
+	struct nfs4_copy_state *copy, *tmp_copy;
 	bool found = false;
 
+	copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_NOFS);
+	if (!copy)
+		return htonl(NFS4ERR_SERVERFAULT);
+
 	spin_lock(&cps->clp->cl_lock);
 	rcu_read_lock();
 	list_for_each_entry_rcu(server, &cps->clp->cl_superblocks,
 				client_link) {
-		list_for_each_entry(copy, &server->ss_copies, copies) {
+		list_for_each_entry(tmp_copy, &server->ss_copies, copies) {
 			if (memcmp(args->coa_stateid.other,
-					copy->stateid.other,
+					tmp_copy->stateid.other,
 					sizeof(args->coa_stateid.other)))
 				continue;
-			nfs4_copy_cb_args(copy, args);
-			complete(&copy->completion);
+			nfs4_copy_cb_args(tmp_copy, args);
+			complete(&tmp_copy->completion);
 			found = true;
 			goto out;
 		}
@@ -707,15 +711,11 @@ __be32 nfs4_callback_offload(void *data, void *dummy,
 out:
 	rcu_read_unlock();
 	if (!found) {
-		copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_NOFS);
-		if (!copy) {
-			spin_unlock(&cps->clp->cl_lock);
-			return htonl(NFS4ERR_SERVERFAULT);
-		}
 		memcpy(&copy->stateid, &args->coa_stateid, NFS4_STATEID_SIZE);
 		nfs4_copy_cb_args(copy, args);
 		list_add_tail(&copy->copies, &cps->clp->pending_cb_stateids);
-	}
+	} else
+		kfree(copy);
 	spin_unlock(&cps->clp->cl_lock);
 
 	return 0;
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index ac5b784a1de05..fed06fd9998d3 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -137,31 +137,32 @@ static int handle_async_copy(struct nfs42_copy_res *res,
 			     struct file *dst,
 			     nfs4_stateid *src_stateid)
 {
-	struct nfs4_copy_state *copy;
+	struct nfs4_copy_state *copy, *tmp_copy;
 	int status = NFS4_OK;
 	bool found_pending = false;
 	struct nfs_open_context *ctx = nfs_file_open_context(dst);
 
+	copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_NOFS);
+	if (!copy)
+		return -ENOMEM;
+
 	spin_lock(&server->nfs_client->cl_lock);
-	list_for_each_entry(copy, &server->nfs_client->pending_cb_stateids,
+	list_for_each_entry(tmp_copy, &server->nfs_client->pending_cb_stateids,
 				copies) {
-		if (memcmp(&res->write_res.stateid, &copy->stateid,
+		if (memcmp(&res->write_res.stateid, &tmp_copy->stateid,
 				NFS4_STATEID_SIZE))
 			continue;
 		found_pending = true;
-		list_del(&copy->copies);
+		list_del(&tmp_copy->copies);
 		break;
 	}
 	if (found_pending) {
 		spin_unlock(&server->nfs_client->cl_lock);
+		kfree(copy);
+		copy = tmp_copy;
 		goto out;
 	}
 
-	copy = kzalloc(sizeof(struct nfs4_copy_state), GFP_NOFS);
-	if (!copy) {
-		spin_unlock(&server->nfs_client->cl_lock);
-		return -ENOMEM;
-	}
 	memcpy(&copy->stateid, &res->write_res.stateid, NFS4_STATEID_SIZE);
 	init_completion(&copy->completion);
 	copy->parent_state = ctx->state;
-- 
GitLab


From bb21ce0ad227b69ec0f83279297ee44232105d96 Mon Sep 17 00:00:00 2001
From: Tigran Mkrtchyan <tigran.mkrtchyan@desy.de>
Date: Wed, 21 Nov 2018 12:25:41 +0100
Subject: [PATCH 1337/1890] flexfiles: use per-mirror specified stateid for IO

rfc8435 says:

  For tight coupling, ffds_stateid provides the stateid to be used by
  the client to access the file.

However current implementation replaces per-mirror provided stateid with
by open or lock stateid.

Ensure that per-mirror stateid is used by ff_layout_write_prepare_v4 and
nfs4_ff_layout_prepare_ds.

Signed-off-by: Tigran Mkrtchyan <tigran.mkrtchyan@desy.de>
Signed-off-by: Rick Macklem <rmacklem@uoguelph.ca>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/flexfilelayout/flexfilelayout.c    | 21 +++++++++------------
 fs/nfs/flexfilelayout/flexfilelayout.h    |  4 ++++
 fs/nfs/flexfilelayout/flexfilelayoutdev.c | 19 +++++++++++++++++++
 3 files changed, 32 insertions(+), 12 deletions(-)

diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 86bcba40ca61b..74b36ed883caa 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1361,12 +1361,7 @@ static void ff_layout_read_prepare_v4(struct rpc_task *task, void *data)
 				task))
 		return;
 
-	if (ff_layout_read_prepare_common(task, hdr))
-		return;
-
-	if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
-			hdr->args.lock_context, FMODE_READ) == -EIO)
-		rpc_exit(task, -EIO); /* lost lock, terminate I/O */
+	ff_layout_read_prepare_common(task, hdr);
 }
 
 static void ff_layout_read_call_done(struct rpc_task *task, void *data)
@@ -1542,12 +1537,7 @@ static void ff_layout_write_prepare_v4(struct rpc_task *task, void *data)
 				task))
 		return;
 
-	if (ff_layout_write_prepare_common(task, hdr))
-		return;
-
-	if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
-			hdr->args.lock_context, FMODE_WRITE) == -EIO)
-		rpc_exit(task, -EIO); /* lost lock, terminate I/O */
+	ff_layout_write_prepare_common(task, hdr);
 }
 
 static void ff_layout_write_call_done(struct rpc_task *task, void *data)
@@ -1742,6 +1732,10 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
 	fh = nfs4_ff_layout_select_ds_fh(lseg, idx);
 	if (fh)
 		hdr->args.fh = fh;
+
+	if (!nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid))
+		goto out_failed;
+
 	/*
 	 * Note that if we ever decide to split across DSes,
 	 * then we may need to handle dense-like offsets.
@@ -1804,6 +1798,9 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
 	if (fh)
 		hdr->args.fh = fh;
 
+	if (!nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid))
+		goto out_failed;
+
 	/*
 	 * Note that if we ever decide to split across DSes,
 	 * then we may need to handle dense-like offsets.
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h
index 411798346e483..de50a342d5a50 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.h
+++ b/fs/nfs/flexfilelayout/flexfilelayout.h
@@ -215,6 +215,10 @@ unsigned int ff_layout_fetch_ds_ioerr(struct pnfs_layout_hdr *lo,
 		unsigned int maxnum);
 struct nfs_fh *
 nfs4_ff_layout_select_ds_fh(struct pnfs_layout_segment *lseg, u32 mirror_idx);
+int
+nfs4_ff_layout_select_ds_stateid(struct pnfs_layout_segment *lseg,
+				u32 mirror_idx,
+				nfs4_stateid *stateid);
 
 struct nfs4_pnfs_ds *
 nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index 74d8d53524382..d23347389626e 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -370,6 +370,25 @@ nfs4_ff_layout_select_ds_fh(struct pnfs_layout_segment *lseg, u32 mirror_idx)
 	return fh;
 }
 
+int
+nfs4_ff_layout_select_ds_stateid(struct pnfs_layout_segment *lseg,
+				u32 mirror_idx,
+				nfs4_stateid *stateid)
+{
+	struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, mirror_idx);
+
+	if (!ff_layout_mirror_valid(lseg, mirror, false)) {
+		pr_err_ratelimited("NFS: %s: No data server for mirror offset index %d\n",
+			__func__, mirror_idx);
+		goto out;
+	}
+
+	nfs4_stateid_copy(stateid, &mirror->stateid);
+	return 1;
+out:
+	return 0;
+}
+
 /**
  * nfs4_ff_layout_prepare_ds - prepare a DS connection for an RPC call
  * @lseg: the layout segment we're operating on
-- 
GitLab


From 0211dda68a4f6531923a2f72d8e8959207f59fba Mon Sep 17 00:00:00 2001
From: Tal Gilboa <talgi@mellanox.com>
Date: Wed, 21 Nov 2018 16:28:23 +0200
Subject: [PATCH 1338/1890] net/dim: Update DIM start sample after each DIM
 iteration

On every iteration of net_dim, the algorithm may choose to
check for the system state by comparing current data sample
with previous data sample. After each of these comparison,
regardless of the action taken, the sample used as baseline
is needed to be updated.

This patch fixes a bug that causes DIM to take wrong decisions,
due to never updating the baseline sample for comparison between
iterations. This way, DIM always compares current sample with
zeros.

Although this is a functional fix, it also improves and stabilizes
performance as the algorithm works properly now.

Performance:
Tested single UDP TX stream with pktgen:
samples/pktgen/pktgen_sample03_burst_single_flow.sh -i p4p2 -d 1.1.1.1
-m 24:8a:07:88:26:8b -f 3 -b 128

ConnectX-5 100GbE packet rate improved from 15-19Mpps to 19-20Mpps.
Also, toggling between profiles is less frequent with the fix.

Fixes: 8115b750dbcb ("net/dim: use struct net_dim_sample as arg to net_dim")
Signed-off-by: Tal Gilboa <talgi@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/net_dim.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/linux/net_dim.h b/include/linux/net_dim.h
index c79e859408e62..fd458389f7d19 100644
--- a/include/linux/net_dim.h
+++ b/include/linux/net_dim.h
@@ -406,6 +406,8 @@ static inline void net_dim(struct net_dim *dim,
 		}
 		/* fall through */
 	case NET_DIM_START_MEASURE:
+		net_dim_sample(end_sample.event_ctr, end_sample.pkt_ctr, end_sample.byte_ctr,
+			       &dim->start_sample);
 		dim->state = NET_DIM_MEASURE_IN_PROGRESS;
 		break;
 	case NET_DIM_APPLY_NEW_PROFILE:
-- 
GitLab


From 6d0f60b0f8588fd4380ea5df9601e12fddd55ce2 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Date: Wed, 21 Nov 2018 16:32:10 +0100
Subject: [PATCH 1339/1890] net: thunderx: set xdp_prog to NULL if bpf_prog_add
 fails

Set xdp_prog pointer to NULL if bpf_prog_add fails since that routine
reports the error code instead of NULL in case of failure and xdp_prog
pointer value is used in the driver to verify if XDP is currently
enabled.
Moreover report the error code to userspace if nicvf_xdp_setup fails

Fixes: 05c773f52b96 ("net: thunderx: Add basic XDP support")
Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/thunder/nicvf_main.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 768f584f83927..88f8a8fa93cdc 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -1784,6 +1784,7 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog)
 	bool if_up = netif_running(nic->netdev);
 	struct bpf_prog *old_prog;
 	bool bpf_attached = false;
+	int ret = 0;
 
 	/* For now just support only the usual MTU sized frames */
 	if (prog && (dev->mtu > 1500)) {
@@ -1817,8 +1818,12 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog)
 	if (nic->xdp_prog) {
 		/* Attach BPF program */
 		nic->xdp_prog = bpf_prog_add(nic->xdp_prog, nic->rx_queues - 1);
-		if (!IS_ERR(nic->xdp_prog))
+		if (!IS_ERR(nic->xdp_prog)) {
 			bpf_attached = true;
+		} else {
+			ret = PTR_ERR(nic->xdp_prog);
+			nic->xdp_prog = NULL;
+		}
 	}
 
 	/* Calculate Tx queues needed for XDP and network stack */
@@ -1830,7 +1835,7 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog)
 		netif_trans_update(nic->netdev);
 	}
 
-	return 0;
+	return ret;
 }
 
 static int nicvf_xdp(struct net_device *netdev, struct netdev_bpf *xdp)
-- 
GitLab


From b7cdec3d699db2e5985ad39de0f25d3b6111928e Mon Sep 17 00:00:00 2001
From: Thomas Falcon <tlfalcon@linux.ibm.com>
Date: Wed, 21 Nov 2018 11:17:58 -0600
Subject: [PATCH 1340/1890] ibmvnic: Fix RX queue buffer cleanup

The wrong index is used when cleaning up RX buffer objects during release
of RX queues. Update to use the correct index counter.

Signed-off-by: Thomas Falcon <tlfalcon@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 27a6df30eafd2..066897a350a58 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -485,8 +485,8 @@ static void release_rx_pools(struct ibmvnic_adapter *adapter)
 
 		for (j = 0; j < rx_pool->size; j++) {
 			if (rx_pool->rx_buff[j].skb) {
-				dev_kfree_skb_any(rx_pool->rx_buff[i].skb);
-				rx_pool->rx_buff[i].skb = NULL;
+				dev_kfree_skb_any(rx_pool->rx_buff[j].skb);
+				rx_pool->rx_buff[j].skb = NULL;
 			}
 		}
 
-- 
GitLab


From 5bf032ef08e6a110edc1e3bfb3c66a208fb55125 Mon Sep 17 00:00:00 2001
From: Thomas Falcon <tlfalcon@linux.ibm.com>
Date: Wed, 21 Nov 2018 11:17:59 -0600
Subject: [PATCH 1341/1890] ibmvnic: Update driver queues after change in ring
 size support

During device reset, queue memory is not being updated to accommodate
changes in ring buffer sizes supported by backing hardware. Track
any differences in ring buffer sizes following the reset and update
queue memory when possible.

Signed-off-by: Thomas Falcon <tlfalcon@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 066897a350a58..c0203a0d5e3b8 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1737,6 +1737,7 @@ static int do_reset(struct ibmvnic_adapter *adapter,
 		    struct ibmvnic_rwi *rwi, u32 reset_state)
 {
 	u64 old_num_rx_queues, old_num_tx_queues;
+	u64 old_num_rx_slots, old_num_tx_slots;
 	struct net_device *netdev = adapter->netdev;
 	int i, rc;
 
@@ -1748,6 +1749,8 @@ static int do_reset(struct ibmvnic_adapter *adapter,
 
 	old_num_rx_queues = adapter->req_rx_queues;
 	old_num_tx_queues = adapter->req_tx_queues;
+	old_num_rx_slots = adapter->req_rx_add_entries_per_subcrq;
+	old_num_tx_slots = adapter->req_tx_entries_per_subcrq;
 
 	ibmvnic_cleanup(netdev);
 
@@ -1810,7 +1813,11 @@ static int do_reset(struct ibmvnic_adapter *adapter,
 			if (rc)
 				return rc;
 		} else if (adapter->req_rx_queues != old_num_rx_queues ||
-			   adapter->req_tx_queues != old_num_tx_queues) {
+			   adapter->req_tx_queues != old_num_tx_queues ||
+			   adapter->req_rx_add_entries_per_subcrq !=
+							old_num_rx_slots ||
+			   adapter->req_tx_entries_per_subcrq !=
+							old_num_tx_slots) {
 			release_rx_pools(adapter);
 			release_tx_pools(adapter);
 			release_napi(adapter);
-- 
GitLab


From 813961de3ee6474dd5703e883471fd941d6c8f69 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 22 Nov 2018 10:49:56 -0800
Subject: [PATCH 1342/1890] bpf: fix integer overflow in queue_stack_map

Fix the following issues:

- allow queue_stack_map for root only
- fix u32 max_entries overflow
- disallow value_size == 0

Fixes: f1a2e44a3aec ("bpf: add queue and stack maps")
Reported-by: Wei Wu <ww9210@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Cc: Mauricio Vasquez B <mauricio.vasquez@polito.it>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/queue_stack_maps.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c
index 8bbd72d3a121f..b384ea9f32549 100644
--- a/kernel/bpf/queue_stack_maps.c
+++ b/kernel/bpf/queue_stack_maps.c
@@ -7,6 +7,7 @@
 #include <linux/bpf.h>
 #include <linux/list.h>
 #include <linux/slab.h>
+#include <linux/capability.h>
 #include "percpu_freelist.h"
 
 #define QUEUE_STACK_CREATE_FLAG_MASK \
@@ -45,8 +46,12 @@ static bool queue_stack_map_is_full(struct bpf_queue_stack *qs)
 /* Called from syscall */
 static int queue_stack_map_alloc_check(union bpf_attr *attr)
 {
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
 	/* check sanity of attributes */
 	if (attr->max_entries == 0 || attr->key_size != 0 ||
+	    attr->value_size == 0 ||
 	    attr->map_flags & ~QUEUE_STACK_CREATE_FLAG_MASK)
 		return -EINVAL;
 
@@ -63,15 +68,10 @@ static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr)
 {
 	int ret, numa_node = bpf_map_attr_numa_node(attr);
 	struct bpf_queue_stack *qs;
-	u32 size, value_size;
-	u64 queue_size, cost;
-
-	size = attr->max_entries + 1;
-	value_size = attr->value_size;
-
-	queue_size = sizeof(*qs) + (u64) value_size * size;
+	u64 size, queue_size, cost;
 
-	cost = queue_size;
+	size = (u64) attr->max_entries + 1;
+	cost = queue_size = sizeof(*qs) + size * attr->value_size;
 	if (cost >= U32_MAX - PAGE_SIZE)
 		return ERR_PTR(-E2BIG);
 
-- 
GitLab


From 3e27c79c4b121a64e85f26ab8957e5d0a09ff28e Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Fri, 23 Nov 2018 10:36:07 +0530
Subject: [PATCH 1343/1890] OPP: Fix parsing of multiple phandles in
 "operating-points-v2" property

We currently return error if more than one phandle is present in the
"operating-points-v2" property, which is incorrect. We only want to
check the count of phandles here and set index to 0 if only one phandle
is present.

Fix it.

Fixes: 5ed4cecd75e9 ("OPP: Pass OPP table to _of_add_opp_table_v{1|2}()")
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/opp/of.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/opp/of.c b/drivers/opp/of.c
index 5a4b47958073e..38a08805a30c6 100644
--- a/drivers/opp/of.c
+++ b/drivers/opp/of.c
@@ -579,10 +579,8 @@ int dev_pm_opp_of_add_table_indexed(struct device *dev, int index)
 		 */
 		count = of_count_phandle_with_args(dev->of_node,
 						   "operating-points-v2", NULL);
-		if (count != 1)
-			return -ENODEV;
-
-		index = 0;
+		if (count == 1)
+			index = 0;
 	}
 
 	opp_table = dev_pm_opp_get_opp_table_indexed(dev, index);
-- 
GitLab


From 09d3f015d1e1b4fee7e9bbdcf54201d239393391 Mon Sep 17 00:00:00 2001
From: Andrea Parri <andrea.parri@amarulasolutions.com>
Date: Thu, 22 Nov 2018 17:10:31 +0100
Subject: [PATCH 1344/1890] uprobes: Fix handle_swbp() vs. unregister() +
 register() race once more

Commit:

  142b18ddc8143 ("uprobes: Fix handle_swbp() vs unregister() + register() race")

added the UPROBE_COPY_INSN flag, and corresponding smp_wmb() and smp_rmb()
memory barriers, to ensure that handle_swbp() uses fully-initialized
uprobes only.

However, the smp_rmb() is mis-placed: this barrier should be placed
after handle_swbp() has tested for the flag, thus guaranteeing that
(program-order) subsequent loads from the uprobe can see the initial
stores performed by prepare_uprobe().

Move the smp_rmb() accordingly.  Also amend the comments associated
to the two memory barriers to indicate their actual locations.

Signed-off-by: Andrea Parri <andrea.parri@amarulasolutions.com>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: stable@kernel.org
Fixes: 142b18ddc8143 ("uprobes: Fix handle_swbp() vs unregister() + register() race")
Link: http://lkml.kernel.org/r/20181122161031.15179-1-andrea.parri@amarulasolutions.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/events/uprobes.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 96d4bee83489b..322e97bbb4370 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -829,7 +829,7 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file,
 	BUG_ON((uprobe->offset & ~PAGE_MASK) +
 			UPROBE_SWBP_INSN_SIZE > PAGE_SIZE);
 
-	smp_wmb(); /* pairs with rmb() in find_active_uprobe() */
+	smp_wmb(); /* pairs with the smp_rmb() in handle_swbp() */
 	set_bit(UPROBE_COPY_INSN, &uprobe->flags);
 
  out:
@@ -2178,10 +2178,18 @@ static void handle_swbp(struct pt_regs *regs)
 	 * After we hit the bp, _unregister + _register can install the
 	 * new and not-yet-analyzed uprobe at the same address, restart.
 	 */
-	smp_rmb(); /* pairs with wmb() in install_breakpoint() */
 	if (unlikely(!test_bit(UPROBE_COPY_INSN, &uprobe->flags)))
 		goto out;
 
+	/*
+	 * Pairs with the smp_wmb() in prepare_uprobe().
+	 *
+	 * Guarantees that if we see the UPROBE_COPY_INSN bit set, then
+	 * we must also see the stores to &uprobe->arch performed by the
+	 * prepare_uprobe() call.
+	 */
+	smp_rmb();
+
 	/* Tracing handlers use ->utask to communicate with fetch methods */
 	if (!get_utask())
 		goto out;
-- 
GitLab


From 6c05946e349d92f527d98644fbc9c41f06312c00 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Mon, 12 Nov 2018 09:28:06 +0800
Subject: [PATCH 1345/1890] arm64: dts: mt7622: fix no more console output on
 rfb1

No default serial console on boot.
Fix this by using a 'stdout-path' property that points to the device.

Fixes: c0d9f9ad4f76 ("arm64: dts: mt7622: add earlycon to mt7622-rfb1 board")
Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Tested-by: Kevin Hilman <khilman@baylibre.com>
[mb: Fix commit message]
Signed-off-by: Matthias Brugger <matthias.bgg@gmail.com>
---
 arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts b/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts
index dcad0869b84ca..3f783348c66a6 100644
--- a/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts
+++ b/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts
@@ -17,8 +17,13 @@ / {
 	model = "MediaTek MT7622 RFB1 board";
 	compatible = "mediatek,mt7622-rfb1", "mediatek,mt7622";
 
+	aliases {
+		serial0 = &uart0;
+	};
+
 	chosen {
-		bootargs = "earlycon=uart8250,mmio32,0x11002000 console=ttyS0,115200n1 swiotlb=512";
+		stdout-path = "serial0:115200n8";
+		bootargs = "earlycon=uart8250,mmio32,0x11002000 swiotlb=512";
 	};
 
 	cpus {
-- 
GitLab


From 396defa8523372645d6d5a8b7f4b5403b119e360 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Mon, 12 Nov 2018 09:28:07 +0800
Subject: [PATCH 1346/1890] arm64: dts: mt7622: fix no more console output on
 BPI-R64 board

Fix this by using a 'stdout-path' property that points to the device.

Fixes: 0b6286dd96c0 ("arm64: dts: mt7622: add bananapi BPI-R64 board")
Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Matthias Brugger <matthias.bgg@gmail.com>
---
 arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts b/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts
index 5d6005c9b0975..710c5c3d87d30 100644
--- a/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts
+++ b/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts
@@ -16,8 +16,13 @@ / {
 	model = "Bananapi BPI-R64";
 	compatible = "bananapi,bpi-r64", "mediatek,mt7622";
 
+	aliases {
+		serial0 = &uart0;
+	};
+
 	chosen {
-		bootargs = "earlycon=uart8250,mmio32,0x11002000 console=ttyS0,115200n1 swiotlb=512";
+		stdout-path = "serial0:115200n8";
+		bootargs = "earlycon=uart8250,mmio32,0x11002000 swiotlb=512";
 	};
 
 	cpus {
-- 
GitLab


From 42a657f57628402c73237547f0134e083e2f6764 Mon Sep 17 00:00:00 2001
From: Pan Bian <bianpan2016@163.com>
Date: Fri, 23 Nov 2018 18:10:15 +0800
Subject: [PATCH 1347/1890] btrfs: relocation: set trans to be NULL after
 ending transaction

The function relocate_block_group calls btrfs_end_transaction to release
trans when update_backref_cache returns 1, and then continues the loop
body. If btrfs_block_rsv_refill fails this time, it will jump out the
loop and the freed trans will be accessed. This may result in a
use-after-free bug. The patch assigns NULL to trans after trans is
released so that it will not be accessed.

Fixes: 0647bf564f1 ("Btrfs: improve forever loop when doing balance relocation")
CC: stable@vger.kernel.org # 4.4+
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Pan Bian <bianpan2016@163.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/relocation.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 924116f654a11..a3f75b8926d44 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3959,6 +3959,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
 restart:
 		if (update_backref_cache(trans, &rc->backref_cache)) {
 			btrfs_end_transaction(trans);
+			trans = NULL;
 			continue;
 		}
 
-- 
GitLab


From dd2f52d8991af9fe0928d59ec502ba52be7bc38d Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Wed, 14 Nov 2018 13:06:21 +0200
Subject: [PATCH 1348/1890] ASoC: omap-mcbsp: Fix latency value calculation for
 pm_qos

The latency number is in usec for the pm_qos. Correct the calculation to
give us the time in usec

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Acked-by: Jarkko Nikula <jarkko.nikula@bitmer.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/omap/omap-mcbsp.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sound/soc/omap/omap-mcbsp.c b/sound/soc/omap/omap-mcbsp.c
index d0ebb6b9bfac3..2d6decbfc99ef 100644
--- a/sound/soc/omap/omap-mcbsp.c
+++ b/sound/soc/omap/omap-mcbsp.c
@@ -308,9 +308,9 @@ static int omap_mcbsp_dai_hw_params(struct snd_pcm_substream *substream,
 			pkt_size = channels;
 		}
 
-		latency = ((((buffer_size - pkt_size) / channels) * 1000)
-				 / (params->rate_num / params->rate_den));
-
+		latency = (buffer_size - pkt_size) / channels;
+		latency = latency * USEC_PER_SEC /
+			  (params->rate_num / params->rate_den);
 		mcbsp->latency[substream->stream] = latency;
 
 		omap_mcbsp_set_threshold(substream, pkt_size);
-- 
GitLab


From 373a500e34aea97971c9d71e45edad458d3da98f Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Wed, 14 Nov 2018 13:06:22 +0200
Subject: [PATCH 1349/1890] ASoC: omap-mcpdm: Add pm_qos handling to avoid
 under/overruns with CPU_IDLE

We need to block sleep states which would require longer time to leave than
the time the DMA must react to the DMA request in order to keep the FIFO
serviced without under of overrun.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Acked-by: Jarkko Nikula <jarkko.nikula@bitmer.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/omap/omap-mcpdm.c | 43 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 42 insertions(+), 1 deletion(-)

diff --git a/sound/soc/omap/omap-mcpdm.c b/sound/soc/omap/omap-mcpdm.c
index 4c1be36c22075..7d5bdc5a28903 100644
--- a/sound/soc/omap/omap-mcpdm.c
+++ b/sound/soc/omap/omap-mcpdm.c
@@ -54,6 +54,8 @@ struct omap_mcpdm {
 	unsigned long phys_base;
 	void __iomem *io_base;
 	int irq;
+	struct pm_qos_request pm_qos_req;
+	int latency[2];
 
 	struct mutex mutex;
 
@@ -277,6 +279,9 @@ static void omap_mcpdm_dai_shutdown(struct snd_pcm_substream *substream,
 				  struct snd_soc_dai *dai)
 {
 	struct omap_mcpdm *mcpdm = snd_soc_dai_get_drvdata(dai);
+	int tx = (substream->stream == SNDRV_PCM_STREAM_PLAYBACK);
+	int stream1 = tx ? SNDRV_PCM_STREAM_PLAYBACK : SNDRV_PCM_STREAM_CAPTURE;
+	int stream2 = tx ? SNDRV_PCM_STREAM_CAPTURE : SNDRV_PCM_STREAM_PLAYBACK;
 
 	mutex_lock(&mcpdm->mutex);
 
@@ -289,6 +294,14 @@ static void omap_mcpdm_dai_shutdown(struct snd_pcm_substream *substream,
 		}
 	}
 
+	if (mcpdm->latency[stream2])
+		pm_qos_update_request(&mcpdm->pm_qos_req,
+				      mcpdm->latency[stream2]);
+	else if (mcpdm->latency[stream1])
+		pm_qos_remove_request(&mcpdm->pm_qos_req);
+
+	mcpdm->latency[stream1] = 0;
+
 	mutex_unlock(&mcpdm->mutex);
 }
 
@@ -300,7 +313,7 @@ static int omap_mcpdm_dai_hw_params(struct snd_pcm_substream *substream,
 	int stream = substream->stream;
 	struct snd_dmaengine_dai_dma_data *dma_data;
 	u32 threshold;
-	int channels;
+	int channels, latency;
 	int link_mask = 0;
 
 	channels = params_channels(params);
@@ -344,14 +357,25 @@ static int omap_mcpdm_dai_hw_params(struct snd_pcm_substream *substream,
 
 		dma_data->maxburst =
 				(MCPDM_DN_THRES_MAX - threshold) * channels;
+		latency = threshold;
 	} else {
 		/* If playback is not running assume a stereo stream to come */
 		if (!mcpdm->config[!stream].link_mask)
 			mcpdm->config[!stream].link_mask = (0x3 << 3);
 
 		dma_data->maxburst = threshold * channels;
+		latency = (MCPDM_DN_THRES_MAX - threshold);
 	}
 
+	/*
+	 * The DMA must act to a DMA request within latency time (usec) to avoid
+	 * under/overflow
+	 */
+	mcpdm->latency[stream] = latency * USEC_PER_SEC / params_rate(params);
+
+	if (!mcpdm->latency[stream])
+		mcpdm->latency[stream] = 10;
+
 	/* Check if we need to restart McPDM with this stream */
 	if (mcpdm->config[stream].link_mask &&
 	    mcpdm->config[stream].link_mask != link_mask)
@@ -366,6 +390,20 @@ static int omap_mcpdm_prepare(struct snd_pcm_substream *substream,
 				  struct snd_soc_dai *dai)
 {
 	struct omap_mcpdm *mcpdm = snd_soc_dai_get_drvdata(dai);
+	struct pm_qos_request *pm_qos_req = &mcpdm->pm_qos_req;
+	int tx = (substream->stream == SNDRV_PCM_STREAM_PLAYBACK);
+	int stream1 = tx ? SNDRV_PCM_STREAM_PLAYBACK : SNDRV_PCM_STREAM_CAPTURE;
+	int stream2 = tx ? SNDRV_PCM_STREAM_CAPTURE : SNDRV_PCM_STREAM_PLAYBACK;
+	int latency = mcpdm->latency[stream2];
+
+	/* Prevent omap hardware from hitting off between FIFO fills */
+	if (!latency || mcpdm->latency[stream1] < latency)
+		latency = mcpdm->latency[stream1];
+
+	if (pm_qos_request_active(pm_qos_req))
+		pm_qos_update_request(pm_qos_req, latency);
+	else if (latency)
+		pm_qos_add_request(pm_qos_req, PM_QOS_CPU_DMA_LATENCY, latency);
 
 	if (!omap_mcpdm_active(mcpdm)) {
 		omap_mcpdm_start(mcpdm);
@@ -427,6 +465,9 @@ static int omap_mcpdm_remove(struct snd_soc_dai *dai)
 	free_irq(mcpdm->irq, (void *)mcpdm);
 	pm_runtime_disable(mcpdm->dev);
 
+	if (pm_qos_request_active(&mcpdm->pm_qos_req))
+		pm_qos_remove_request(&mcpdm->pm_qos_req);
+
 	return 0;
 }
 
-- 
GitLab


From ffdcc3638c58d55a6fa68b6e5dfd4fb4109652eb Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Wed, 14 Nov 2018 13:06:23 +0200
Subject: [PATCH 1350/1890] ASoC: omap-dmic: Add pm_qos handling to avoid
 overruns with CPU_IDLE

We need to block sleep states which would require longer time to leave than
the time the DMA must react to the DMA request in order to keep the FIFO
serviced without overrun.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Acked-by: Jarkko Nikula <jarkko.nikula@bitmer.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/omap/omap-dmic.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/sound/soc/omap/omap-dmic.c b/sound/soc/omap/omap-dmic.c
index fe966272bd0cd..cba9645b64876 100644
--- a/sound/soc/omap/omap-dmic.c
+++ b/sound/soc/omap/omap-dmic.c
@@ -48,6 +48,8 @@ struct omap_dmic {
 	struct device *dev;
 	void __iomem *io_base;
 	struct clk *fclk;
+	struct pm_qos_request pm_qos_req;
+	int latency;
 	int fclk_freq;
 	int out_freq;
 	int clk_div;
@@ -124,6 +126,8 @@ static void omap_dmic_dai_shutdown(struct snd_pcm_substream *substream,
 
 	mutex_lock(&dmic->mutex);
 
+	pm_qos_remove_request(&dmic->pm_qos_req);
+
 	if (!dai->active)
 		dmic->active = 0;
 
@@ -228,6 +232,8 @@ static int omap_dmic_dai_hw_params(struct snd_pcm_substream *substream,
 	/* packet size is threshold * channels */
 	dma_data = snd_soc_dai_get_dma_data(dai, substream);
 	dma_data->maxburst = dmic->threshold * channels;
+	dmic->latency = (OMAP_DMIC_THRES_MAX - dmic->threshold) * USEC_PER_SEC /
+			params_rate(params);
 
 	return 0;
 }
@@ -238,6 +244,9 @@ static int omap_dmic_dai_prepare(struct snd_pcm_substream *substream,
 	struct omap_dmic *dmic = snd_soc_dai_get_drvdata(dai);
 	u32 ctrl;
 
+	if (pm_qos_request_active(&dmic->pm_qos_req))
+		pm_qos_update_request(&dmic->pm_qos_req, dmic->latency);
+
 	/* Configure uplink threshold */
 	omap_dmic_write(dmic, OMAP_DMIC_FIFO_CTRL_REG, dmic->threshold);
 
-- 
GitLab


From 2084ac6c505a58f7efdec13eba633c6aaa085ca5 Mon Sep 17 00:00:00 2001
From: Pan Bian <bianpan2016@163.com>
Date: Fri, 23 Nov 2018 15:56:33 +0800
Subject: [PATCH 1351/1890] exportfs: do not read dentry after free

The function dentry_connected calls dput(dentry) to drop the previously
acquired reference to dentry. In this case, dentry can be released.
After that, IS_ROOT(dentry) checks the condition
(dentry == dentry->d_parent), which may result in a use-after-free bug.
This patch directly compares dentry with its parent obtained before
dropping the reference.

Fixes: a056cc8934c("exportfs: stop retrying once we race with
rename/remove")

Signed-off-by: Pan Bian <bianpan2016@163.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/exportfs/expfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index c8a3dfda17646..c69927bed4eff 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -77,7 +77,7 @@ static bool dentry_connected(struct dentry *dentry)
 		struct dentry *parent = dget_parent(dentry);
 
 		dput(dentry);
-		if (IS_ROOT(dentry)) {
+		if (dentry == parent) {
 			dput(parent);
 			return false;
 		}
-- 
GitLab


From 7194eda1ba0872d917faf3b322540b4f57f11ba5 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 23 Nov 2018 15:44:00 +0100
Subject: [PATCH 1352/1890] ALSA: ac97: Fix incorrect bit shift at AC97-SPSA
 control write

The function snd_ac97_put_spsa() gets the bit shift value from the
associated private_value, but it extracts too much; the current code
extracts 8 bit values in bits 8-15, but this is a combination of two
nibbles (bits 8-11 and bits 12-15) for left and right shifts.
Due to the incorrect bits extraction, the actual shift may go beyond
the 32bit value, as spotted recently by UBSAN check:
 UBSAN: Undefined behaviour in sound/pci/ac97/ac97_codec.c:836:7
 shift exponent 68 is too large for 32-bit type 'int'

This patch fixes the shift value extraction by masking the properly
with 0x0f instead of 0xff.

Reported-and-tested-by: Meelis Roos <mroos@linux.ee>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/ac97/ac97_codec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/pci/ac97/ac97_codec.c b/sound/pci/ac97/ac97_codec.c
index f4459d1a9d67d..27b468f057dd4 100644
--- a/sound/pci/ac97/ac97_codec.c
+++ b/sound/pci/ac97/ac97_codec.c
@@ -824,7 +824,7 @@ static int snd_ac97_put_spsa(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_
 {
 	struct snd_ac97 *ac97 = snd_kcontrol_chip(kcontrol);
 	int reg = kcontrol->private_value & 0xff;
-	int shift = (kcontrol->private_value >> 8) & 0xff;
+	int shift = (kcontrol->private_value >> 8) & 0x0f;
 	int mask = (kcontrol->private_value >> 16) & 0xff;
 	// int invert = (kcontrol->private_value >> 24) & 0xff;
 	unsigned short value, old, new;
-- 
GitLab


From 39070a98d668db8fbaa2a6a6752f732cbcbb14b1 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Thu, 22 Nov 2018 12:38:12 +0100
Subject: [PATCH 1353/1890] ALSA: hda: Add ASRock N68C-S UCC the power_save
 blacklist

Power-saving is causing plops on audio start/stop on the built-in audio
of the nForce 430 based ASRock N68C-S UCC motherboard, add this model to
the power_save blacklist.

BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1525104
Cc: <stable@vger.kernel.org>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/hda_intel.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index d8eb2b5f51ae7..0bbdf1a01e763 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -2169,6 +2169,8 @@ static struct snd_pci_quirk power_save_blacklist[] = {
 	/* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */
 	SND_PCI_QUIRK(0x1849, 0xc892, "Asrock B85M-ITX", 0),
 	/* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */
+	SND_PCI_QUIRK(0x1849, 0x0397, "Asrock N68C-S UCC", 0),
+	/* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */
 	SND_PCI_QUIRK(0x1849, 0x7662, "Asrock H81M-HDS", 0),
 	/* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */
 	SND_PCI_QUIRK(0x1043, 0x8733, "Asus Prime X370-Pro", 0),
-- 
GitLab


From 8cd65271f8e545ddeed10ecc2e417936bdff168e Mon Sep 17 00:00:00 2001
From: Anisse Astier <anisse@astier.eu>
Date: Fri, 23 Nov 2018 17:59:11 +0100
Subject: [PATCH 1354/1890] ALSA: hda/realtek - fix headset mic detection for
 MSI MS-B171

MSI Cubi N 8GL (MS-B171) needs the same fixup as its older model, the
MS-B120, in order for the headset mic to be properly detected.

They both use a single 3-way jack for both mic and headset with an
ALC283 codec, with the same pins used.

Cc: stable@vger.kernel.org
Signed-off-by: Anisse Astier <anisse@astier.eu>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 970bc44a378b8..1118fd1bbf1a2 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -6532,6 +6532,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x144d, 0xc740, "Samsung Ativ book 8 (NP870Z5G)", ALC269_FIXUP_ATIV_BOOK_8),
 	SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC),
 	SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC),
+	SND_PCI_QUIRK(0x1462, 0xb171, "Cubi N 8GL (MS-B171)", ALC283_FIXUP_HEADSET_MIC),
 	SND_PCI_QUIRK(0x17aa, 0x1036, "Lenovo P520", ALC233_FIXUP_LENOVO_MULTI_CODECS),
 	SND_PCI_QUIRK(0x17aa, 0x20f2, "Thinkpad SL410/510", ALC269_FIXUP_SKU_IGNORE),
 	SND_PCI_QUIRK(0x17aa, 0x215e, "Thinkpad L512", ALC269_FIXUP_SKU_IGNORE),
-- 
GitLab


From b5d9a07ef7736b2456b9d3c90568de25e43d8ec3 Mon Sep 17 00:00:00 2001
From: Sergey Matyukevich <geomatsi@gmail.com>
Date: Fri, 16 Nov 2018 21:21:30 +0300
Subject: [PATCH 1355/1890] arm64: sysreg: fix sparse warnings

Specify correct type for the constants to avoid
the following sparse complaints:

./arch/arm64/include/asm/sysreg.h:471:42: warning: constant 0xffffffffffffffff is so big it is unsigned long
./arch/arm64/include/asm/sysreg.h:512:42: warning: constant 0xffffffffffffffff is so big it is unsigned long

Acked-by: Will Deacon <will.deacon@arm.com>
Acked-by: Olof Johansson <olof@lixom.net>
Acked-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Signed-off-by: Sergey Matyukevich <geomatsi@gmail.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/sysreg.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 0c909c4a932ff..842fb95726610 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -468,7 +468,7 @@
 			 SCTLR_ELx_SA     | SCTLR_ELx_I    | SCTLR_ELx_WXN | \
 			 SCTLR_ELx_DSSBS | ENDIAN_CLEAR_EL2 | SCTLR_EL2_RES0)
 
-#if (SCTLR_EL2_SET ^ SCTLR_EL2_CLEAR) != 0xffffffffffffffff
+#if (SCTLR_EL2_SET ^ SCTLR_EL2_CLEAR) != 0xffffffffffffffffUL
 #error "Inconsistent SCTLR_EL2 set/clear bits"
 #endif
 
@@ -509,7 +509,7 @@
 			 SCTLR_EL1_UMA | SCTLR_ELx_WXN     | ENDIAN_CLEAR_EL1 |\
 			 SCTLR_ELx_DSSBS | SCTLR_EL1_NTWI  | SCTLR_EL1_RES0)
 
-#if (SCTLR_EL1_SET ^ SCTLR_EL1_CLEAR) != 0xffffffffffffffff
+#if (SCTLR_EL1_SET ^ SCTLR_EL1_CLEAR) != 0xffffffffffffffffUL
 #error "Inconsistent SCTLR_EL1 set/clear bits"
 #endif
 
-- 
GitLab


From 4f9f49646a5733c0c2bd49940673dde89a9c5add Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Wed, 21 Nov 2018 15:07:00 +0000
Subject: [PATCH 1356/1890] arm64: cpufeature: Fix mismerge of
 CONFIG_ARM64_SSBD block

When merging support for SSBD and the CRC32 instructions, the conflict
resolution for the new capability entries in arm64_features[]
inadvertedly predicated the availability of the CRC32 instructions on
CONFIG_ARM64_SSBD, despite the functionality being entirely unrelated.

Move the #ifdef CONFIG_ARM64_SSBD down so that it only covers the SSBD
capability.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/cpufeature.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index af50064dea51a..aec5ecb85737e 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1333,7 +1333,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.cpu_enable = cpu_enable_hw_dbm,
 	},
 #endif
-#ifdef CONFIG_ARM64_SSBD
 	{
 		.desc = "CRC32 instructions",
 		.capability = ARM64_HAS_CRC32,
@@ -1343,6 +1342,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.field_pos = ID_AA64ISAR0_CRC32_SHIFT,
 		.min_field_value = 1,
 	},
+#ifdef CONFIG_ARM64_SSBD
 	{
 		.desc = "Speculative Store Bypassing Safe (SSBS)",
 		.capability = ARM64_SSBS,
-- 
GitLab


From 5cd8d46ea1562be80063f53c7c6a5f40224de623 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Tue, 20 Nov 2018 13:00:18 -0500
Subject: [PATCH 1357/1890] packet: copy user buffers before orphan or clone

tpacket_snd sends packets with user pages linked into skb frags. It
notifies that pages can be reused when the skb is released by setting
skb->destructor to tpacket_destruct_skb.

This can cause data corruption if the skb is orphaned (e.g., on
transmit through veth) or cloned (e.g., on mirror to another psock).

Create a kernel-private copy of data in these cases, same as tun/tap
zerocopy transmission. Reuse that infrastructure: mark the skb as
SKBTX_ZEROCOPY_FRAG, which will trigger copy in skb_orphan_frags(_rx).

Unlike other zerocopy packets, do not set shinfo destructor_arg to
struct ubuf_info. tpacket_destruct_skb already uses that ptr to notify
when the original skb is released and a timestamp is recorded. Do not
change this timestamp behavior. The ubuf_info->callback is not needed
anyway, as no zerocopy notification is expected.

Mark destructor_arg as not-a-uarg by setting the lower bit to 1. The
resulting value is not a valid ubuf_info pointer, nor a valid
tpacket_snd frame address. Add skb_zcopy_.._nouarg helpers for this.

The fix relies on features introduced in commit 52267790ef52 ("sock:
add MSG_ZEROCOPY"), so can be backported as is only to 4.14.

Tested with from `./in_netns.sh ./txring_overwrite` from
http://github.com/wdebruij/kerneltools/tests

Fixes: 69e3c75f4d54 ("net: TX_RING and packet mmap")
Reported-by: Anand H. Krishnan <anandhkrishnan@gmail.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 18 +++++++++++++++++-
 net/packet/af_packet.c |  4 ++--
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 0ba687454267f..0d1b2c3f127b3 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1326,6 +1326,22 @@ static inline void skb_zcopy_set(struct sk_buff *skb, struct ubuf_info *uarg)
 	}
 }
 
+static inline void skb_zcopy_set_nouarg(struct sk_buff *skb, void *val)
+{
+	skb_shinfo(skb)->destructor_arg = (void *)((uintptr_t) val | 0x1UL);
+	skb_shinfo(skb)->tx_flags |= SKBTX_ZEROCOPY_FRAG;
+}
+
+static inline bool skb_zcopy_is_nouarg(struct sk_buff *skb)
+{
+	return (uintptr_t) skb_shinfo(skb)->destructor_arg & 0x1UL;
+}
+
+static inline void *skb_zcopy_get_nouarg(struct sk_buff *skb)
+{
+	return (void *)((uintptr_t) skb_shinfo(skb)->destructor_arg & ~0x1UL);
+}
+
 /* Release a reference on a zerocopy structure */
 static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy)
 {
@@ -1335,7 +1351,7 @@ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy)
 		if (uarg->callback == sock_zerocopy_callback) {
 			uarg->zerocopy = uarg->zerocopy && zerocopy;
 			sock_zerocopy_put(uarg);
-		} else {
+		} else if (!skb_zcopy_is_nouarg(skb)) {
 			uarg->callback(uarg, zerocopy);
 		}
 
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index ec3095f13aaee..a74650e98f423 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2394,7 +2394,7 @@ static void tpacket_destruct_skb(struct sk_buff *skb)
 		void *ph;
 		__u32 ts;
 
-		ph = skb_shinfo(skb)->destructor_arg;
+		ph = skb_zcopy_get_nouarg(skb);
 		packet_dec_pending(&po->tx_ring);
 
 		ts = __packet_set_timestamp(po, ph, skb);
@@ -2461,7 +2461,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 	skb->mark = po->sk.sk_mark;
 	skb->tstamp = sockc->transmit_time;
 	sock_tx_timestamp(&po->sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags);
-	skb_shinfo(skb)->destructor_arg = ph.raw;
+	skb_zcopy_set_nouarg(skb, ph.raw);
 
 	skb_reserve(skb, hlen);
 	skb_reset_network_header(skb);
-- 
GitLab


From 896585d48e8e9ba44cd1754fbce8537feffcc1a5 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Wed, 21 Nov 2018 21:52:33 +0800
Subject: [PATCH 1358/1890] net/ipv6: re-do dad when interface has IFF_NOARP
 flag change

When we add a new IPv6 address, we should also join corresponding solicited-node
multicast address, unless the interface has IFF_NOARP flag, as function
addrconf_join_solict() did. But if we remove IFF_NOARP flag later, we do
not do dad and add the mcast address. So we will drop corresponding neighbour
discovery message that came from other nodes.

A typical example is after creating a ipvlan with mode l3, setting up an ipv6
address and changing the mode to l2. Then we will not be able to ping this
address as the interface doesn't join related solicited-node mcast address.

Fix it by re-doing dad when interface changed IFF_NOARP flag. Then we will add
corresponding mcast group and check if there is a duplicate address on the
network.

Reported-by: Jianlin Shi <jishi@redhat.com>
Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 63a808d5af157..045597b9a7c05 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -179,7 +179,7 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp);
 static void addrconf_dad_work(struct work_struct *w);
 static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id,
 				   bool send_na);
-static void addrconf_dad_run(struct inet6_dev *idev);
+static void addrconf_dad_run(struct inet6_dev *idev, bool restart);
 static void addrconf_rs_timer(struct timer_list *t);
 static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
 static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
@@ -3439,6 +3439,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 			   void *ptr)
 {
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct netdev_notifier_change_info *change_info;
 	struct netdev_notifier_changeupper_info *info;
 	struct inet6_dev *idev = __in6_dev_get(dev);
 	struct net *net = dev_net(dev);
@@ -3513,7 +3514,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 				break;
 			}
 
-			if (idev) {
+			if (!IS_ERR_OR_NULL(idev)) {
 				if (idev->if_flags & IF_READY) {
 					/* device is already configured -
 					 * but resend MLD reports, we might
@@ -3521,6 +3522,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 					 * multicast snooping switches
 					 */
 					ipv6_mc_up(idev);
+					change_info = ptr;
+					if (change_info->flags_changed & IFF_NOARP)
+						addrconf_dad_run(idev, true);
 					rt6_sync_up(dev, RTNH_F_LINKDOWN);
 					break;
 				}
@@ -3555,7 +3559,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 
 		if (!IS_ERR_OR_NULL(idev)) {
 			if (run_pending)
-				addrconf_dad_run(idev);
+				addrconf_dad_run(idev, false);
 
 			/* Device has an address by now */
 			rt6_sync_up(dev, RTNH_F_DEAD);
@@ -4173,16 +4177,19 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id,
 		addrconf_verify_rtnl();
 }
 
-static void addrconf_dad_run(struct inet6_dev *idev)
+static void addrconf_dad_run(struct inet6_dev *idev, bool restart)
 {
 	struct inet6_ifaddr *ifp;
 
 	read_lock_bh(&idev->lock);
 	list_for_each_entry(ifp, &idev->addr_list, if_list) {
 		spin_lock(&ifp->lock);
-		if (ifp->flags & IFA_F_TENTATIVE &&
-		    ifp->state == INET6_IFADDR_STATE_DAD)
+		if ((ifp->flags & IFA_F_TENTATIVE &&
+		     ifp->state == INET6_IFADDR_STATE_DAD) || restart) {
+			if (restart)
+				ifp->state = INET6_IFADDR_STATE_PREDAD;
 			addrconf_dad_kick(ifp);
+		}
 		spin_unlock(&ifp->lock);
 	}
 	read_unlock_bh(&idev->lock);
-- 
GitLab


From 605108acfe6233b72e2f803aa1cb59a2af3001ca Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Wed, 21 Nov 2018 18:21:35 +0100
Subject: [PATCH 1359/1890] net: don't keep lonely packets forever in the gro
 hash

Eric noted that with UDP GRO and NAPI timeout, we could keep a single
UDP packet inside the GRO hash forever, if the related NAPI instance
calls napi_gro_complete() at an higher frequency than the NAPI timeout.
Willem noted that even TCP packets could be trapped there, till the
next retransmission.
This patch tries to address the issue, flushing the old packets -
those with a NAPI_GRO_CB age before the current jiffy - before scheduling
the NAPI timeout. The rationale is that such a timeout should be
well below a jiffy and we are not flushing packets eligible for sane GRO.

v1  -> v2:
 - clarified the commit message and comment

RFC -> v1:
 - added 'Fixes tags', cleaned-up the wording.

Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
Fixes: 3b47d30396ba ("net: gro: add a per device gro flush timer")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 066aa902d85c3..ddc551f24ba2a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5970,11 +5970,14 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
 		if (work_done)
 			timeout = n->dev->gro_flush_timeout;
 
+		/* When the NAPI instance uses a timeout and keeps postponing
+		 * it, we need to bound somehow the time packets are kept in
+		 * the GRO layer
+		 */
+		napi_gro_flush(n, !!timeout);
 		if (timeout)
 			hrtimer_start(&n->timer, ns_to_ktime(timeout),
 				      HRTIMER_MODE_REL_PINNED);
-		else
-			napi_gro_flush(n, false);
 	}
 	if (unlikely(!list_empty(&n->poll_list))) {
 		/* If n->poll_list is not empty, we need to mask irqs */
-- 
GitLab


From 484afd1bd3fc6f9f5347289fc8b285aa65f67054 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Wed, 21 Nov 2018 18:23:53 +0100
Subject: [PATCH 1360/1890] net/sched: act_police: add missing spinlock
 initialization

commit f2cbd4852820 ("net/sched: act_police: fix race condition on state
variables") introduces a new spinlock, but forgets its initialization.
Ensure that tcf_police_init() initializes 'tcfp_lock' every time a 'police'
action is newly created, to avoid the following lockdep splat:

 INFO: trying to register non-static key.
 the code is fine but needs lockdep annotation.
 turning off the locking correctness validator.
 <...>
 Call Trace:
  dump_stack+0x85/0xcb
  register_lock_class+0x581/0x590
  __lock_acquire+0xd4/0x1330
  ? tcf_police_init+0x2fa/0x650 [act_police]
  ? lock_acquire+0x9e/0x1a0
  lock_acquire+0x9e/0x1a0
  ? tcf_police_init+0x2fa/0x650 [act_police]
  ? tcf_police_init+0x55a/0x650 [act_police]
  _raw_spin_lock_bh+0x34/0x40
  ? tcf_police_init+0x2fa/0x650 [act_police]
  tcf_police_init+0x2fa/0x650 [act_police]
  tcf_action_init_1+0x384/0x4c0
  tcf_action_init+0xf6/0x160
  tcf_action_add+0x73/0x170
  tc_ctl_action+0x122/0x160
  rtnetlink_rcv_msg+0x2a4/0x490
  ? netlink_deliver_tap+0x99/0x400
  ? validate_linkmsg+0x370/0x370
  netlink_rcv_skb+0x4d/0x130
  netlink_unicast+0x196/0x230
  netlink_sendmsg+0x2e5/0x3e0
  sock_sendmsg+0x36/0x40
  ___sys_sendmsg+0x280/0x2f0
  ? _raw_spin_unlock+0x24/0x30
  ? handle_pte_fault+0xafe/0xf30
  ? find_held_lock+0x2d/0x90
  ? syscall_trace_enter+0x1df/0x360
  ? __sys_sendmsg+0x5e/0xa0
  __sys_sendmsg+0x5e/0xa0
  do_syscall_64+0x60/0x210
  entry_SYSCALL_64_after_hwframe+0x49/0xbe
 RIP: 0033:0x7f1841c7cf10
 Code: c3 48 8b 05 82 6f 2c 00 f7 db 64 89 18 48 83 cb ff eb dd 0f 1f 80 00 00 00 00 83 3d 8d d0 2c 00 00 75 10 b8 2e 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 ae cc 00 00 48 89 04 24
 RSP: 002b:00007ffcf9df4d68 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
 RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007f1841c7cf10
 RDX: 0000000000000000 RSI: 00007ffcf9df4dc0 RDI: 0000000000000003
 RBP: 000000005bf56105 R08: 0000000000000002 R09: 00007ffcf9df8edc
 R10: 00007ffcf9df47e0 R11: 0000000000000246 R12: 0000000000671be0
 R13: 00007ffcf9df4e84 R14: 0000000000000008 R15: 0000000000000000

Fixes: f2cbd4852820 ("net/sched: act_police: fix race condition on state variables")
Reported-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_police.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index ee4665a5a0222..37c9b8f0e10f0 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -124,6 +124,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
 			return ret;
 		}
 		ret = ACT_P_CREATED;
+		spin_lock_init(&(to_police(*a)->tcfp_lock));
 	} else if (!ovr) {
 		tcf_idr_release(*a, bind);
 		return -EEXIST;
-- 
GitLab


From e59ff2c49ae16e1d179de679aca81405829aee6c Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Thu, 22 Nov 2018 14:36:30 +0800
Subject: [PATCH 1361/1890] virtio-net: disable guest csum during XDP set

We don't disable VIRTIO_NET_F_GUEST_CSUM if XDP was set. This means we
can receive partial csumed packets with metadata kept in the
vnet_hdr. This may have several side effects:

- It could be overridden by header adjustment, thus is might be not
  correct after XDP processing.
- There's no way to pass such metadata information through
  XDP_REDIRECT to another driver.
- XDP does not support checksum offload right now.

So simply disable guest csum if possible in this the case of XDP.

Fixes: 3f93522ffab2d ("virtio-net: switch off offloads on demand if possible on XDP set")
Reported-by: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Pavel Popa <pashinho1990@gmail.com>
Cc: David Ahern <dsahern@gmail.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/virtio_net.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 3e2c041d76ac1..9b5ace538824a 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -70,7 +70,8 @@ static const unsigned long guest_offloads[] = {
 	VIRTIO_NET_F_GUEST_TSO4,
 	VIRTIO_NET_F_GUEST_TSO6,
 	VIRTIO_NET_F_GUEST_ECN,
-	VIRTIO_NET_F_GUEST_UFO
+	VIRTIO_NET_F_GUEST_UFO,
+	VIRTIO_NET_F_GUEST_CSUM
 };
 
 struct virtnet_stat_desc {
@@ -2334,9 +2335,6 @@ static int virtnet_clear_guest_offloads(struct virtnet_info *vi)
 	if (!vi->guest_offloads)
 		return 0;
 
-	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))
-		offloads = 1ULL << VIRTIO_NET_F_GUEST_CSUM;
-
 	return virtnet_set_guest_offloads(vi, offloads);
 }
 
@@ -2346,8 +2344,6 @@ static int virtnet_restore_guest_offloads(struct virtnet_info *vi)
 
 	if (!vi->guest_offloads)
 		return 0;
-	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))
-		offloads |= 1ULL << VIRTIO_NET_F_GUEST_CSUM;
 
 	return virtnet_set_guest_offloads(vi, offloads);
 }
-- 
GitLab


From 18ba58e1c234ea1a2d9835ac8c1735d965ce4640 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Thu, 22 Nov 2018 14:36:31 +0800
Subject: [PATCH 1362/1890] virtio-net: fail XDP set if guest csum is
 negotiated

We don't support partial csumed packet since its metadata will be lost
or incorrect during XDP processing. So fail the XDP set if guest_csum
feature is negotiated.

Fixes: f600b6905015 ("virtio_net: Add XDP support")
Reported-by: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Pavel Popa <pashinho1990@gmail.com>
Cc: David Ahern <dsahern@gmail.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/virtio_net.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 9b5ace538824a..cecfd77c9f3ca 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2361,8 +2361,9 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
 	    && (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
 	        virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
 	        virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
-		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO))) {
-		NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing LRO, disable LRO first");
+		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) ||
+		virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))) {
+		NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing LRO/CSUM, disable LRO/CSUM first");
 		return -EOPNOTSUPP;
 	}
 
-- 
GitLab


From ca088320a02537f36c243ac21794525d8eabb3bd Mon Sep 17 00:00:00 2001
From: Yixian Liu <liuyixian@huawei.com>
Date: Fri, 23 Nov 2018 15:46:07 +0800
Subject: [PATCH 1363/1890] RDMA/hns: Bugfix pbl configuration for rereg mr

Current hns driver assigned the first two PBL page addresses from previous
registered MR to the hardware when reregister MR changing the memory
locations occurred. This will lead to PBL addressing error as the PBL has
already been released. This patch fixes this wrong assignment by using the
page address from new allocated PBL.

Fixes: a2c80b7b4119 ("RDMA/hns: Add rereg mr support for hip08")
Signed-off-by: Yixian Liu <liuyixian@huawei.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 128 ++++++++++-----------
 1 file changed, 60 insertions(+), 68 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index a4c62ae23a9ae..3beb1523e17c2 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -1756,10 +1756,9 @@ static int hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port,
 	return hns_roce_cmq_send(hr_dev, &desc, 1);
 }
 
-static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
-				  unsigned long mtpt_idx)
+static int set_mtpt_pbl(struct hns_roce_v2_mpt_entry *mpt_entry,
+			struct hns_roce_mr *mr)
 {
-	struct hns_roce_v2_mpt_entry *mpt_entry;
 	struct scatterlist *sg;
 	u64 page_addr;
 	u64 *pages;
@@ -1767,6 +1766,53 @@ static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
 	int len;
 	int entry;
 
+	mpt_entry->pbl_size = cpu_to_le32(mr->pbl_size);
+	mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(mr->pbl_ba >> 3));
+	roce_set_field(mpt_entry->byte_48_mode_ba,
+		       V2_MPT_BYTE_48_PBL_BA_H_M, V2_MPT_BYTE_48_PBL_BA_H_S,
+		       upper_32_bits(mr->pbl_ba >> 3));
+
+	pages = (u64 *)__get_free_page(GFP_KERNEL);
+	if (!pages)
+		return -ENOMEM;
+
+	i = 0;
+	for_each_sg(mr->umem->sg_head.sgl, sg, mr->umem->nmap, entry) {
+		len = sg_dma_len(sg) >> PAGE_SHIFT;
+		for (j = 0; j < len; ++j) {
+			page_addr = sg_dma_address(sg) +
+				(j << mr->umem->page_shift);
+			pages[i] = page_addr >> 6;
+			/* Record the first 2 entry directly to MTPT table */
+			if (i >= HNS_ROCE_V2_MAX_INNER_MTPT_NUM - 1)
+				goto found;
+			i++;
+		}
+	}
+found:
+	mpt_entry->pa0_l = cpu_to_le32(lower_32_bits(pages[0]));
+	roce_set_field(mpt_entry->byte_56_pa0_h, V2_MPT_BYTE_56_PA0_H_M,
+		       V2_MPT_BYTE_56_PA0_H_S, upper_32_bits(pages[0]));
+
+	mpt_entry->pa1_l = cpu_to_le32(lower_32_bits(pages[1]));
+	roce_set_field(mpt_entry->byte_64_buf_pa1, V2_MPT_BYTE_64_PA1_H_M,
+		       V2_MPT_BYTE_64_PA1_H_S, upper_32_bits(pages[1]));
+	roce_set_field(mpt_entry->byte_64_buf_pa1,
+		       V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M,
+		       V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S,
+		       mr->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
+
+	free_page((unsigned long)pages);
+
+	return 0;
+}
+
+static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
+				  unsigned long mtpt_idx)
+{
+	struct hns_roce_v2_mpt_entry *mpt_entry;
+	int ret;
+
 	mpt_entry = mb_buf;
 	memset(mpt_entry, 0, sizeof(*mpt_entry));
 
@@ -1781,7 +1827,6 @@ static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
 		       mr->pbl_ba_pg_sz + PG_SHIFT_OFFSET);
 	roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
 		       V2_MPT_BYTE_4_PD_S, mr->pd);
-	mpt_entry->byte_4_pd_hop_st = cpu_to_le32(mpt_entry->byte_4_pd_hop_st);
 
 	roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RA_EN_S, 0);
 	roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1);
@@ -1796,13 +1841,11 @@ static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
 		     (mr->access & IB_ACCESS_REMOTE_WRITE ? 1 : 0));
 	roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_LW_EN_S,
 		     (mr->access & IB_ACCESS_LOCAL_WRITE ? 1 : 0));
-	mpt_entry->byte_8_mw_cnt_en = cpu_to_le32(mpt_entry->byte_8_mw_cnt_en);
 
 	roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S,
 		     mr->type == MR_TYPE_MR ? 0 : 1);
 	roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_INNER_PA_VLD_S,
 		     1);
-	mpt_entry->byte_12_mw_pa = cpu_to_le32(mpt_entry->byte_12_mw_pa);
 
 	mpt_entry->len_l = cpu_to_le32(lower_32_bits(mr->size));
 	mpt_entry->len_h = cpu_to_le32(upper_32_bits(mr->size));
@@ -1813,53 +1856,9 @@ static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
 	if (mr->type == MR_TYPE_DMA)
 		return 0;
 
-	mpt_entry->pbl_size = cpu_to_le32(mr->pbl_size);
-
-	mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(mr->pbl_ba >> 3));
-	roce_set_field(mpt_entry->byte_48_mode_ba, V2_MPT_BYTE_48_PBL_BA_H_M,
-		       V2_MPT_BYTE_48_PBL_BA_H_S,
-		       upper_32_bits(mr->pbl_ba >> 3));
-	mpt_entry->byte_48_mode_ba = cpu_to_le32(mpt_entry->byte_48_mode_ba);
-
-	pages = (u64 *)__get_free_page(GFP_KERNEL);
-	if (!pages)
-		return -ENOMEM;
-
-	i = 0;
-	for_each_sg(mr->umem->sg_head.sgl, sg, mr->umem->nmap, entry) {
-		len = sg_dma_len(sg) >> PAGE_SHIFT;
-		for (j = 0; j < len; ++j) {
-			page_addr = sg_dma_address(sg) +
-				    (j << mr->umem->page_shift);
-			pages[i] = page_addr >> 6;
-
-			/* Record the first 2 entry directly to MTPT table */
-			if (i >= HNS_ROCE_V2_MAX_INNER_MTPT_NUM - 1)
-				goto found;
-			i++;
-		}
-	}
-
-found:
-	mpt_entry->pa0_l = cpu_to_le32(lower_32_bits(pages[0]));
-	roce_set_field(mpt_entry->byte_56_pa0_h, V2_MPT_BYTE_56_PA0_H_M,
-		       V2_MPT_BYTE_56_PA0_H_S,
-		       upper_32_bits(pages[0]));
-	mpt_entry->byte_56_pa0_h = cpu_to_le32(mpt_entry->byte_56_pa0_h);
-
-	mpt_entry->pa1_l = cpu_to_le32(lower_32_bits(pages[1]));
-	roce_set_field(mpt_entry->byte_64_buf_pa1, V2_MPT_BYTE_64_PA1_H_M,
-		       V2_MPT_BYTE_64_PA1_H_S, upper_32_bits(pages[1]));
+	ret = set_mtpt_pbl(mpt_entry, mr);
 
-	free_page((unsigned long)pages);
-
-	roce_set_field(mpt_entry->byte_64_buf_pa1,
-		       V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M,
-		       V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S,
-		       mr->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
-	mpt_entry->byte_64_buf_pa1 = cpu_to_le32(mpt_entry->byte_64_buf_pa1);
-
-	return 0;
+	return ret;
 }
 
 static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev,
@@ -1868,6 +1867,7 @@ static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev,
 					u64 size, void *mb_buf)
 {
 	struct hns_roce_v2_mpt_entry *mpt_entry = mb_buf;
+	int ret = 0;
 
 	if (flags & IB_MR_REREG_PD) {
 		roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
@@ -1880,14 +1880,14 @@ static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev,
 			     V2_MPT_BYTE_8_BIND_EN_S,
 			     (mr_access_flags & IB_ACCESS_MW_BIND ? 1 : 0));
 		roce_set_bit(mpt_entry->byte_8_mw_cnt_en,
-			   V2_MPT_BYTE_8_ATOMIC_EN_S,
-			   (mr_access_flags & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0));
+			     V2_MPT_BYTE_8_ATOMIC_EN_S,
+			     mr_access_flags & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0);
 		roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RR_EN_S,
-			     (mr_access_flags & IB_ACCESS_REMOTE_READ ? 1 : 0));
+			     mr_access_flags & IB_ACCESS_REMOTE_READ ? 1 : 0);
 		roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RW_EN_S,
-			    (mr_access_flags & IB_ACCESS_REMOTE_WRITE ? 1 : 0));
+			     mr_access_flags & IB_ACCESS_REMOTE_WRITE ? 1 : 0);
 		roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_LW_EN_S,
-			     (mr_access_flags & IB_ACCESS_LOCAL_WRITE ? 1 : 0));
+			     mr_access_flags & IB_ACCESS_LOCAL_WRITE ? 1 : 0);
 	}
 
 	if (flags & IB_MR_REREG_TRANS) {
@@ -1896,21 +1896,13 @@ static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev,
 		mpt_entry->len_l = cpu_to_le32(lower_32_bits(size));
 		mpt_entry->len_h = cpu_to_le32(upper_32_bits(size));
 
-		mpt_entry->pbl_size = cpu_to_le32(mr->pbl_size);
-		mpt_entry->pbl_ba_l =
-				cpu_to_le32(lower_32_bits(mr->pbl_ba >> 3));
-		roce_set_field(mpt_entry->byte_48_mode_ba,
-			       V2_MPT_BYTE_48_PBL_BA_H_M,
-			       V2_MPT_BYTE_48_PBL_BA_H_S,
-			       upper_32_bits(mr->pbl_ba >> 3));
-		mpt_entry->byte_48_mode_ba =
-				cpu_to_le32(mpt_entry->byte_48_mode_ba);
-
 		mr->iova = iova;
 		mr->size = size;
+
+		ret = set_mtpt_pbl(mpt_entry, mr);
 	}
 
-	return 0;
+	return ret;
 }
 
 static int hns_roce_v2_frmr_write_mtpt(void *mb_buf, struct hns_roce_mr *mr)
-- 
GitLab


From 1efb6ee3edea57f57f9fb05dba8dcb3f7333f61f Mon Sep 17 00:00:00 2001
From: Martynas Pumputis <m@lambda.lt>
Date: Fri, 23 Nov 2018 17:43:26 +0100
Subject: [PATCH 1364/1890] bpf: fix check of allowed specifiers in
 bpf_trace_printk

A format string consisting of "%p" or "%s" followed by an invalid
specifier (e.g. "%p%\n" or "%s%") could pass the check which
would make format_decode (lib/vsprintf.c) to warn.

Fixes: 9c959c863f82 ("tracing: Allow BPF programs to call bpf_trace_printk()")
Reported-by: syzbot+1ec5c5ec949c4adaa0c4@syzkaller.appspotmail.com
Signed-off-by: Martynas Pumputis <m@lambda.lt>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/trace/bpf_trace.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 08fcfe440c637..9864a35c8bb57 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -196,11 +196,13 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
 			i++;
 		} else if (fmt[i] == 'p' || fmt[i] == 's') {
 			mod[fmt_cnt]++;
-			i++;
-			if (!isspace(fmt[i]) && !ispunct(fmt[i]) && fmt[i] != 0)
+			/* disallow any further format extensions */
+			if (fmt[i + 1] != 0 &&
+			    !isspace(fmt[i + 1]) &&
+			    !ispunct(fmt[i + 1]))
 				return -EINVAL;
 			fmt_cnt++;
-			if (fmt[i - 1] == 's') {
+			if (fmt[i] == 's') {
 				if (str_seen)
 					/* allow only one '%s' per fmt string */
 					return -EINVAL;
-- 
GitLab


From 5ed9dc99107144f83b6c1bb52a69b58875baf540 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Thu, 22 Nov 2018 16:15:28 +0800
Subject: [PATCH 1365/1890] team: no need to do team_notify_peers or
 team_mcast_rejoin when disabling port

team_notify_peers() will send ARP and NA to notify peers. team_mcast_rejoin()
will send multicast join group message to notify peers. We should do this when
enabling/changed to a new port. But it doesn't make sense to do it when a port
is disabled.

On the other hand, when we set mcast_rejoin_count to 2, and do a failover,
team_port_disable() will increase mcast_rejoin.count_pending to 2 and then
team_port_enable() will increase mcast_rejoin.count_pending to 4. We will send
4 mcast rejoin messages at latest, which will make user confused. The same
with notify_peers.count.

Fix it by deleting team_notify_peers() and team_mcast_rejoin() in
team_port_disable().

Reported-by: Liang Li <liali@redhat.com>
Fixes: fc423ff00df3a ("team: add peer notification")
Fixes: 492b200efdd20 ("team: add support for sending multicast rejoins")
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/team/team.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index db633ae9f784a..364f514d56d87 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -985,8 +985,6 @@ static void team_port_disable(struct team *team,
 	team->en_port_count--;
 	team_queue_override_port_del(team, port);
 	team_adjust_ops(team);
-	team_notify_peers(team);
-	team_mcast_rejoin(team);
 	team_lower_state_changed(port);
 }
 
-- 
GitLab


From c44c749d3b6fdfca39002e7e48e03fe9f9fe37a3 Mon Sep 17 00:00:00 2001
From: Yangtao Li <tiny.windzz@gmail.com>
Date: Thu, 22 Nov 2018 07:34:41 -0500
Subject: [PATCH 1366/1890] net: amd: add missing of_node_put()

of_find_node_by_path() acquires a reference to the node
returned by it and that reference needs to be dropped by its caller.
This place doesn't do that, so fix it.

Signed-off-by: Yangtao Li <tiny.windzz@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amd/sunlance.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/amd/sunlance.c b/drivers/net/ethernet/amd/sunlance.c
index b4fc0ed5bce83..9d48998268233 100644
--- a/drivers/net/ethernet/amd/sunlance.c
+++ b/drivers/net/ethernet/amd/sunlance.c
@@ -1419,7 +1419,7 @@ static int sparc_lance_probe_one(struct platform_device *op,
 
 			prop = of_get_property(nd, "tpe-link-test?", NULL);
 			if (!prop)
-				goto no_link_test;
+				goto node_put;
 
 			if (strcmp(prop, "true")) {
 				printk(KERN_NOTICE "SunLance: warning: overriding option "
@@ -1428,6 +1428,8 @@ static int sparc_lance_probe_one(struct platform_device *op,
 				       "to ecd@skynet.be\n");
 				auxio_set_lte(AUXIO_LTE_ON);
 			}
+node_put:
+			of_node_put(nd);
 no_link_test:
 			lp->auto_select = 1;
 			lp->tpe = 0;
-- 
GitLab


From ef2a7cf1d8831535b8991459567b385661eb4a36 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Date: Fri, 23 Nov 2018 18:28:01 +0100
Subject: [PATCH 1367/1890] net: thunderx: set tso_hdrs pointer to NULL in
 nicvf_free_snd_queue

Reset snd_queue tso_hdrs pointer to NULL in nicvf_free_snd_queue routine
since it is used to check if tso dma descriptor queue has been previously
allocated. The issue can be triggered with the following reproducer:

$ip link set dev enP2p1s0v0 xdpdrv obj xdp_dummy.o
$ip link set dev enP2p1s0v0 xdpdrv off

[  341.467649] WARNING: CPU: 74 PID: 2158 at mm/vmalloc.c:1511 __vunmap+0x98/0xe0
[  341.515010] Hardware name: GIGABYTE H270-T70/MT70-HD0, BIOS T49 02/02/2018
[  341.521874] pstate: 60400005 (nZCv daif +PAN -UAO)
[  341.526654] pc : __vunmap+0x98/0xe0
[  341.530132] lr : __vunmap+0x98/0xe0
[  341.533609] sp : ffff00001c5db860
[  341.536913] x29: ffff00001c5db860 x28: 0000000000020000
[  341.542214] x27: ffff810feb5090b0 x26: ffff000017e57000
[  341.547515] x25: 0000000000000000 x24: 00000000fbd00000
[  341.552816] x23: 0000000000000000 x22: ffff810feb5090b0
[  341.558117] x21: 0000000000000000 x20: 0000000000000000
[  341.563418] x19: ffff000017e57000 x18: 0000000000000000
[  341.568719] x17: 0000000000000000 x16: 0000000000000000
[  341.574020] x15: 0000000000000010 x14: ffffffffffffffff
[  341.579321] x13: ffff00008985eb27 x12: ffff00000985eb2f
[  341.584622] x11: ffff0000096b3000 x10: ffff00001c5db510
[  341.589923] x9 : 00000000ffffffd0 x8 : ffff0000086868e8
[  341.595224] x7 : 3430303030303030 x6 : 00000000000006ef
[  341.600525] x5 : 00000000003fffff x4 : 0000000000000000
[  341.605825] x3 : 0000000000000000 x2 : ffffffffffffffff
[  341.611126] x1 : ffff0000096b3728 x0 : 0000000000000038
[  341.616428] Call trace:
[  341.618866]  __vunmap+0x98/0xe0
[  341.621997]  vunmap+0x3c/0x50
[  341.624961]  arch_dma_free+0x68/0xa0
[  341.628534]  dma_direct_free+0x50/0x80
[  341.632285]  nicvf_free_resources+0x160/0x2d8 [nicvf]
[  341.637327]  nicvf_config_data_transfer+0x174/0x5e8 [nicvf]
[  341.642890]  nicvf_stop+0x298/0x340 [nicvf]
[  341.647066]  __dev_close_many+0x9c/0x108
[  341.650977]  dev_close_many+0xa4/0x158
[  341.654720]  rollback_registered_many+0x140/0x530
[  341.659414]  rollback_registered+0x54/0x80
[  341.663499]  unregister_netdevice_queue+0x9c/0xe8
[  341.668192]  unregister_netdev+0x28/0x38
[  341.672106]  nicvf_remove+0xa4/0xa8 [nicvf]
[  341.676280]  nicvf_shutdown+0x20/0x30 [nicvf]
[  341.680630]  pci_device_shutdown+0x44/0x88
[  341.684720]  device_shutdown+0x144/0x250
[  341.688640]  kernel_restart_prepare+0x44/0x50
[  341.692986]  kernel_restart+0x20/0x68
[  341.696638]  __se_sys_reboot+0x210/0x238
[  341.700550]  __arm64_sys_reboot+0x24/0x30
[  341.704555]  el0_svc_handler+0x94/0x110
[  341.708382]  el0_svc+0x8/0xc
[  341.711252] ---[ end trace 3f4019c8439959c9 ]---
[  341.715874] page:ffff7e0003ef4000 count:0 mapcount:0 mapping:0000000000000000 index:0x4
[  341.723872] flags: 0x1fffe000000000()
[  341.727527] raw: 001fffe000000000 ffff7e0003f1a008 ffff7e0003ef4048 0000000000000000
[  341.735263] raw: 0000000000000004 0000000000000000 00000000ffffffff 0000000000000000
[  341.742994] page dumped because: VM_BUG_ON_PAGE(page_ref_count(page) == 0)

where xdp_dummy.c is a simple bpf program that forwards the incoming
frames to the network stack (available here:
https://github.com/altoor/xdp_walkthrough_examples/blob/master/sample_1/xdp_dummy.c)

Fixes: 05c773f52b96 ("net: thunderx: Add basic XDP support")
Fixes: 4863dea3fab0 ("net: Adding support for Cavium ThunderX network controller")
Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
index 187a249ff2d1d..fcaf18fa39048 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@ -585,10 +585,12 @@ static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq)
 	if (!sq->dmem.base)
 		return;
 
-	if (sq->tso_hdrs)
+	if (sq->tso_hdrs) {
 		dma_free_coherent(&nic->pdev->dev,
 				  sq->dmem.q_len * TSO_HEADER_SIZE,
 				  sq->tso_hdrs, sq->tso_hdrs_phys);
+		sq->tso_hdrs = NULL;
+	}
 
 	/* Free pending skbs in the queue */
 	smp_rmb();
-- 
GitLab


From e7b9fb4f545b1f7885e7c642643828f93d3d79c9 Mon Sep 17 00:00:00 2001
From: Fabio Estevam <festevam@gmail.com>
Date: Fri, 23 Nov 2018 15:46:50 -0200
Subject: [PATCH 1368/1890] dt-bindings: dsa: Fix typo in "probed"

The correct form is "can be probed", so fix the typo.

Signed-off-by: Fabio Estevam <festevam@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/dsa/dsa.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/net/dsa/dsa.txt b/Documentation/devicetree/bindings/net/dsa/dsa.txt
index 3ceeb8de11963..35694c0c376b9 100644
--- a/Documentation/devicetree/bindings/net/dsa/dsa.txt
+++ b/Documentation/devicetree/bindings/net/dsa/dsa.txt
@@ -7,7 +7,7 @@ limitations.
 Current Binding
 ---------------
 
-Switches are true Linux devices and can be probes by any means. Once
+Switches are true Linux devices and can be probed by any means. Once
 probed, they register to the DSA framework, passing a node
 pointer. This node is expected to fulfil the following binding, and
 may contain additional properties as required by the device it is
-- 
GitLab


From 3fa528b7682e73e906266bcd43728b8f923bf9b2 Mon Sep 17 00:00:00 2001
From: Quentin Schulz <quentin.schulz@bootlin.com>
Date: Fri, 23 Nov 2018 19:01:51 +0100
Subject: [PATCH 1369/1890] net: phy: mscc: fix deadlock in
 vsc85xx_default_config

The vsc85xx_default_config function called in the vsc85xx_config_init
function which is used by VSC8530, VSC8531, VSC8540 and VSC8541 PHYs
mistakenly calls phy_read and phy_write in-between phy_select_page and
phy_restore_page.

phy_select_page and phy_restore_page actually take and release the MDIO
bus lock and phy_write and phy_read take and release the lock to write
or read to a PHY register.

Let's fix this deadlock by using phy_modify_paged which handles
correctly a read followed by a write in a non-standard page.

Fixes: 6a0bfbbe20b0 ("net: phy: mscc: migrate to phy_select/restore_page functions")
Signed-off-by: Quentin Schulz <quentin.schulz@bootlin.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mscc.c | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/drivers/net/phy/mscc.c b/drivers/net/phy/mscc.c
index a2e59f4f6f01f..7cae175177449 100644
--- a/drivers/net/phy/mscc.c
+++ b/drivers/net/phy/mscc.c
@@ -810,17 +810,13 @@ static int vsc85xx_default_config(struct phy_device *phydev)
 
 	phydev->mdix_ctrl = ETH_TP_MDI_AUTO;
 	mutex_lock(&phydev->lock);
-	rc = phy_select_page(phydev, MSCC_PHY_PAGE_EXTENDED_2);
-	if (rc < 0)
-		goto out_unlock;
 
-	reg_val = phy_read(phydev, MSCC_PHY_RGMII_CNTL);
-	reg_val &= ~(RGMII_RX_CLK_DELAY_MASK);
-	reg_val |= (RGMII_RX_CLK_DELAY_1_1_NS << RGMII_RX_CLK_DELAY_POS);
-	phy_write(phydev, MSCC_PHY_RGMII_CNTL, reg_val);
+	reg_val = RGMII_RX_CLK_DELAY_1_1_NS << RGMII_RX_CLK_DELAY_POS;
+
+	rc = phy_modify_paged(phydev, MSCC_PHY_PAGE_EXTENDED_2,
+			      MSCC_PHY_RGMII_CNTL, RGMII_RX_CLK_DELAY_MASK,
+			      reg_val);
 
-out_unlock:
-	rc = phy_restore_page(phydev, rc, rc > 0 ? 0 : rc);
 	mutex_unlock(&phydev->lock);
 
 	return rc;
-- 
GitLab


From 07093b76476903f820d83d56c3040e656fb4d9e3 Mon Sep 17 00:00:00 2001
From: Andreas Fiedler <andreas.fiedler@gmx.net>
Date: Sat, 24 Nov 2018 00:16:34 +0100
Subject: [PATCH 1370/1890] net: gemini: Fix copy/paste error

The TX stats should be started with the tx_stats_syncp,
there seems to be a copy/paste error in the driver.

Signed-off-by: Andreas Fiedler <andreas.fiedler@gmx.net>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cortina/gemini.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c
index ceec467f590d2..949103db8a8ad 100644
--- a/drivers/net/ethernet/cortina/gemini.c
+++ b/drivers/net/ethernet/cortina/gemini.c
@@ -660,7 +660,7 @@ static void gmac_clean_txq(struct net_device *netdev, struct gmac_txq *txq,
 
 			u64_stats_update_begin(&port->tx_stats_syncp);
 			port->tx_frag_stats[nfrags]++;
-			u64_stats_update_end(&port->ir_stats_syncp);
+			u64_stats_update_end(&port->tx_stats_syncp);
 		}
 	}
 
-- 
GitLab


From 7b69154171b407844c273ab4c10b5f0ddcd6aa29 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 23 Nov 2018 18:16:33 +0100
Subject: [PATCH 1371/1890] ALSA: wss: Fix invalid snd_free_pages() at error
 path

Some spurious calls of snd_free_pages() have been overlooked and
remain in the error paths of wss driver code.  Since runtime->dma_area
is managed by the PCM core helper, we shouldn't release manually.

Drop the superfluous calls.

Reviewed-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/isa/wss/wss_lib.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/sound/isa/wss/wss_lib.c b/sound/isa/wss/wss_lib.c
index 32453f81b95a8..3a50088375760 100644
--- a/sound/isa/wss/wss_lib.c
+++ b/sound/isa/wss/wss_lib.c
@@ -1531,7 +1531,6 @@ static int snd_wss_playback_open(struct snd_pcm_substream *substream)
 	if (err < 0) {
 		if (chip->release_dma)
 			chip->release_dma(chip, chip->dma_private_data, chip->dma1);
-		snd_free_pages(runtime->dma_area, runtime->dma_bytes);
 		return err;
 	}
 	chip->playback_substream = substream;
@@ -1572,7 +1571,6 @@ static int snd_wss_capture_open(struct snd_pcm_substream *substream)
 	if (err < 0) {
 		if (chip->release_dma)
 			chip->release_dma(chip, chip->dma_private_data, chip->dma2);
-		snd_free_pages(runtime->dma_area, runtime->dma_bytes);
 		return err;
 	}
 	chip->capture_substream = substream;
-- 
GitLab


From 9a20332ab373b1f8f947e0a9c923652b32dab031 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 23 Nov 2018 18:18:30 +0100
Subject: [PATCH 1372/1890] ALSA: sparc: Fix invalid snd_free_pages() at error
 path

Some spurious calls of snd_free_pages() have been overlooked and
remain in the error paths of sparc cs4231 driver code.  Since
runtime->dma_area is managed by the PCM core helper, we shouldn't
release manually.

Drop the superfluous calls.

Reviewed-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/sparc/cs4231.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/sound/sparc/cs4231.c b/sound/sparc/cs4231.c
index e73c962590eb6..079063d8038d9 100644
--- a/sound/sparc/cs4231.c
+++ b/sound/sparc/cs4231.c
@@ -1146,10 +1146,8 @@ static int snd_cs4231_playback_open(struct snd_pcm_substream *substream)
 	runtime->hw = snd_cs4231_playback;
 
 	err = snd_cs4231_open(chip, CS4231_MODE_PLAY);
-	if (err < 0) {
-		snd_free_pages(runtime->dma_area, runtime->dma_bytes);
+	if (err < 0)
 		return err;
-	}
 	chip->playback_substream = substream;
 	chip->p_periods_sent = 0;
 	snd_pcm_set_sync(substream);
@@ -1167,10 +1165,8 @@ static int snd_cs4231_capture_open(struct snd_pcm_substream *substream)
 	runtime->hw = snd_cs4231_capture;
 
 	err = snd_cs4231_open(chip, CS4231_MODE_RECORD);
-	if (err < 0) {
-		snd_free_pages(runtime->dma_area, runtime->dma_bytes);
+	if (err < 0)
 		return err;
-	}
 	chip->capture_substream = substream;
 	chip->c_periods_sent = 0;
 	snd_pcm_set_sync(substream);
-- 
GitLab


From e1a7bfe3807974e66f971f2589d4e0197ec0fced Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 22 Nov 2018 14:36:17 +0100
Subject: [PATCH 1373/1890] ALSA: control: Fix race between adding and removing
 a user element

The procedure for adding a user control element has some window opened
for race against the concurrent removal of a user element.  This was
caught by syzkaller, hitting a KASAN use-after-free error.

This patch addresses the bug by wrapping the whole procedure to add a
user control element with the card->controls_rwsem, instead of only
around the increment of card->user_ctl_count.

This required a slight code refactoring, too.  The function
snd_ctl_add() is split to two parts: a core function to add the
control element and a part calling it.  The former is called from the
function for adding a user control element inside the controls_rwsem.

One change to be noted is that snd_ctl_notify() for adding a control
element gets called inside the controls_rwsem as well while it was
called outside the rwsem.  But this should be OK, as snd_ctl_notify()
takes another (finer) rwlock instead of rwsem, and the call of
snd_ctl_notify() inside rwsem is already done in another code path.

Reported-by: syzbot+dc09047bce3820621ba2@syzkaller.appspotmail.com
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/core/control.c | 80 +++++++++++++++++++++++++-------------------
 1 file changed, 45 insertions(+), 35 deletions(-)

diff --git a/sound/core/control.c b/sound/core/control.c
index 9aa15bfc79369..649d3217590ed 100644
--- a/sound/core/control.c
+++ b/sound/core/control.c
@@ -348,6 +348,40 @@ static int snd_ctl_find_hole(struct snd_card *card, unsigned int count)
 	return 0;
 }
 
+/* add a new kcontrol object; call with card->controls_rwsem locked */
+static int __snd_ctl_add(struct snd_card *card, struct snd_kcontrol *kcontrol)
+{
+	struct snd_ctl_elem_id id;
+	unsigned int idx;
+	unsigned int count;
+
+	id = kcontrol->id;
+	if (id.index > UINT_MAX - kcontrol->count)
+		return -EINVAL;
+
+	if (snd_ctl_find_id(card, &id)) {
+		dev_err(card->dev,
+			"control %i:%i:%i:%s:%i is already present\n",
+			id.iface, id.device, id.subdevice, id.name, id.index);
+		return -EBUSY;
+	}
+
+	if (snd_ctl_find_hole(card, kcontrol->count) < 0)
+		return -ENOMEM;
+
+	list_add_tail(&kcontrol->list, &card->controls);
+	card->controls_count += kcontrol->count;
+	kcontrol->id.numid = card->last_numid + 1;
+	card->last_numid += kcontrol->count;
+
+	id = kcontrol->id;
+	count = kcontrol->count;
+	for (idx = 0; idx < count; idx++, id.index++, id.numid++)
+		snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_ADD, &id);
+
+	return 0;
+}
+
 /**
  * snd_ctl_add - add the control instance to the card
  * @card: the card instance
@@ -364,45 +398,18 @@ static int snd_ctl_find_hole(struct snd_card *card, unsigned int count)
  */
 int snd_ctl_add(struct snd_card *card, struct snd_kcontrol *kcontrol)
 {
-	struct snd_ctl_elem_id id;
-	unsigned int idx;
-	unsigned int count;
 	int err = -EINVAL;
 
 	if (! kcontrol)
 		return err;
 	if (snd_BUG_ON(!card || !kcontrol->info))
 		goto error;
-	id = kcontrol->id;
-	if (id.index > UINT_MAX - kcontrol->count)
-		goto error;
 
 	down_write(&card->controls_rwsem);
-	if (snd_ctl_find_id(card, &id)) {
-		up_write(&card->controls_rwsem);
-		dev_err(card->dev, "control %i:%i:%i:%s:%i is already present\n",
-					id.iface,
-					id.device,
-					id.subdevice,
-					id.name,
-					id.index);
-		err = -EBUSY;
-		goto error;
-	}
-	if (snd_ctl_find_hole(card, kcontrol->count) < 0) {
-		up_write(&card->controls_rwsem);
-		err = -ENOMEM;
-		goto error;
-	}
-	list_add_tail(&kcontrol->list, &card->controls);
-	card->controls_count += kcontrol->count;
-	kcontrol->id.numid = card->last_numid + 1;
-	card->last_numid += kcontrol->count;
-	id = kcontrol->id;
-	count = kcontrol->count;
+	err = __snd_ctl_add(card, kcontrol);
 	up_write(&card->controls_rwsem);
-	for (idx = 0; idx < count; idx++, id.index++, id.numid++)
-		snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_ADD, &id);
+	if (err < 0)
+		goto error;
 	return 0;
 
  error:
@@ -1361,9 +1368,12 @@ static int snd_ctl_elem_add(struct snd_ctl_file *file,
 		kctl->tlv.c = snd_ctl_elem_user_tlv;
 
 	/* This function manage to free the instance on failure. */
-	err = snd_ctl_add(card, kctl);
-	if (err < 0)
-		return err;
+	down_write(&card->controls_rwsem);
+	err = __snd_ctl_add(card, kctl);
+	if (err < 0) {
+		snd_ctl_free_one(kctl);
+		goto unlock;
+	}
 	offset = snd_ctl_get_ioff(kctl, &info->id);
 	snd_ctl_build_ioff(&info->id, kctl, offset);
 	/*
@@ -1374,10 +1384,10 @@ static int snd_ctl_elem_add(struct snd_ctl_file *file,
 	 * which locks the element.
 	 */
 
-	down_write(&card->controls_rwsem);
 	card->user_ctl_count++;
-	up_write(&card->controls_rwsem);
 
+ unlock:
+	up_write(&card->controls_rwsem);
 	return 0;
 }
 
-- 
GitLab


From 9efdda4e3abed13f0903b7b6e4d4c2102019440a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 24 Nov 2018 09:12:24 -0800
Subject: [PATCH 1374/1890] tcp: address problems caused by EDT misshaps

When a qdisc setup including pacing FQ is dismantled and recreated,
some TCP packets are sent earlier than instructed by TCP stack.

TCP can be fooled when ACK comes back, because the following
operation can return a negative value.

    tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;

Some paths in TCP stack were not dealing properly with this,
this patch addresses four of them.

Fixes: ab408b6dc744 ("tcp: switch tcp and sch_fq to new earliest departure time model")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 16 ++++++++++------
 net/ipv4/tcp_timer.c | 10 ++++++----
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 1e37c13881893..a9d9555a973fe 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -579,10 +579,12 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
 		u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
 		u32 delta_us;
 
-		if (!delta)
-			delta = 1;
-		delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
-		tcp_rcv_rtt_update(tp, delta_us, 0);
+		if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
+			if (!delta)
+				delta = 1;
+			delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
+			tcp_rcv_rtt_update(tp, delta_us, 0);
+		}
 	}
 }
 
@@ -2910,9 +2912,11 @@ static bool tcp_ack_update_rtt(struct sock *sk, const int flag,
 	if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
 	    flag & FLAG_ACKED) {
 		u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
-		u32 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
 
-		seq_rtt_us = ca_rtt_us = delta_us;
+		if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
+			seq_rtt_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
+			ca_rtt_us = seq_rtt_us;
+		}
 	}
 	rs->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet (or -1) */
 	if (seq_rtt_us < 0)
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 5f8b6d3cd855d..091c53925e4da 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -40,15 +40,17 @@ static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	u32 elapsed, start_ts;
+	s32 remaining;
 
 	start_ts = tcp_retransmit_stamp(sk);
 	if (!icsk->icsk_user_timeout || !start_ts)
 		return icsk->icsk_rto;
 	elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts;
-	if (elapsed >= icsk->icsk_user_timeout)
+	remaining = icsk->icsk_user_timeout - elapsed;
+	if (remaining <= 0)
 		return 1; /* user timeout has passed; fire ASAP */
-	else
-		return min_t(u32, icsk->icsk_rto, msecs_to_jiffies(icsk->icsk_user_timeout - elapsed));
+
+	return min_t(u32, icsk->icsk_rto, msecs_to_jiffies(remaining));
 }
 
 /**
@@ -209,7 +211,7 @@ static bool retransmits_timed_out(struct sock *sk,
 				(boundary - linear_backoff_thresh) * TCP_RTO_MAX;
 		timeout = jiffies_to_msecs(timeout);
 	}
-	return (tcp_time_stamp(tcp_sk(sk)) - start_ts) >= timeout;
+	return (s32)(tcp_time_stamp(tcp_sk(sk)) - start_ts - timeout) >= 0;
 }
 
 /* A write timeout has occurred. Process the after effects. */
-- 
GitLab


From aba36930a35e7f1fe1319b203f25c05d6c119936 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Sat, 24 Nov 2018 14:21:16 -0500
Subject: [PATCH 1375/1890] net: always initialize pagedlen

In ip packet generation, pagedlen is initialized for each skb at the
start of the loop in __ip(6)_append_data, before label alloc_new_skb.

Depending on compiler options, code can be generated that jumps to
this label, triggering use of an an uninitialized variable.

In practice, at -O2, the generated code moves the initialization below
the label. But the code should not rely on that for correctness.

Fixes: 15e36f5b8e98 ("udp: paged allocation with gso")
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_output.c  | 3 ++-
 net/ipv6/ip6_output.c | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index c09219e7f2304..5dbec21856f4c 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -939,7 +939,7 @@ static int __ip_append_data(struct sock *sk,
 			unsigned int fraglen;
 			unsigned int fraggap;
 			unsigned int alloclen;
-			unsigned int pagedlen = 0;
+			unsigned int pagedlen;
 			struct sk_buff *skb_prev;
 alloc_new_skb:
 			skb_prev = skb;
@@ -956,6 +956,7 @@ static int __ip_append_data(struct sock *sk,
 			if (datalen > mtu - fragheaderlen)
 				datalen = maxfraglen - fragheaderlen;
 			fraglen = datalen + fragheaderlen;
+			pagedlen = 0;
 
 			if ((flags & MSG_MORE) &&
 			    !(rt->dst.dev->features&NETIF_F_SG))
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 89e0d5118afe6..827a3f5ff3bbd 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1354,7 +1354,7 @@ static int __ip6_append_data(struct sock *sk,
 			unsigned int fraglen;
 			unsigned int fraggap;
 			unsigned int alloclen;
-			unsigned int pagedlen = 0;
+			unsigned int pagedlen;
 alloc_new_skb:
 			/* There's no room in the current skb */
 			if (skb)
@@ -1378,6 +1378,7 @@ static int __ip6_append_data(struct sock *sk,
 			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
 				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
 			fraglen = datalen + fragheaderlen;
+			pagedlen = 0;
 
 			if ((flags & MSG_MORE) &&
 			    !(rt->dst.dev->features&NETIF_F_SG))
-- 
GitLab


From 4e962ff6e34f44c400c548da0c1e2393053a691e Mon Sep 17 00:00:00 2001
From: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Date: Sun, 25 Nov 2018 17:34:05 +0100
Subject: [PATCH 1376/1890] MAINTAINERS: change Sparse's maintainer

I'm taking over the maintainance of Sparse so add myself as
maintainer and move Christopher's info to CREDITS.

Signed-off-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 CREDITS     | 4 ++++
 MAINTAINERS | 3 +--
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/CREDITS b/CREDITS
index 84cbec4c62115..c9273393fe14c 100644
--- a/CREDITS
+++ b/CREDITS
@@ -2204,6 +2204,10 @@ S: Post Office Box 371
 S: North Little Rock, Arkansas 72115
 S: USA
 
+N: Christopher Li
+E: sparse@chrisli.org
+D: Sparse maintainer 2009 - 2018
+
 N: Stephan Linz
 E: linz@mazet.de
 E: Stephan.Linz@gmx.de
diff --git a/MAINTAINERS b/MAINTAINERS
index 03c46f4831433..380e43f585d34 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13996,11 +13996,10 @@ F:	drivers/tty/serial/sunzilog.h
 F:	drivers/tty/vcc.c
 
 SPARSE CHECKER
-M:	"Christopher Li" <sparse@chrisli.org>
+M:	"Luc Van Oostenryck" <luc.vanoostenryck@gmail.com>
 L:	linux-sparse@vger.kernel.org
 W:	https://sparse.wiki.kernel.org/
 T:	git git://git.kernel.org/pub/scm/devel/sparse/sparse.git
-T:	git git://git.kernel.org/pub/scm/devel/sparse/chrisl/sparse.git
 S:	Maintained
 F:	include/linux/compiler.h
 
-- 
GitLab


From 78e1f386170798159a81fdbc5e131836fd808048 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 25 Nov 2018 16:24:16 -0500
Subject: [PATCH 1377/1890] iov_iter: teach csum_and_copy_to_iter() to handle
 pipe-backed ones

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 lib/iov_iter.c | 38 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 37 insertions(+), 1 deletion(-)

diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 7ebccb5c16377..54c248526b55f 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -560,6 +560,38 @@ static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
 	return bytes;
 }
 
+static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes,
+				__wsum *csum, struct iov_iter *i)
+{
+	struct pipe_inode_info *pipe = i->pipe;
+	size_t n, r;
+	size_t off = 0;
+	__wsum sum = *csum, next;
+	int idx;
+
+	if (!sanity(i))
+		return 0;
+
+	bytes = n = push_pipe(i, bytes, &idx, &r);
+	if (unlikely(!n))
+		return 0;
+	for ( ; n; idx = next_idx(idx, pipe), r = 0) {
+		size_t chunk = min_t(size_t, n, PAGE_SIZE - r);
+		char *p = kmap_atomic(pipe->bufs[idx].page);
+		next = csum_partial_copy_nocheck(addr, p + r, chunk, 0);
+		sum = csum_block_add(sum, next, off);
+		kunmap_atomic(p);
+		i->idx = idx;
+		i->iov_offset = r + chunk;
+		n -= chunk;
+		off += chunk;
+		addr += chunk;
+	}
+	i->count -= bytes;
+	*csum = sum;
+	return bytes;
+}
+
 size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
 {
 	const char *from = addr;
@@ -1438,8 +1470,12 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum,
 	const char *from = addr;
 	__wsum sum, next;
 	size_t off = 0;
+
+	if (unlikely(iov_iter_is_pipe(i)))
+		return csum_and_copy_to_pipe_iter(addr, bytes, csum, i);
+
 	sum = *csum;
-	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
+	if (unlikely(iov_iter_is_discard(i))) {
 		WARN_ON(1);	/* for now */
 		return 0;
 	}
-- 
GitLab


From 2e6e902d185027f8e3cb8b7305238f7e35d6a436 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 25 Nov 2018 14:19:31 -0800
Subject: [PATCH 1378/1890] Linux 4.20-rc4

---
 Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index ddbf627cad8f5..0ce4e29ee342f 100644
--- a/Makefile
+++ b/Makefile
@@ -2,8 +2,8 @@
 VERSION = 4
 PATCHLEVEL = 20
 SUBLEVEL = 0
-EXTRAVERSION = -rc3
-NAME = "People's Front"
+EXTRAVERSION = -rc4
+NAME = Shy Crocodile
 
 # *DOCUMENTATION*
 # To see a list of typical targets execute "make help"
-- 
GitLab


From 64999fa7aa2c076ec6d05aee481f11f5296ceb8c Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 14 Aug 2018 00:37:18 +0200
Subject: [PATCH 1379/1890] fsi: master-ast-cf: select GENERIC_ALLOCATOR

In randconfig builds without CONFIG_GENERIC_ALLOCATOR, this driver
fails to link:

ERROR: "gen_pool_alloc_algo" [drivers/fsi/fsi-master-ast-cf.ko] undefined!
ERROR: "gen_pool_fixed_alloc" [drivers/fsi/fsi-master-ast-cf.ko] undefined!
ERROR: "of_gen_pool_get" [drivers/fsi/fsi-master-ast-cf.ko] undefined!
ERROR: "gen_pool_free" [drivers/fsi/fsi-master-ast-cf.ko] undefined!

Select the dependency as all other users do.

Fixes: 6a794a27daca ("fsi: master-ast-cf: Add new FSI master using Aspeed ColdFire")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 drivers/fsi/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/fsi/Kconfig b/drivers/fsi/Kconfig
index af3a20dd5aa4a..99c99a5d57fe2 100644
--- a/drivers/fsi/Kconfig
+++ b/drivers/fsi/Kconfig
@@ -46,6 +46,7 @@ config FSI_MASTER_AST_CF
 	tristate "FSI master based on Aspeed ColdFire coprocessor"
 	depends on GPIOLIB
 	depends on GPIO_ASPEED
+	select GENERIC_ALLOCATOR
 	---help---
 	This option enables a FSI master using the AST2400 and AST2500 GPIO
 	lines driven by the internal ColdFire coprocessor. This requires
-- 
GitLab


From d20810530b7109a95abef5130e6dcec09c5180d7 Mon Sep 17 00:00:00 2001
From: Brajeswar Ghosh <brajeswar.linux@gmail.com>
Date: Fri, 16 Nov 2018 16:17:03 +0530
Subject: [PATCH 1380/1890] fsi: fsi-scom.c: Remove duplicate header

Remove linux/cdev.h which is included more than once

Signed-off-by: Brajeswar Ghosh <brajeswar.linux@gmail.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 drivers/fsi/fsi-scom.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/fsi/fsi-scom.c b/drivers/fsi/fsi-scom.c
index df94021dd9d12..81dc01ac2351f 100644
--- a/drivers/fsi/fsi-scom.c
+++ b/drivers/fsi/fsi-scom.c
@@ -20,7 +20,6 @@
 #include <linux/fs.h>
 #include <linux/uaccess.h>
 #include <linux/slab.h>
-#include <linux/cdev.h>
 #include <linux/list.h>
 
 #include <uapi/linux/fsi.h>
-- 
GitLab


From 38317f5c0f2faae5110854f36edad810f841d62f Mon Sep 17 00:00:00 2001
From: Felipe Balbi <felipe.balbi@linux.intel.com>
Date: Mon, 19 Nov 2018 08:34:04 +0200
Subject: [PATCH 1381/1890] Revert "usb: dwc3: gadget: skip Set/Clear Halt when
 invalid"

This reverts commit ffb80fc672c3a7b6afd0cefcb1524fb99917b2f3.

Turns out that commit is wrong. Host controllers are allowed to use
Clear Feature HALT as means to sync data toggle between host and
periperal.

Cc: <stable@vger.kernel.org>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/dwc3/gadget.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 9faad896b3a13..9f92ee03dde70 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -1470,9 +1470,6 @@ int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value, int protocol)
 		unsigned transfer_in_flight;
 		unsigned started;
 
-		if (dep->flags & DWC3_EP_STALL)
-			return 0;
-
 		if (dep->number > 1)
 			trb = dwc3_ep_prev_trb(dep, dep->trb_enqueue);
 		else
@@ -1494,8 +1491,6 @@ int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value, int protocol)
 		else
 			dep->flags |= DWC3_EP_STALL;
 	} else {
-		if (!(dep->flags & DWC3_EP_STALL))
-			return 0;
 
 		ret = dwc3_send_clear_stall_ep_cmd(dep);
 		if (ret)
-- 
GitLab


From a84a1bcc992f0545a51d2e120b8ca2ef20e2ea97 Mon Sep 17 00:00:00 2001
From: Kai-Heng Feng <kai.heng.feng@canonical.com>
Date: Fri, 23 Nov 2018 08:42:19 +0000
Subject: [PATCH 1382/1890] USB: usb-storage: Add new IDs to ums-realtek

There are two new Realtek card readers require ums-realtek to work
correctly.

Add the new IDs to support them.

Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/storage/unusual_realtek.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/usb/storage/unusual_realtek.h b/drivers/usb/storage/unusual_realtek.h
index d17cd95b55bbd..6b2140f966ef8 100644
--- a/drivers/usb/storage/unusual_realtek.h
+++ b/drivers/usb/storage/unusual_realtek.h
@@ -27,4 +27,14 @@ UNUSUAL_DEV(0x0bda, 0x0159, 0x0000, 0x9999,
 		"USB Card Reader",
 		USB_SC_DEVICE, USB_PR_DEVICE, init_realtek_cr, 0),
 
+UNUSUAL_DEV(0x0bda, 0x0177, 0x0000, 0x9999,
+		"Realtek",
+		"USB Card Reader",
+		USB_SC_DEVICE, USB_PR_DEVICE, init_realtek_cr, 0),
+
+UNUSUAL_DEV(0x0bda, 0x0184, 0x0000, 0x9999,
+		"Realtek",
+		"USB Card Reader",
+		USB_SC_DEVICE, USB_PR_DEVICE, init_realtek_cr, 0),
+
 #endif  /* defined(CONFIG_USB_STORAGE_REALTEK) || ... */
-- 
GitLab


From effd14f66cc1ef6701a19c5a56e39c35f4d395a5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michael=20Niew=C3=B6hner?= <linux@mniewoehner.de>
Date: Sun, 25 Nov 2018 17:57:33 +0100
Subject: [PATCH 1383/1890] usb: core: quirks: add RESET_RESUME quirk for
 Cherry G230 Stream series
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cherry G230 Stream 2.0 (G85-231) and 3.0 (G85-232) need this quirk to
function correctly. This fixes a but where double pressing numlock locks
up the device completely with need to replug the keyboard.

Signed-off-by: Michael NiewÃ¶hner <linux@mniewoehner.de>
Tested-by: Michael NiewÃ¶hner <linux@mniewoehner.de>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/quirks.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index f9ff03e6af93e..0690fcff0ea23 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -209,6 +209,9 @@ static const struct usb_device_id usb_quirk_list[] = {
 	/* Microsoft LifeCam-VX700 v2.0 */
 	{ USB_DEVICE(0x045e, 0x0770), .driver_info = USB_QUIRK_RESET_RESUME },
 
+	/* Cherry Stream G230 2.0 (G85-231) and 3.0 (G85-232) */
+	{ USB_DEVICE(0x046a, 0x0023), .driver_info = USB_QUIRK_RESET_RESUME },
+
 	/* Logitech HD Pro Webcams C920, C920-C, C925e and C930e */
 	{ USB_DEVICE(0x046d, 0x082d), .driver_info = USB_QUIRK_DELAY_INIT },
 	{ USB_DEVICE(0x046d, 0x0841), .driver_info = USB_QUIRK_DELAY_INIT },
-- 
GitLab


From c4cfcf6f4297c9256b53790bacbbbd6901fef468 Mon Sep 17 00:00:00 2001
From: Hui Wang <hui.wang@canonical.com>
Date: Mon, 26 Nov 2018 14:17:16 +0800
Subject: [PATCH 1384/1890] ALSA: hda/realtek - fix the pop noise on headphone
 for lenovo laptops

We have several Lenovo laptops with the codec alc285, when playing
sound via headphone, we can hear click/pop noise in the headphone,
if we let the headphone share the DAC of NID 0x2 with the speaker,
the noise disappears.

The Lenovo laptops here include P52, P72, X1 yoda2 and X1 carbon.

I have tried to set preferred_dacs and override_conn, but neither of
them worked. Thanks for Kailang, he told me to invalidate the NID 0x3
through override_wcaps.

BugLink: https://bugs.launchpad.net/bugs/1805079
Cc: <stable@vger.kernel.org>
Signed-off-by: Kailang Yang <kailang@realtek.com>
Signed-off-by: Hui Wang <hui.wang@canonical.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 1118fd1bbf1a2..e66da22272fdf 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -5358,6 +5358,16 @@ static void alc274_fixup_bind_dacs(struct hda_codec *codec,
 	spec->gen.preferred_dacs = preferred_pairs;
 }
 
+/* The DAC of NID 0x3 will introduce click/pop noise on headphones, so invalidate it */
+static void alc285_fixup_invalidate_dacs(struct hda_codec *codec,
+			      const struct hda_fixup *fix, int action)
+{
+	if (action != HDA_FIXUP_ACT_PRE_PROBE)
+		return;
+
+	snd_hda_override_wcaps(codec, 0x03, 0);
+}
+
 /* for hda_fixup_thinkpad_acpi() */
 #include "thinkpad_helper.c"
 
@@ -5495,6 +5505,7 @@ enum {
 	ALC255_FIXUP_DELL_HEADSET_MIC,
 	ALC295_FIXUP_HP_X360,
 	ALC221_FIXUP_HP_HEADSET_MIC,
+	ALC285_FIXUP_LENOVO_HEADPHONE_NOISE,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -6362,6 +6373,10 @@ static const struct hda_fixup alc269_fixups[] = {
 		.chained = true,
 		.chain_id = ALC269_FIXUP_HEADSET_MIC
 	},
+	[ALC285_FIXUP_LENOVO_HEADPHONE_NOISE] = {
+		.type = HDA_FIXUP_FUNC,
+		.v.func = alc285_fixup_invalidate_dacs,
+	},
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -7035,6 +7050,11 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
 		{0x12, 0x90a60130},
 		{0x19, 0x03a11020},
 		{0x21, 0x0321101f}),
+	SND_HDA_PIN_QUIRK(0x10ec0285, 0x17aa, "Lenovo", ALC285_FIXUP_LENOVO_HEADPHONE_NOISE,
+		{0x12, 0x90a60130},
+		{0x14, 0x90170110},
+		{0x19, 0x04a11040},
+		{0x21, 0x04211020}),
 	SND_HDA_PIN_QUIRK(0x10ec0288, 0x1028, "Dell", ALC288_FIXUP_DELL1_MIC_NO_PRESENCE,
 		{0x12, 0x90a60120},
 		{0x14, 0x90170110},
-- 
GitLab


From 0bc3544a010c1e460c99fa052991789f113d860e Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Date: Fri, 23 Nov 2018 13:38:17 +0200
Subject: [PATCH 1385/1890] drm: rcar-du: Fix DU3 start/stop on M3-N

Group start/stop is controlled by the DRES and DEN bits of DSYSR0 for
the first group and DSYSR2 for the second group. On most DU instances,
this maps to the first CRTC of the group. On M3-N, however, DU2 doesn't
exist, but DSYSR2 does. There is no CRTC object there that maps to the
correct DSYSR register.

Commit 9144adc5e5a9 ("drm: rcar-du: Cache DSYSR value to ensure known
initial value") switched group start/stop from using group read/write
access to DSYSR to a CRTC-based API to cache the DSYSR value. While
doing so, it introduced a regression on M3-N by accessing DSYSR3 instead
of DSYSR2 to start/stop the second group.

To fix this, access the DSYSR register directly through group read/write
if the SoC is missing the first DU channel of the group. Keep using the
rcar_du_crtc_dsysr_clr_set() function otherwise, to retain the DSYSR
caching feature.

Fixes: 9144adc5e5a9 ("drm: rcar-du: Cache DSYSR value to ensure known initial value")
Reported-by: Hoan Nguyen An <na-hoan@jinso.co.jp>
Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Acked-by: Kieran Bingham <kieran.bingham+renesas@ideasonboard.com>
Tested-by: Simon Horman <horms+renesas@verge.net.au>
---
 drivers/gpu/drm/rcar-du/rcar_du_group.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/rcar-du/rcar_du_group.c b/drivers/gpu/drm/rcar-du/rcar_du_group.c
index d85f0a1c15817..cebf313c6e1f9 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_group.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_group.c
@@ -202,10 +202,25 @@ void rcar_du_group_put(struct rcar_du_group *rgrp)
 
 static void __rcar_du_group_start_stop(struct rcar_du_group *rgrp, bool start)
 {
-	struct rcar_du_crtc *rcrtc = &rgrp->dev->crtcs[rgrp->index * 2];
+	struct rcar_du_device *rcdu = rgrp->dev;
+
+	/*
+	 * Group start/stop is controlled by the DRES and DEN bits of DSYSR0
+	 * for the first group and DSYSR2 for the second group. On most DU
+	 * instances, this maps to the first CRTC of the group, and we can just
+	 * use rcar_du_crtc_dsysr_clr_set() to access the correct DSYSR. On
+	 * M3-N, however, DU2 doesn't exist, but DSYSR2 does. We thus need to
+	 * access the register directly using group read/write.
+	 */
+	if (rcdu->info->channels_mask & BIT(rgrp->index * 2)) {
+		struct rcar_du_crtc *rcrtc = &rgrp->dev->crtcs[rgrp->index * 2];
 
-	rcar_du_crtc_dsysr_clr_set(rcrtc, DSYSR_DRES | DSYSR_DEN,
-				   start ? DSYSR_DEN : DSYSR_DRES);
+		rcar_du_crtc_dsysr_clr_set(rcrtc, DSYSR_DRES | DSYSR_DEN,
+					   start ? DSYSR_DEN : DSYSR_DRES);
+	} else {
+		rcar_du_group_write(rgrp, DSYSR,
+				    start ? DSYSR_DEN : DSYSR_DRES);
+	}
 }
 
 void rcar_du_group_start_stop(struct rcar_du_group *rgrp, bool start)
-- 
GitLab


From 2a31e4bd9ad255ee40809b5c798c4b1c2b09703b Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Thu, 15 Nov 2018 15:14:30 +0800
Subject: [PATCH 1386/1890] ipvs: call ip_vs_dst_notifier earlier than
 ipv6_dev_notf

ip_vs_dst_event is supposed to clean up all dst used in ipvs'
destinations when a net dev is going down. But it works only
when the dst's dev is the same as the dev from the event.

Now with the same priority but late registration,
ip_vs_dst_notifier is always called later than ipv6_dev_notf
where the dst's dev is set to lo for NETDEV_DOWN event.

As the dst's dev lo is not the same as the dev from the event
in ip_vs_dst_event, ip_vs_dst_notifier doesn't actually work.
Also as these dst have to wait for dest_trash_timer to clean
them up. It would cause some non-permanent kernel warnings:

  unregister_netdevice: waiting for br0 to become free. Usage count = 3

To fix it, call ip_vs_dst_notifier earlier than ipv6_dev_notf
by increasing its priority to ADDRCONF_NOTIFY_PRIORITY + 5.

Note that for ipv4 route fib_netdev_notifier doesn't set dst's
dev to lo in NETDEV_DOWN event, so this fix is only needed when
IP_VS_IPV6 is defined.

Fixes: 7a4f0761fce3 ("IPVS: init and cleanup restructuring")
Reported-by: Li Shuang <shuali@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Acked-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/ipvs/ip_vs_ctl.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 83395bf6dc35e..432141f04af3d 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -3980,6 +3980,9 @@ static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs)
 
 static struct notifier_block ip_vs_dst_notifier = {
 	.notifier_call = ip_vs_dst_event,
+#ifdef CONFIG_IP_VS_IPV6
+	.priority = ADDRCONF_NOTIFY_PRIORITY + 5,
+#endif
 };
 
 int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
-- 
GitLab


From 89259088c1b7fecb43e8e245dc931909132a4e03 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sat, 17 Nov 2018 11:32:29 +0100
Subject: [PATCH 1387/1890] netfilter: nfnetlink_cttimeout: fetch timeouts for
 udplite and gre, too

syzbot was able to trigger the WARN in cttimeout_default_get() by
passing UDPLITE as l4protocol.  Alias UDPLITE to UDP, both use
same timeout values.

Furthermore, also fetch GRE timeouts.  GRE is a bit more complicated,
as it still can be a module and its netns_proto_gre struct layout isn't
visible outside of the gre module. Can't move timeouts around, it
appears conntrack sysctl unregister assumes net_generic() returns
nf_proto_net, so we get crash. Expose layout of netns_proto_gre instead.

A followup nf-next patch could make gre tracker be built-in as well
if needed, its not that large.

Last, make the WARN() mention the missing protocol value in case
anything else is missing.

Reported-by: syzbot+2fae8fa157dd92618cae@syzkaller.appspotmail.com
Fixes: 8866df9264a3 ("netfilter: nfnetlink_cttimeout: pass default timeout policy to obj_to_nlattr")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nf_conntrack_proto_gre.h | 13 +++++++++++++
 net/netfilter/nf_conntrack_proto_gre.c           | 14 ++------------
 net/netfilter/nfnetlink_cttimeout.c              | 15 +++++++++++++--
 3 files changed, 28 insertions(+), 14 deletions(-)

diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h b/include/linux/netfilter/nf_conntrack_proto_gre.h
index b8d95564bd534..14edb795ab430 100644
--- a/include/linux/netfilter/nf_conntrack_proto_gre.h
+++ b/include/linux/netfilter/nf_conntrack_proto_gre.h
@@ -21,6 +21,19 @@ struct nf_ct_gre_keymap {
 	struct nf_conntrack_tuple tuple;
 };
 
+enum grep_conntrack {
+	GRE_CT_UNREPLIED,
+	GRE_CT_REPLIED,
+	GRE_CT_MAX
+};
+
+struct netns_proto_gre {
+	struct nf_proto_net	nf;
+	rwlock_t		keymap_lock;
+	struct list_head	keymap_list;
+	unsigned int		gre_timeouts[GRE_CT_MAX];
+};
+
 /* add new tuple->key_reply pair to keymap */
 int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir,
 			 struct nf_conntrack_tuple *t);
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 9b48dc8b4b885..2a5e56c6d8d9f 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -43,24 +43,12 @@
 #include <linux/netfilter/nf_conntrack_proto_gre.h>
 #include <linux/netfilter/nf_conntrack_pptp.h>
 
-enum grep_conntrack {
-	GRE_CT_UNREPLIED,
-	GRE_CT_REPLIED,
-	GRE_CT_MAX
-};
-
 static const unsigned int gre_timeouts[GRE_CT_MAX] = {
 	[GRE_CT_UNREPLIED]	= 30*HZ,
 	[GRE_CT_REPLIED]	= 180*HZ,
 };
 
 static unsigned int proto_gre_net_id __read_mostly;
-struct netns_proto_gre {
-	struct nf_proto_net	nf;
-	rwlock_t		keymap_lock;
-	struct list_head	keymap_list;
-	unsigned int		gre_timeouts[GRE_CT_MAX];
-};
 
 static inline struct netns_proto_gre *gre_pernet(struct net *net)
 {
@@ -402,6 +390,8 @@ static int __init nf_ct_proto_gre_init(void)
 {
 	int ret;
 
+	BUILD_BUG_ON(offsetof(struct netns_proto_gre, nf) != 0);
+
 	ret = register_pernet_subsys(&proto_gre_net_ops);
 	if (ret < 0)
 		goto out_pernet;
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index a518eb162344e..109b0d27345ac 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -455,7 +455,8 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
 	case IPPROTO_TCP:
 		timeouts = nf_tcp_pernet(net)->timeouts;
 		break;
-	case IPPROTO_UDP:
+	case IPPROTO_UDP: /* fallthrough */
+	case IPPROTO_UDPLITE:
 		timeouts = nf_udp_pernet(net)->timeouts;
 		break;
 	case IPPROTO_DCCP:
@@ -469,13 +470,23 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
 	case IPPROTO_SCTP:
 #ifdef CONFIG_NF_CT_PROTO_SCTP
 		timeouts = nf_sctp_pernet(net)->timeouts;
+#endif
+		break;
+	case IPPROTO_GRE:
+#ifdef CONFIG_NF_CT_PROTO_GRE
+		if (l4proto->net_id) {
+			struct netns_proto_gre *net_gre;
+
+			net_gre = net_generic(net, *l4proto->net_id);
+			timeouts = net_gre->gre_timeouts;
+		}
 #endif
 		break;
 	case 255:
 		timeouts = &nf_generic_pernet(net)->timeout;
 		break;
 	default:
-		WARN_ON_ONCE(1);
+		WARN_ONCE(1, "Missing timeouts for proto %d", l4proto->l4proto);
 		break;
 	}
 
-- 
GitLab


From ce68cc6fad893eb33b69ef7ec186233a51696236 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Mon, 12 Nov 2018 09:28:08 +0800
Subject: [PATCH 1388/1890] arm64: dts: mt7622: Drop the general purpose timer
 node

MediaTeks general purpose timer register into system in early phase
during kernel boot, but the clock sources aren't probed at this point.

The system has the ARM architecture timer, so we don't need the GPT
timer from mediatek. Drop the DT node for it.

Fixes: 9cc7f0de9e67 ("arm64: dts: mt7622: add timer, CCI-400 and PMU nodes")
Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
[mb: fix commit message]
Signed-off-by: Matthias Brugger <matthias.bgg@gmail.com>
---
 arch/arm64/boot/dts/mediatek/mt7622.dtsi | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/arch/arm64/boot/dts/mediatek/mt7622.dtsi b/arch/arm64/boot/dts/mediatek/mt7622.dtsi
index fe0c875f1d951..14a1028ca3a64 100644
--- a/arch/arm64/boot/dts/mediatek/mt7622.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt7622.dtsi
@@ -227,16 +227,6 @@ pericfg: pericfg@10002000 {
 		#reset-cells = <1>;
 	};
 
-	timer: timer@10004000 {
-		compatible = "mediatek,mt7622-timer",
-			     "mediatek,mt6577-timer";
-		reg = <0 0x10004000 0 0x80>;
-		interrupts = <GIC_SPI 152 IRQ_TYPE_LEVEL_LOW>;
-		clocks = <&infracfg CLK_INFRA_APXGPT_PD>,
-			 <&topckgen CLK_TOP_RTC>;
-		clock-names = "system-clk", "rtc-clk";
-	};
-
 	scpsys: scpsys@10006000 {
 		compatible = "mediatek,mt7622-scpsys",
 			     "syscon";
-- 
GitLab


From 286afdde1640d8ea8916a0f05e811441fbbf4b9d Mon Sep 17 00:00:00 2001
From: Aaro Koskinen <aaro.koskinen@iki.fi>
Date: Sun, 25 Nov 2018 00:17:04 +0200
Subject: [PATCH 1389/1890] USB: omap_udc: use devm_request_irq()

The current code fails to release the third irq on the error path
(observed by reading the code), and we get also multiple WARNs with
failing gadget drivers due to duplicate IRQ releases. Fix by using
devm_request_irq().

Signed-off-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/gadget/udc/omap_udc.c | 37 +++++++++----------------------
 1 file changed, 10 insertions(+), 27 deletions(-)

diff --git a/drivers/usb/gadget/udc/omap_udc.c b/drivers/usb/gadget/udc/omap_udc.c
index 3a16431da3211..1c77218c82af8 100644
--- a/drivers/usb/gadget/udc/omap_udc.c
+++ b/drivers/usb/gadget/udc/omap_udc.c
@@ -2867,8 +2867,8 @@ static int omap_udc_probe(struct platform_device *pdev)
 		udc->clr_halt = UDC_RESET_EP;
 
 	/* USB general purpose IRQ:  ep0, state changes, dma, etc */
-	status = request_irq(pdev->resource[1].start, omap_udc_irq,
-			0, driver_name, udc);
+	status = devm_request_irq(&pdev->dev, pdev->resource[1].start,
+				  omap_udc_irq, 0, driver_name, udc);
 	if (status != 0) {
 		ERR("can't get irq %d, err %d\n",
 			(int) pdev->resource[1].start, status);
@@ -2876,20 +2876,20 @@ static int omap_udc_probe(struct platform_device *pdev)
 	}
 
 	/* USB "non-iso" IRQ (PIO for all but ep0) */
-	status = request_irq(pdev->resource[2].start, omap_udc_pio_irq,
-			0, "omap_udc pio", udc);
+	status = devm_request_irq(&pdev->dev, pdev->resource[2].start,
+				  omap_udc_pio_irq, 0, "omap_udc pio", udc);
 	if (status != 0) {
 		ERR("can't get irq %d, err %d\n",
 			(int) pdev->resource[2].start, status);
-		goto cleanup2;
+		goto cleanup1;
 	}
 #ifdef	USE_ISO
-	status = request_irq(pdev->resource[3].start, omap_udc_iso_irq,
-			0, "omap_udc iso", udc);
+	status = devm_request_irq(&pdev->dev, pdev->resource[3].start,
+				  omap_udc_iso_irq, 0, "omap_udc iso", udc);
 	if (status != 0) {
 		ERR("can't get irq %d, err %d\n",
 			(int) pdev->resource[3].start, status);
-		goto cleanup3;
+		goto cleanup1;
 	}
 #endif
 	if (cpu_is_omap16xx() || cpu_is_omap7xx()) {
@@ -2902,22 +2902,11 @@ static int omap_udc_probe(struct platform_device *pdev)
 	create_proc_file();
 	status = usb_add_gadget_udc_release(&pdev->dev, &udc->gadget,
 			omap_udc_release);
-	if (status)
-		goto cleanup4;
-
-	return 0;
+	if (!status)
+		return 0;
 
-cleanup4:
 	remove_proc_file();
 
-#ifdef	USE_ISO
-cleanup3:
-	free_irq(pdev->resource[2].start, udc);
-#endif
-
-cleanup2:
-	free_irq(pdev->resource[1].start, udc);
-
 cleanup1:
 	kfree(udc);
 	udc = NULL;
@@ -2961,12 +2950,6 @@ static int omap_udc_remove(struct platform_device *pdev)
 
 	remove_proc_file();
 
-#ifdef	USE_ISO
-	free_irq(pdev->resource[3].start, udc);
-#endif
-	free_irq(pdev->resource[2].start, udc);
-	free_irq(pdev->resource[1].start, udc);
-
 	if (udc->dc_clk) {
 		if (udc->clk_requested)
 			omap_udc_enable_clock(0);
-- 
GitLab


From 99f700366fcea1aa2fa3c49c99f371670c3c62f8 Mon Sep 17 00:00:00 2001
From: Aaro Koskinen <aaro.koskinen@iki.fi>
Date: Sun, 25 Nov 2018 00:17:05 +0200
Subject: [PATCH 1390/1890] USB: omap_udc: fix crashes on probe error and
 module removal

We currently crash if usb_add_gadget_udc_release() fails, since the
udc->done is not initialized until in the remove function.
Furthermore, on module removal the udc data is accessed although
the release function is already triggered by usb_del_gadget_udc()
early in the function.

Fix by rewriting the release and remove functions, basically moving
all the cleanup into the release function, and doing the completion
only in the module removal case.

The patch fixes omap_udc module probe with a failing gadged, and also
allows the removal of omap_udc. Tested by running "modprobe omap_udc;
modprobe -r omap_udc" in a loop.

Signed-off-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/gadget/udc/omap_udc.c | 50 ++++++++++++-------------------
 1 file changed, 19 insertions(+), 31 deletions(-)

diff --git a/drivers/usb/gadget/udc/omap_udc.c b/drivers/usb/gadget/udc/omap_udc.c
index 1c77218c82af8..240ccba445922 100644
--- a/drivers/usb/gadget/udc/omap_udc.c
+++ b/drivers/usb/gadget/udc/omap_udc.c
@@ -2593,9 +2593,22 @@ omap_ep_setup(char *name, u8 addr, u8 type,
 
 static void omap_udc_release(struct device *dev)
 {
-	complete(udc->done);
+	pullup_disable(udc);
+	if (!IS_ERR_OR_NULL(udc->transceiver)) {
+		usb_put_phy(udc->transceiver);
+		udc->transceiver = NULL;
+	}
+	omap_writew(0, UDC_SYSCON1);
+	remove_proc_file();
+	if (udc->dc_clk) {
+		if (udc->clk_requested)
+			omap_udc_enable_clock(0);
+		clk_put(udc->hhc_clk);
+		clk_put(udc->dc_clk);
+	}
+	if (udc->done)
+		complete(udc->done);
 	kfree(udc);
-	udc = NULL;
 }
 
 static int
@@ -2900,12 +2913,8 @@ static int omap_udc_probe(struct platform_device *pdev)
 	}
 
 	create_proc_file();
-	status = usb_add_gadget_udc_release(&pdev->dev, &udc->gadget,
-			omap_udc_release);
-	if (!status)
-		return 0;
-
-	remove_proc_file();
+	return usb_add_gadget_udc_release(&pdev->dev, &udc->gadget,
+					  omap_udc_release);
 
 cleanup1:
 	kfree(udc);
@@ -2932,36 +2941,15 @@ static int omap_udc_remove(struct platform_device *pdev)
 {
 	DECLARE_COMPLETION_ONSTACK(done);
 
-	if (!udc)
-		return -ENODEV;
-
-	usb_del_gadget_udc(&udc->gadget);
-	if (udc->driver)
-		return -EBUSY;
-
 	udc->done = &done;
 
-	pullup_disable(udc);
-	if (!IS_ERR_OR_NULL(udc->transceiver)) {
-		usb_put_phy(udc->transceiver);
-		udc->transceiver = NULL;
-	}
-	omap_writew(0, UDC_SYSCON1);
-
-	remove_proc_file();
+	usb_del_gadget_udc(&udc->gadget);
 
-	if (udc->dc_clk) {
-		if (udc->clk_requested)
-			omap_udc_enable_clock(0);
-		clk_put(udc->hhc_clk);
-		clk_put(udc->dc_clk);
-	}
+	wait_for_completion(&done);
 
 	release_mem_region(pdev->resource[0].start,
 			pdev->resource[0].end - pdev->resource[0].start + 1);
 
-	wait_for_completion(&done);
-
 	return 0;
 }
 
-- 
GitLab


From 6ca6695f576b8453fe68865e84d25946d63b10ad Mon Sep 17 00:00:00 2001
From: Aaro Koskinen <aaro.koskinen@iki.fi>
Date: Sun, 25 Nov 2018 00:17:06 +0200
Subject: [PATCH 1391/1890] USB: omap_udc: fix omap_udc_start() on 15xx
 machines

On OMAP 15xx machines there are no transceivers, and omap_udc_start()
always fails as it forgot to adjust the default return value.

Signed-off-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/gadget/udc/omap_udc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/gadget/udc/omap_udc.c b/drivers/usb/gadget/udc/omap_udc.c
index 240ccba445922..33250e569af82 100644
--- a/drivers/usb/gadget/udc/omap_udc.c
+++ b/drivers/usb/gadget/udc/omap_udc.c
@@ -2041,7 +2041,7 @@ static inline int machine_without_vbus_sense(void)
 static int omap_udc_start(struct usb_gadget *g,
 		struct usb_gadget_driver *driver)
 {
-	int		status = -ENODEV;
+	int		status;
 	struct omap_ep	*ep;
 	unsigned long	flags;
 
@@ -2079,6 +2079,7 @@ static int omap_udc_start(struct usb_gadget *g,
 			goto done;
 		}
 	} else {
+		status = 0;
 		if (can_pullup(udc))
 			pullup_enable(udc);
 		else
-- 
GitLab


From 2c2322fbcab8102b8cadc09d66714700a2da42c2 Mon Sep 17 00:00:00 2001
From: Aaro Koskinen <aaro.koskinen@iki.fi>
Date: Sun, 25 Nov 2018 00:17:07 +0200
Subject: [PATCH 1392/1890] USB: omap_udc: fix USB gadget functionality on Palm
 Tungsten E

On Palm TE nothing happens when you try to use gadget drivers and plug
the USB cable. Fix by adding the board to the vbus sense quirk list.

Signed-off-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/gadget/udc/omap_udc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/gadget/udc/omap_udc.c b/drivers/usb/gadget/udc/omap_udc.c
index 33250e569af82..9b23e04c8f026 100644
--- a/drivers/usb/gadget/udc/omap_udc.c
+++ b/drivers/usb/gadget/udc/omap_udc.c
@@ -2033,6 +2033,7 @@ static inline int machine_without_vbus_sense(void)
 {
 	return machine_is_omap_innovator()
 		|| machine_is_omap_osk()
+		|| machine_is_omap_palmte()
 		|| machine_is_sx1()
 		/* No known omap7xx boards with vbus sense */
 		|| cpu_is_omap7xx();
-- 
GitLab


From 069caf5950dfa75d0526cd89c439ff9d9d3136d8 Mon Sep 17 00:00:00 2001
From: Aaro Koskinen <aaro.koskinen@iki.fi>
Date: Sun, 25 Nov 2018 00:17:08 +0200
Subject: [PATCH 1393/1890] USB: omap_udc: fix rejection of out transfers when
 DMA is used

Commit 387f869d2579 ("usb: gadget: u_ether: conditionally align
transfer size") started aligning transfer size only if requested,
breaking omap_udc DMA mode. Set quirk_ep_out_aligned_size to restore
the old behaviour.

Fixes: 387f869d2579 ("usb: gadget: u_ether: conditionally align transfer size")
Signed-off-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/gadget/udc/omap_udc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/gadget/udc/omap_udc.c b/drivers/usb/gadget/udc/omap_udc.c
index 9b23e04c8f026..fcf13ef33b312 100644
--- a/drivers/usb/gadget/udc/omap_udc.c
+++ b/drivers/usb/gadget/udc/omap_udc.c
@@ -2642,6 +2642,7 @@ omap_udc_setup(struct platform_device *odev, struct usb_phy *xceiv)
 	udc->gadget.speed = USB_SPEED_UNKNOWN;
 	udc->gadget.max_speed = USB_SPEED_FULL;
 	udc->gadget.name = driver_name;
+	udc->gadget.quirk_ep_out_aligned_size = 1;
 	udc->transceiver = xceiv;
 
 	/* ep0 is special; put it right after the SETUP buffer */
-- 
GitLab


From 58a0afbf4c99ac355df16773af835b919b9432ee Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Wed, 21 Nov 2018 10:35:12 +0100
Subject: [PATCH 1394/1890] ARM: davinci: da8xx: define gpio interrupts as
 separate resources

Since commit eb3744a2dd01 ("gpio: davinci: Do not assume continuous
IRQ numbering") the davinci GPIO driver fails to probe if we boot
in legacy mode from any of the board files. Since the driver now
expects every interrupt to be defined as a separate resource, split
the definition of IRQ resources instead of having a single continuous
interrupt range.

Fixes: eb3744a2dd01 ("gpio: davinci: Do not assume continuous IRQ numbering")
Cc: stable@vger.kernel.org
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
---
 arch/arm/mach-davinci/devices-da8xx.c | 40 +++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/arch/arm/mach-davinci/devices-da8xx.c b/arch/arm/mach-davinci/devices-da8xx.c
index 1fd3619f6a09f..cf78da5ab0548 100644
--- a/arch/arm/mach-davinci/devices-da8xx.c
+++ b/arch/arm/mach-davinci/devices-da8xx.c
@@ -701,6 +701,46 @@ static struct resource da8xx_gpio_resources[] = {
 	},
 	{ /* interrupt */
 		.start	= IRQ_DA8XX_GPIO0,
+		.end	= IRQ_DA8XX_GPIO0,
+		.flags	= IORESOURCE_IRQ,
+	},
+	{
+		.start	= IRQ_DA8XX_GPIO1,
+		.end	= IRQ_DA8XX_GPIO1,
+		.flags	= IORESOURCE_IRQ,
+	},
+	{
+		.start	= IRQ_DA8XX_GPIO2,
+		.end	= IRQ_DA8XX_GPIO2,
+		.flags	= IORESOURCE_IRQ,
+	},
+	{
+		.start	= IRQ_DA8XX_GPIO3,
+		.end	= IRQ_DA8XX_GPIO3,
+		.flags	= IORESOURCE_IRQ,
+	},
+	{
+		.start	= IRQ_DA8XX_GPIO4,
+		.end	= IRQ_DA8XX_GPIO4,
+		.flags	= IORESOURCE_IRQ,
+	},
+	{
+		.start	= IRQ_DA8XX_GPIO5,
+		.end	= IRQ_DA8XX_GPIO5,
+		.flags	= IORESOURCE_IRQ,
+	},
+	{
+		.start	= IRQ_DA8XX_GPIO6,
+		.end	= IRQ_DA8XX_GPIO6,
+		.flags	= IORESOURCE_IRQ,
+	},
+	{
+		.start	= IRQ_DA8XX_GPIO7,
+		.end	= IRQ_DA8XX_GPIO7,
+		.flags	= IORESOURCE_IRQ,
+	},
+	{
+		.start	= IRQ_DA8XX_GPIO8,
 		.end	= IRQ_DA8XX_GPIO8,
 		.flags	= IORESOURCE_IRQ,
 	},
-- 
GitLab


From 193c04374e281a56c7d4f96e66d329671945bebe Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Wed, 21 Nov 2018 10:35:13 +0100
Subject: [PATCH 1395/1890] ARM: davinci: dm365: define gpio interrupts as
 separate resources

Since commit eb3744a2dd01 ("gpio: davinci: Do not assume continuous
IRQ numbering") the davinci GPIO driver fails to probe if we boot
in legacy mode from any of the board files. Since the driver now
expects every interrupt to be defined as a separate resource, split
the definition of IRQ resources instead of having a single continuous
interrupt range.

Fixes: eb3744a2dd01 ("gpio: davinci: Do not assume continuous IRQ numbering")
Cc: stable@vger.kernel.org
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
---
 arch/arm/mach-davinci/dm365.c | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/arch/arm/mach-davinci/dm365.c b/arch/arm/mach-davinci/dm365.c
index abcf2a5ed89b5..42665914166a3 100644
--- a/arch/arm/mach-davinci/dm365.c
+++ b/arch/arm/mach-davinci/dm365.c
@@ -267,6 +267,41 @@ static struct resource dm365_gpio_resources[] = {
 	},
 	{	/* interrupt */
 		.start	= IRQ_DM365_GPIO0,
+		.end	= IRQ_DM365_GPIO0,
+		.flags	= IORESOURCE_IRQ,
+	},
+	{
+		.start	= IRQ_DM365_GPIO1,
+		.end	= IRQ_DM365_GPIO1,
+		.flags	= IORESOURCE_IRQ,
+	},
+	{
+		.start	= IRQ_DM365_GPIO2,
+		.end	= IRQ_DM365_GPIO2,
+		.flags	= IORESOURCE_IRQ,
+	},
+	{
+		.start	= IRQ_DM365_GPIO3,
+		.end	= IRQ_DM365_GPIO3,
+		.flags	= IORESOURCE_IRQ,
+	},
+	{
+		.start	= IRQ_DM365_GPIO4,
+		.end	= IRQ_DM365_GPIO4,
+		.flags	= IORESOURCE_IRQ,
+	},
+	{
+		.start	= IRQ_DM365_GPIO5,
+		.end	= IRQ_DM365_GPIO5,
+		.flags	= IORESOURCE_IRQ,
+	},
+	{
+		.start	= IRQ_DM365_GPIO6,
+		.end	= IRQ_DM365_GPIO6,
+		.flags	= IORESOURCE_IRQ,
+	},
+	{
+		.start	= IRQ_DM365_GPIO7,
 		.end	= IRQ_DM365_GPIO7,
 		.flags	= IORESOURCE_IRQ,
 	},
-- 
GitLab


From 2c9c83491f30afbce25796e185cd4d5e36080e31 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Wed, 21 Nov 2018 10:35:14 +0100
Subject: [PATCH 1396/1890] ARM: davinci: dm646x: define gpio interrupts as
 separate resources

Since commit eb3744a2dd01 ("gpio: davinci: Do not assume continuous
IRQ numbering") the davinci GPIO driver fails to probe if we boot
in legacy mode from any of the board files. Since the driver now
expects every interrupt to be defined as a separate resource, split
the definition of IRQ resources instead of having a single continuous
interrupt range.

Fixes: eb3744a2dd01 ("gpio: davinci: Do not assume continuous IRQ numbering")
Cc: stable@vger.kernel.org
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
---
 arch/arm/mach-davinci/dm646x.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/arch/arm/mach-davinci/dm646x.c b/arch/arm/mach-davinci/dm646x.c
index 6bd2ed069d0d7..d9b93e2806d22 100644
--- a/arch/arm/mach-davinci/dm646x.c
+++ b/arch/arm/mach-davinci/dm646x.c
@@ -442,6 +442,16 @@ static struct resource dm646x_gpio_resources[] = {
 	},
 	{	/* interrupt */
 		.start	= IRQ_DM646X_GPIOBNK0,
+		.end	= IRQ_DM646X_GPIOBNK0,
+		.flags	= IORESOURCE_IRQ,
+	},
+	{
+		.start	= IRQ_DM646X_GPIOBNK1,
+		.end	= IRQ_DM646X_GPIOBNK1,
+		.flags	= IORESOURCE_IRQ,
+	},
+	{
+		.start	= IRQ_DM646X_GPIOBNK2,
 		.end	= IRQ_DM646X_GPIOBNK2,
 		.flags	= IORESOURCE_IRQ,
 	},
-- 
GitLab


From 27db7baab640ea28d7994eda943fef170e347081 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Wed, 21 Nov 2018 10:35:15 +0100
Subject: [PATCH 1397/1890] ARM: davinci: dm355: define gpio interrupts as
 separate resources

Since commit eb3744a2dd01 ("gpio: davinci: Do not assume continuous
IRQ numbering") the davinci GPIO driver fails to probe if we boot
in legacy mode from any of the board files. Since the driver now
expects every interrupt to be defined as a separate resource, split
the definition of IRQ resources instead of having a single continuous
interrupt range.

Fixes: eb3744a2dd01 ("gpio: davinci: Do not assume continuous IRQ numbering")
Cc: stable@vger.kernel.org
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
---
 arch/arm/mach-davinci/dm355.c | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/arch/arm/mach-davinci/dm355.c b/arch/arm/mach-davinci/dm355.c
index 9f7d38d12c888..2b0f5d97ab7c1 100644
--- a/arch/arm/mach-davinci/dm355.c
+++ b/arch/arm/mach-davinci/dm355.c
@@ -548,6 +548,36 @@ static struct resource dm355_gpio_resources[] = {
 	},
 	{	/* interrupt */
 		.start	= IRQ_DM355_GPIOBNK0,
+		.end	= IRQ_DM355_GPIOBNK0,
+		.flags	= IORESOURCE_IRQ,
+	},
+	{
+		.start	= IRQ_DM355_GPIOBNK1,
+		.end	= IRQ_DM355_GPIOBNK1,
+		.flags	= IORESOURCE_IRQ,
+	},
+	{
+		.start	= IRQ_DM355_GPIOBNK2,
+		.end	= IRQ_DM355_GPIOBNK2,
+		.flags	= IORESOURCE_IRQ,
+	},
+	{
+		.start	= IRQ_DM355_GPIOBNK3,
+		.end	= IRQ_DM355_GPIOBNK3,
+		.flags	= IORESOURCE_IRQ,
+	},
+	{
+		.start	= IRQ_DM355_GPIOBNK4,
+		.end	= IRQ_DM355_GPIOBNK4,
+		.flags	= IORESOURCE_IRQ,
+	},
+	{
+		.start	= IRQ_DM355_GPIOBNK5,
+		.end	= IRQ_DM355_GPIOBNK5,
+		.flags	= IORESOURCE_IRQ,
+	},
+	{
+		.start	= IRQ_DM355_GPIOBNK6,
 		.end	= IRQ_DM355_GPIOBNK6,
 		.flags	= IORESOURCE_IRQ,
 	},
-- 
GitLab


From adcf60ce14c8250761af9de907eb6c7d096c26d3 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Wed, 21 Nov 2018 10:35:16 +0100
Subject: [PATCH 1398/1890] ARM: davinci: dm644x: define gpio interrupts as
 separate resources

Since commit eb3744a2dd01 ("gpio: davinci: Do not assume continuous
IRQ numbering") the davinci GPIO driver fails to probe if we boot
in legacy mode from any of the board files. Since the driver now
expects every interrupt to be defined as a separate resource, split
the definition of IRQ resources instead of having a single continuous
interrupt range.

Fixes: eb3744a2dd01 ("gpio: davinci: Do not assume continuous IRQ numbering")
Cc: stable@vger.kernel.org
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
---
 arch/arm/mach-davinci/dm644x.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/arch/arm/mach-davinci/dm644x.c b/arch/arm/mach-davinci/dm644x.c
index 0720da7809a69..de1ec6dc01e94 100644
--- a/arch/arm/mach-davinci/dm644x.c
+++ b/arch/arm/mach-davinci/dm644x.c
@@ -492,6 +492,26 @@ static struct resource dm644_gpio_resources[] = {
 	},
 	{	/* interrupt */
 		.start	= IRQ_GPIOBNK0,
+		.end	= IRQ_GPIOBNK0,
+		.flags	= IORESOURCE_IRQ,
+	},
+	{
+		.start	= IRQ_GPIOBNK1,
+		.end	= IRQ_GPIOBNK1,
+		.flags	= IORESOURCE_IRQ,
+	},
+	{
+		.start	= IRQ_GPIOBNK2,
+		.end	= IRQ_GPIOBNK2,
+		.flags	= IORESOURCE_IRQ,
+	},
+	{
+		.start	= IRQ_GPIOBNK3,
+		.end	= IRQ_GPIOBNK3,
+		.flags	= IORESOURCE_IRQ,
+	},
+	{
+		.start	= IRQ_GPIOBNK4,
 		.end	= IRQ_GPIOBNK4,
 		.flags	= IORESOURCE_IRQ,
 	},
-- 
GitLab


From 786a9ab1330169f2602238822b4df5d5c4c98f6c Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Wed, 21 Nov 2018 10:35:17 +0100
Subject: [PATCH 1399/1890] gpio: davinci: restore a way to manually specify
 the GPIO base

Commit 587f7a694f01 ("gpio: davinci: Use dev name for label and
automatic base selection") broke the network support in legacy boot
mode for da850-evm since we can no longer request the MDIO clock GPIO.

Other boards may be broken too, which I haven't tested.

The problem is in the fact that most board files still use the legacy
GPIO API where lines are requested by numbers rather than descriptors.

While this should be fixed eventually, in order to unbreak the board
for now - provide a way to manually specify the GPIO base in platform
data.

Fixes: 587f7a694f01 ("gpio: davinci: Use dev name for label and automatic base selection")
Cc: stable@vger.kernel.org
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
---
 drivers/gpio/gpio-davinci.c                | 2 +-
 include/linux/platform_data/gpio-davinci.h | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpio/gpio-davinci.c b/drivers/gpio/gpio-davinci.c
index 5c1564fcc24ea..bdb29e51b4176 100644
--- a/drivers/gpio/gpio-davinci.c
+++ b/drivers/gpio/gpio-davinci.c
@@ -258,7 +258,7 @@ static int davinci_gpio_probe(struct platform_device *pdev)
 	chips->chip.set = davinci_gpio_set;
 
 	chips->chip.ngpio = ngpio;
-	chips->chip.base = -1;
+	chips->chip.base = pdata->no_auto_base ? pdata->base : -1;
 
 #ifdef CONFIG_OF_GPIO
 	chips->chip.of_gpio_n_cells = 2;
diff --git a/include/linux/platform_data/gpio-davinci.h b/include/linux/platform_data/gpio-davinci.h
index f92a47e180341..a93841bfb9f7a 100644
--- a/include/linux/platform_data/gpio-davinci.h
+++ b/include/linux/platform_data/gpio-davinci.h
@@ -17,6 +17,8 @@
 #define __DAVINCI_GPIO_PLATFORM_H
 
 struct davinci_gpio_platform_data {
+	bool	no_auto_base;
+	u32	base;
 	u32	ngpio;
 	u32	gpio_unbanked;
 };
-- 
GitLab


From 45ed94b9e2d2e310fa7fb884b26080dfd4f0e31d Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Wed, 21 Nov 2018 10:35:18 +0100
Subject: [PATCH 1400/1890] ARM: davinci: da850: set the GPIO base to 0

Commit 587f7a694f01 ("gpio: davinci: Use dev name for label and
automatic base selection") broke the network support in legacy boot
mode for da850-evm since we can no longer request the MDIO clock GPIO.

We now have the option to specify the GPIO base manually for davinci,
so add the relevant fields to platform data.

Fixes: 587f7a694f01 ("gpio: davinci: Use dev name for label and automatic base selection")
Cc: stable@vger.kernel.org
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
---
 arch/arm/mach-davinci/da850.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/arm/mach-davinci/da850.c b/arch/arm/mach-davinci/da850.c
index 4528bbf0c8618..e7b78df2bfefb 100644
--- a/arch/arm/mach-davinci/da850.c
+++ b/arch/arm/mach-davinci/da850.c
@@ -719,7 +719,9 @@ int __init da850_register_vpif_capture(struct vpif_capture_config
 }
 
 static struct davinci_gpio_platform_data da850_gpio_platform_data = {
-	.ngpio = 144,
+	.no_auto_base	= true,
+	.base		= 0,
+	.ngpio		= 144,
 };
 
 int __init da850_register_gpio(void)
-- 
GitLab


From 133cd2e48305887709675847da1f6ee2b7363929 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Wed, 21 Nov 2018 10:35:19 +0100
Subject: [PATCH 1401/1890] ARM: davinci: dm365: set the GPIO base to 0

Commit 587f7a694f01 ("gpio: davinci: Use dev name for label and
automatic base selection") broke the GPIO support on DaVinci boards
in legacy mode by allowing gpiolib to set the GPIO base automatically.

DaVinci board files use the legacy GPIO API with hard-coded GPIO line
numbers. Use the new fields in struct davinci_gpio_platform_data to
manually set the GPIO base to 0.

Fixes: 587f7a694f01 ("gpio: davinci: Use dev name for label and automatic base selection")
Cc: stable@vger.kernel.org
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
---
 arch/arm/mach-davinci/dm365.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm/mach-davinci/dm365.c b/arch/arm/mach-davinci/dm365.c
index 42665914166a3..01fb2b0c82de3 100644
--- a/arch/arm/mach-davinci/dm365.c
+++ b/arch/arm/mach-davinci/dm365.c
@@ -308,6 +308,8 @@ static struct resource dm365_gpio_resources[] = {
 };
 
 static struct davinci_gpio_platform_data dm365_gpio_platform_data = {
+	.no_auto_base	= true,
+	.base		= 0,
 	.ngpio		= 104,
 	.gpio_unbanked	= 8,
 };
-- 
GitLab


From fb9e7f0bba151281d7587c1262fae8bdf0497296 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Wed, 21 Nov 2018 10:35:20 +0100
Subject: [PATCH 1402/1890] ARM: davinci: dm646x: set the GPIO base to 0

Commit 587f7a694f01 ("gpio: davinci: Use dev name for label and
automatic base selection") broke the GPIO support on DaVinci boards
in legacy mode by allowing gpiolib to set the GPIO base automatically.

DaVinci board files use the legacy GPIO API with hard-coded GPIO line
numbers. Use the new fields in struct davinci_gpio_platform_data to
manually set the GPIO base to 0.

Fixes: 587f7a694f01 ("gpio: davinci: Use dev name for label and automatic base selection")
Cc: stable@vger.kernel.org
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
---
 arch/arm/mach-davinci/dm646x.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm/mach-davinci/dm646x.c b/arch/arm/mach-davinci/dm646x.c
index d9b93e2806d22..7dc54b2a610f4 100644
--- a/arch/arm/mach-davinci/dm646x.c
+++ b/arch/arm/mach-davinci/dm646x.c
@@ -458,6 +458,8 @@ static struct resource dm646x_gpio_resources[] = {
 };
 
 static struct davinci_gpio_platform_data dm646x_gpio_platform_data = {
+	.no_auto_base	= true,
+	.base		= 0,
 	.ngpio		= 43,
 };
 
-- 
GitLab


From 7d35baa4e9ec4b717bc0e58a39cdb6a1c50f5465 Mon Sep 17 00:00:00 2001
From: Mathias Kresin <dev@kresin.me>
Date: Mon, 26 Nov 2018 11:25:40 +0100
Subject: [PATCH 1403/1890] MIPS: ralink: Fix mt7620 nd_sd pinmux

In case the nd_sd group is set to the sd-card function, Pins 45 + 46 are
configured as GPIOs. If they are blocked by the sd function, they can't
be used as GPIOs.

Reported-by: Kristian Evensen <kristian.evensen@gmail.com>
Signed-off-by: Mathias Kresin <dev@kresin.me>
Signed-off-by: Paul Burton <paul.burton@mips.com>
Fixes: f576fb6a0700 ("MIPS: ralink: cleanup the soc specific pinmux data")
Patchwork: https://patchwork.linux-mips.org/patch/21220/
Cc: John Crispin <john@phrozen.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: James Hogan <jhogan@kernel.org>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Cc: stable@vger.kernel.org # v3.18+
---
 arch/mips/ralink/mt7620.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/ralink/mt7620.c b/arch/mips/ralink/mt7620.c
index 41b71c4352c25..c1ce6f43642bc 100644
--- a/arch/mips/ralink/mt7620.c
+++ b/arch/mips/ralink/mt7620.c
@@ -84,7 +84,7 @@ static struct rt2880_pmx_func pcie_rst_grp[] = {
 };
 static struct rt2880_pmx_func nd_sd_grp[] = {
 	FUNC("nand", MT7620_GPIO_MODE_NAND, 45, 15),
-	FUNC("sd", MT7620_GPIO_MODE_SD, 45, 15)
+	FUNC("sd", MT7620_GPIO_MODE_SD, 47, 13)
 };
 
 static struct rt2880_pmx_group mt7620a_pinmux_data[] = {
-- 
GitLab


From 7bada55ab50697861eee6bb7d60b41e68a961a9c Mon Sep 17 00:00:00 2001
From: Todd Kjos <tkjos@android.com>
Date: Tue, 6 Nov 2018 15:55:32 -0800
Subject: [PATCH 1404/1890] binder: fix race that allows malicious free of live
 buffer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Malicious code can attempt to free buffers using the BC_FREE_BUFFER
ioctl to binder. There are protections against a user freeing a buffer
while in use by the kernel, however there was a window where
BC_FREE_BUFFER could be used to free a recently allocated buffer that
was not completely initialized. This resulted in a use-after-free
detected by KASAN with a malicious test program.

This window is closed by setting the buffer's allow_user_free attribute
to 0 when the buffer is allocated or when the user has previously freed
it instead of waiting for the caller to set it. The problem was that
when the struct buffer was recycled, allow_user_free was stale and set
to 1 allowing a free to go through.

Signed-off-by: Todd Kjos <tkjos@google.com>
Acked-by: Arve HjÃ¸nnevÃ¥g <arve@android.com>
Cc: stable <stable@vger.kernel.org> # 4.14
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/android/binder.c       | 21 ++++++++++++---------
 drivers/android/binder_alloc.c | 16 ++++++----------
 drivers/android/binder_alloc.h |  3 +--
 3 files changed, 19 insertions(+), 21 deletions(-)

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index cb30a524d16d8..9f1000d2a40c7 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -2974,7 +2974,6 @@ static void binder_transaction(struct binder_proc *proc,
 		t->buffer = NULL;
 		goto err_binder_alloc_buf_failed;
 	}
-	t->buffer->allow_user_free = 0;
 	t->buffer->debug_id = t->debug_id;
 	t->buffer->transaction = t;
 	t->buffer->target_node = target_node;
@@ -3510,14 +3509,18 @@ static int binder_thread_write(struct binder_proc *proc,
 
 			buffer = binder_alloc_prepare_to_free(&proc->alloc,
 							      data_ptr);
-			if (buffer == NULL) {
-				binder_user_error("%d:%d BC_FREE_BUFFER u%016llx no match\n",
-					proc->pid, thread->pid, (u64)data_ptr);
-				break;
-			}
-			if (!buffer->allow_user_free) {
-				binder_user_error("%d:%d BC_FREE_BUFFER u%016llx matched unreturned buffer\n",
-					proc->pid, thread->pid, (u64)data_ptr);
+			if (IS_ERR_OR_NULL(buffer)) {
+				if (PTR_ERR(buffer) == -EPERM) {
+					binder_user_error(
+						"%d:%d BC_FREE_BUFFER u%016llx matched unreturned or currently freeing buffer\n",
+						proc->pid, thread->pid,
+						(u64)data_ptr);
+				} else {
+					binder_user_error(
+						"%d:%d BC_FREE_BUFFER u%016llx no match\n",
+						proc->pid, thread->pid,
+						(u64)data_ptr);
+				}
 				break;
 			}
 			binder_debug(BINDER_DEBUG_FREE_BUFFER,
diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c
index 64fd96eada31f..030c98f35cca7 100644
--- a/drivers/android/binder_alloc.c
+++ b/drivers/android/binder_alloc.c
@@ -151,16 +151,12 @@ static struct binder_buffer *binder_alloc_prepare_to_free_locked(
 		else {
 			/*
 			 * Guard against user threads attempting to
-			 * free the buffer twice
+			 * free the buffer when in use by kernel or
+			 * after it's already been freed.
 			 */
-			if (buffer->free_in_progress) {
-				binder_alloc_debug(BINDER_DEBUG_USER_ERROR,
-						   "%d:%d FREE_BUFFER u%016llx user freed buffer twice\n",
-						   alloc->pid, current->pid,
-						   (u64)user_ptr);
-				return NULL;
-			}
-			buffer->free_in_progress = 1;
+			if (!buffer->allow_user_free)
+				return ERR_PTR(-EPERM);
+			buffer->allow_user_free = 0;
 			return buffer;
 		}
 	}
@@ -500,7 +496,7 @@ static struct binder_buffer *binder_alloc_new_buf_locked(
 
 	rb_erase(best_fit, &alloc->free_buffers);
 	buffer->free = 0;
-	buffer->free_in_progress = 0;
+	buffer->allow_user_free = 0;
 	binder_insert_allocated_buffer_locked(alloc, buffer);
 	binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC,
 		     "%d: binder_alloc_buf size %zd got %pK\n",
diff --git a/drivers/android/binder_alloc.h b/drivers/android/binder_alloc.h
index 9ef64e5638566..fb3238c74c8a8 100644
--- a/drivers/android/binder_alloc.h
+++ b/drivers/android/binder_alloc.h
@@ -50,8 +50,7 @@ struct binder_buffer {
 	unsigned free:1;
 	unsigned allow_user_free:1;
 	unsigned async_transaction:1;
-	unsigned free_in_progress:1;
-	unsigned debug_id:28;
+	unsigned debug_id:29;
 
 	struct binder_transaction *transaction;
 
-- 
GitLab


From eceb05965489784f24bbf4d61ba60e475a983016 Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Mon, 26 Nov 2018 02:29:56 +0000
Subject: [PATCH 1405/1890] Drivers: hv: vmbus: check the creation_status in
 vmbus_establish_gpadl()

This is a longstanding issue: if the vmbus upper-layer drivers try to
consume too many GPADLs, the host may return with an error
0xC0000044 (STATUS_QUOTA_EXCEEDED), but currently we forget to check
the creation_status, and hence we can pass an invalid GPADL handle
into the OPEN_CHANNEL message, and get an error code 0xc0000225 in
open_info->response.open_result.status, and finally we hang in
vmbus_open() -> "goto error_free_info" -> vmbus_teardown_gpadl().

With this patch, we can exit gracefully on STATUS_QUOTA_EXCEEDED.

Cc: Stephen Hemminger <sthemmin@microsoft.com>
Cc: K. Y. Srinivasan <kys@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: stable@vger.kernel.org
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/channel.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index de8193f3b8381..fe00b12e44178 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -516,6 +516,14 @@ int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer,
 	}
 	wait_for_completion(&msginfo->waitevent);
 
+	if (msginfo->response.gpadl_created.creation_status != 0) {
+		pr_err("Failed to establish GPADL: err = 0x%x\n",
+		       msginfo->response.gpadl_created.creation_status);
+
+		ret = -EDQUOT;
+		goto cleanup;
+	}
+
 	if (channel->rescind) {
 		ret = -ENODEV;
 		goto cleanup;
-- 
GitLab


From 1830b6eeda1fed42d85f2388f79c926331a9b2d0 Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Mon, 26 Nov 2018 12:47:46 +0300
Subject: [PATCH 1406/1890] thunderbolt: Prevent root port runtime suspend
 during NVM upgrade

During NVM upgrade process the host router is hot-removed for a short
while. During this time it is possible that the root port is moved into
D3cold which would be fine if the root port could trigger PME on itself.
However, many systems actually do not implement it so what happens is
that the root port goes into D3cold and never wakes up unless userspace
does PCI config space access, such as running 'lscpi'.

For this reason we explicitly prevent the root port from runtime
suspending during NVM upgrade.

Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/thunderbolt/switch.c | 40 ++++++++++++++++++++++++++++++++++--
 1 file changed, 38 insertions(+), 2 deletions(-)

diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c
index 52ff854f0d6c1..cd96994dc0947 100644
--- a/drivers/thunderbolt/switch.c
+++ b/drivers/thunderbolt/switch.c
@@ -863,6 +863,30 @@ static ssize_t key_store(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR(key, 0600, key_show, key_store);
 
+static void nvm_authenticate_start(struct tb_switch *sw)
+{
+	struct pci_dev *root_port;
+
+	/*
+	 * During host router NVM upgrade we should not allow root port to
+	 * go into D3cold because some root ports cannot trigger PME
+	 * itself. To be on the safe side keep the root port in D0 during
+	 * the whole upgrade process.
+	 */
+	root_port = pci_find_pcie_root_port(sw->tb->nhi->pdev);
+	if (root_port)
+		pm_runtime_get_noresume(&root_port->dev);
+}
+
+static void nvm_authenticate_complete(struct tb_switch *sw)
+{
+	struct pci_dev *root_port;
+
+	root_port = pci_find_pcie_root_port(sw->tb->nhi->pdev);
+	if (root_port)
+		pm_runtime_put(&root_port->dev);
+}
+
 static ssize_t nvm_authenticate_show(struct device *dev,
 	struct device_attribute *attr, char *buf)
 {
@@ -912,10 +936,18 @@ static ssize_t nvm_authenticate_store(struct device *dev,
 
 		sw->nvm->authenticating = true;
 
-		if (!tb_route(sw))
+		if (!tb_route(sw)) {
+			/*
+			 * Keep root port from suspending as long as the
+			 * NVM upgrade process is running.
+			 */
+			nvm_authenticate_start(sw);
 			ret = nvm_authenticate_host(sw);
-		else
+			if (ret)
+				nvm_authenticate_complete(sw);
+		} else {
 			ret = nvm_authenticate_device(sw);
+		}
 		pm_runtime_mark_last_busy(&sw->dev);
 		pm_runtime_put_autosuspend(&sw->dev);
 	}
@@ -1334,6 +1366,10 @@ static int tb_switch_add_dma_port(struct tb_switch *sw)
 	if (ret <= 0)
 		return ret;
 
+	/* Now we can allow root port to suspend again */
+	if (!tb_route(sw))
+		nvm_authenticate_complete(sw);
+
 	if (status) {
 		tb_sw_info(sw, "switch flash authentication failed\n");
 		tb_switch_set_uuid(sw);
-- 
GitLab


From c54c7374ff44de5e609506aca7c0deae4703b6d1 Mon Sep 17 00:00:00 2001
From: Lyude Paul <lyude@redhat.com>
Date: Tue, 13 Nov 2018 17:46:14 -0500
Subject: [PATCH 1407/1890] drm/dp_mst: Skip validating ports during
 destruction, just ref

Jerry Zuo pointed out a rather obscure hotplugging issue that it seems I
accidentally introduced into DRM two years ago.

Pretend we have a topology like this:

|- DP-1: mst_primary
   |- DP-4: active display
   |- DP-5: disconnected
   |- DP-6: active hub
      |- DP-7: active display
      |- DP-8: disconnected
      |- DP-9: disconnected

If we unplug DP-6, the topology starting at DP-7 will be destroyed but
it's payloads will live on in DP-1's VCPI allocations and thus require
removal. However, this removal currently fails because
drm_dp_update_payload_part1() will (rightly so) try to validate the port
before accessing it, fail then abort. If we keep going, eventually we
run the MST hub out of bandwidth and all new allocations will start to
fail (or in my case; all new displays just start flickering a ton).

We could just teach drm_dp_update_payload_part1() not to drop the port
ref in this case, but then we also need to teach
drm_dp_destroy_payload_step1() to do the same thing, then hope no one
ever adds anything to the that requires a validated port reference in
drm_dp_destroy_connector_work(). Kind of sketchy.

So let's go with a more clever solution: any port that
drm_dp_destroy_connector_work() interacts with is guaranteed to still
exist in memory until we say so. While said port might not be valid we
don't really care: that's the whole reason we're destroying it in the
first place! So, teach drm_dp_get_validated_port_ref() to use the all
mighty current_work() function to avoid attempting to validate ports
from the context of mgr->destroy_connector_work. I can't see any
situation where this wouldn't be safe, and this avoids having to play
whack-a-mole in the future of trying to work around port validation.

Signed-off-by: Lyude Paul <lyude@redhat.com>
Fixes: 263efde31f97 ("drm/dp/mst: Get validated port ref in drm_dp_update_payload_part1()")
Reported-by: Jerry Zuo <Jerry.Zuo@amd.com>
Cc: Jerry Zuo <Jerry.Zuo@amd.com>
Cc: Harry Wentland <Harry.Wentland@amd.com>
Cc: <stable@vger.kernel.org> # v4.6+
Reviewed-by: Dave Airlie <airlied@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181113224613.28809-1-lyude@redhat.com
Signed-off-by: Sean Paul <seanpaul@chromium.org>
---
 drivers/gpu/drm/drm_dp_mst_topology.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
index 0e0df398222d1..250d7160826ff 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -1023,9 +1023,20 @@ static struct drm_dp_mst_port *drm_dp_mst_get_port_ref_locked(struct drm_dp_mst_
 static struct drm_dp_mst_port *drm_dp_get_validated_port_ref(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_mst_port *port)
 {
 	struct drm_dp_mst_port *rport = NULL;
+
 	mutex_lock(&mgr->lock);
-	if (mgr->mst_primary)
-		rport = drm_dp_mst_get_port_ref_locked(mgr->mst_primary, port);
+	/*
+	 * Port may or may not be 'valid' but we don't care about that when
+	 * destroying the port and we are guaranteed that the port pointer
+	 * will be valid until we've finished
+	 */
+	if (current_work() == &mgr->destroy_connector_work) {
+		kref_get(&port->kref);
+		rport = port;
+	} else if (mgr->mst_primary) {
+		rport = drm_dp_mst_get_port_ref_locked(mgr->mst_primary,
+						       port);
+	}
 	mutex_unlock(&mgr->lock);
 	return rport;
 }
-- 
GitLab


From 23a336b34258aba3b50ea6863cca4e81b5ef6384 Mon Sep 17 00:00:00 2001
From: Sergio Correia <sergio@correia.cc>
Date: Thu, 22 Nov 2018 02:33:29 -0300
Subject: [PATCH 1408/1890] drm: set is_master to 0 upon drm_new_set_master()
 failure

When drm_new_set_master() fails, set is_master to 0, to prevent a
possible NULL pointer deref.

Here is a problematic flow: we check is_master in drm_is_current_master(),
then proceed to call drm_lease_owner() passing master. If we do not restore
is_master status when drm_new_set_master() fails, we may have a situation
in which is_master will be 1 and master itself, NULL, leading to the deref
of a NULL pointer in drm_lease_owner().

This fixes the following OOPS, observed on an ArchLinux running a 4.19.2
kernel:

[   97.804282] BUG: unable to handle kernel NULL pointer dereference at 0000000000000080
[   97.807224] PGD 0 P4D 0
[   97.807224] Oops: 0000 [#1] PREEMPT SMP NOPTI
[   97.807224] CPU: 0 PID: 1348 Comm: xfwm4 Tainted: P           OE     4.19.2-arch1-1-ARCH #1
[   97.807224] Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./AB350 Pro4, BIOS P5.10 10/16/2018
[   97.807224] RIP: 0010:drm_lease_owner+0xd/0x20 [drm]
[   97.807224] Code: 83 c4 18 5b 5d c3 b8 ea ff ff ff eb e2 b8 ed ff ff ff eb db e8 b4 ca 68 fb 0f 1f 40 00 0f 1f 44 00 00 48 89 f8 eb 03 48 89 d0 <48> 8b 90 80 00 00 00 48 85 d2 75 f1 c3 66 0f 1f 44 00 00 0f 1f 44
[   97.807224] RSP: 0018:ffffb8cf08e07bb0 EFLAGS: 00010202
[   97.807224] RAX: 0000000000000000 RBX: ffff9cf0f2586c00 RCX: ffff9cf0f2586c88
[   97.807224] RDX: ffff9cf0ddbd8000 RSI: 0000000000000000 RDI: 0000000000000000
[   97.807224] RBP: ffff9cf1040e9800 R08: 0000000000000000 R09: 0000000000000000
[   97.807224] R10: ffffdeb30fd5d680 R11: ffffdeb30f5d6808 R12: ffff9cf1040e9888
[   97.807224] R13: 0000000000000000 R14: dead000000000200 R15: ffff9cf0f2586cc8
[   97.807224] FS:  00007f4145513180(0000) GS:ffff9cf10ea00000(0000) knlGS:0000000000000000
[   97.807224] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   97.807224] CR2: 0000000000000080 CR3: 00000003d7548000 CR4: 00000000003406f0
[   97.807224] Call Trace:
[   97.807224]  drm_is_current_master+0x1a/0x30 [drm]
[   97.807224]  drm_master_release+0x3e/0x130 [drm]
[   97.807224]  drm_file_free.part.0+0x2be/0x2d0 [drm]
[   97.807224]  drm_open+0x1ba/0x1e0 [drm]
[   97.807224]  drm_stub_open+0xaf/0xe0 [drm]
[   97.807224]  chrdev_open+0xa3/0x1b0
[   97.807224]  ? cdev_put.part.0+0x20/0x20
[   97.807224]  do_dentry_open+0x132/0x340
[   97.807224]  path_openat+0x2d1/0x14e0
[   97.807224]  ? mem_cgroup_commit_charge+0x7a/0x520
[   97.807224]  do_filp_open+0x93/0x100
[   97.807224]  ? __check_object_size+0x102/0x189
[   97.807224]  ? _raw_spin_unlock+0x16/0x30
[   97.807224]  do_sys_open+0x186/0x210
[   97.807224]  do_syscall_64+0x5b/0x170
[   97.807224]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[   97.807224] RIP: 0033:0x7f4147b07976
[   97.807224] Code: 89 54 24 08 e8 7b f4 ff ff 8b 74 24 0c 48 8b 3c 24 41 89 c0 44 8b 54 24 08 b8 01 01 00 00 89 f2 48 89 fe bf 9c ff ff ff 0f 05 <48> 3d 00 f0 ff ff 77 30 44 89 c7 89 44 24 08 e8 a6 f4 ff ff 8b 44
[   97.807224] RSP: 002b:00007ffcced96ca0 EFLAGS: 00000293 ORIG_RAX: 0000000000000101
[   97.807224] RAX: ffffffffffffffda RBX: 00005619d5037f80 RCX: 00007f4147b07976
[   97.807224] RDX: 0000000000000002 RSI: 00005619d46b969c RDI: 00000000ffffff9c
[   98.040039] RBP: 0000000000000024 R08: 0000000000000000 R09: 0000000000000000
[   98.040039] R10: 0000000000000000 R11: 0000000000000293 R12: 0000000000000024
[   98.040039] R13: 0000000000000012 R14: 00005619d5035950 R15: 0000000000000012
[   98.040039] Modules linked in: nct6775 hwmon_vid algif_skcipher af_alg nls_iso8859_1 nls_cp437 vfat fat uvcvideo videobuf2_vmalloc videobuf2_memops videobuf2_v4l2 videobuf2_common arc4 videodev media snd_usb_audio snd_hda_codec_hdmi snd_usbmidi_lib snd_rawmidi snd_seq_device mousedev input_leds iwlmvm mac80211 snd_hda_codec_realtek snd_hda_codec_generic snd_hda_intel snd_hda_codec edac_mce_amd kvm_amd snd_hda_core kvm iwlwifi snd_hwdep r8169 wmi_bmof cfg80211 snd_pcm irqbypass snd_timer snd libphy soundcore pinctrl_amd rfkill pcspkr sp5100_tco evdev gpio_amdpt k10temp mac_hid i2c_piix4 wmi pcc_cpufreq acpi_cpufreq vboxnetflt(OE) vboxnetadp(OE) vboxpci(OE) vboxdrv(OE) msr sg crypto_user ip_tables x_tables ext4 crc32c_generic crc16 mbcache jbd2 fscrypto uas usb_storage dm_crypt hid_generic usbhid hid
[   98.040039]  dm_mod raid1 md_mod sd_mod crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel pcbc ahci libahci aesni_intel aes_x86_64 libata crypto_simd cryptd glue_helper ccp xhci_pci rng_core scsi_mod xhci_hcd nvidia_drm(POE) drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm agpgart nvidia_uvm(POE) nvidia_modeset(POE) nvidia(POE) ipmi_devintf ipmi_msghandler
[   98.040039] CR2: 0000000000000080
[   98.040039] ---[ end trace 3b65093b6fe62b2f ]---
[   98.040039] RIP: 0010:drm_lease_owner+0xd/0x20 [drm]
[   98.040039] Code: 83 c4 18 5b 5d c3 b8 ea ff ff ff eb e2 b8 ed ff ff ff eb db e8 b4 ca 68 fb 0f 1f 40 00 0f 1f 44 00 00 48 89 f8 eb 03 48 89 d0 <48> 8b 90 80 00 00 00 48 85 d2 75 f1 c3 66 0f 1f 44 00 00 0f 1f 44
[   98.040039] RSP: 0018:ffffb8cf08e07bb0 EFLAGS: 00010202
[   98.040039] RAX: 0000000000000000 RBX: ffff9cf0f2586c00 RCX: ffff9cf0f2586c88
[   98.040039] RDX: ffff9cf0ddbd8000 RSI: 0000000000000000 RDI: 0000000000000000
[   98.040039] RBP: ffff9cf1040e9800 R08: 0000000000000000 R09: 0000000000000000
[   98.040039] R10: ffffdeb30fd5d680 R11: ffffdeb30f5d6808 R12: ffff9cf1040e9888
[   98.040039] R13: 0000000000000000 R14: dead000000000200 R15: ffff9cf0f2586cc8
[   98.040039] FS:  00007f4145513180(0000) GS:ffff9cf10ea00000(0000) knlGS:0000000000000000
[   98.040039] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   98.040039] CR2: 0000000000000080 CR3: 00000003d7548000 CR4: 00000000003406f0

Signed-off-by: Sergio Correia <sergio@correia.cc>
Cc: stable@vger.kernel.org
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20181122053329.2692-1-sergio@correia.cc
Signed-off-by: Sean Paul <seanpaul@chromium.org>
---
 drivers/gpu/drm/drm_auth.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/drm_auth.c b/drivers/gpu/drm/drm_auth.c
index d9c0f75739054..1669c42c40ed3 100644
--- a/drivers/gpu/drm/drm_auth.c
+++ b/drivers/gpu/drm/drm_auth.c
@@ -142,6 +142,7 @@ static int drm_new_set_master(struct drm_device *dev, struct drm_file *fpriv)
 
 	lockdep_assert_held_once(&dev->master_mutex);
 
+	WARN_ON(fpriv->is_master);
 	old_master = fpriv->master;
 	fpriv->master = drm_master_create(dev);
 	if (!fpriv->master) {
@@ -170,6 +171,7 @@ static int drm_new_set_master(struct drm_device *dev, struct drm_file *fpriv)
 	/* drop references and restore old master on failure */
 	drm_master_put(&fpriv->master);
 	fpriv->master = old_master;
+	fpriv->is_master = 0;
 
 	return ret;
 }
-- 
GitLab


From 2bcd3ecab773f73211c45bb1430bb52ac641f271 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <narmstrong@baylibre.com>
Date: Thu, 22 Nov 2018 17:01:03 +0100
Subject: [PATCH 1409/1890] drm/meson: Fixes for drm_crtc_vblank_on/off support

Since Linux 4.17, calls to drm_crtc_vblank_on/off are mandatory, and we get
a warning when ctrc is disabled :
" driver forgot to call drm_crtc_vblank_off()"

But, the vsync IRQ was not totally disabled due the transient hardware
state and specific interrupt line, thus adding proper IRQ masking from
the HHI system control registers.

The last change fixes a race condition introduced by calling the added
drm_crtc_vblank_on/off when an HPD event occurs from the HDMI connector,
triggering a WARN_ON() in the _atomic_begin() callback when the CRTC
is disabled, thus also triggering a WARN_ON() in drm_vblank_put() :

WARNING: CPU: 0 PID: 1185 at drivers/gpu/drm/meson/meson_crtc.c:157 meson_crtc_atomic_begin+0x78/0x80
[...]
Call trace:
  meson_crtc_atomic_begin+0x78/0x80
  drm_atomic_helper_commit_planes+0x140/0x218
  drm_atomic_helper_commit_tail+0x38/0x80
  commit_tail+0x7c/0x80
  drm_atomic_helper_commit+0xdc/0x150
  drm_atomic_commit+0x54/0x60
  restore_fbdev_mode_atomic+0x198/0x238
  restore_fbdev_mode+0x6c/0x1c0
  drm_fb_helper_restore_fbdev_mode_unlocked+0x7c/0xf0
  drm_fb_helper_set_par+0x34/0x60
  drm_fb_helper_hotplug_event.part.28+0xb8/0xc8
  drm_fbdev_client_hotplug+0xa4/0xe0
  drm_client_dev_hotplug+0x90/0xe0
  drm_kms_helper_hotplug_event+0x3c/0x48
  drm_helper_hpd_irq_event+0x134/0x168
  dw_hdmi_top_thread_irq+0x3c/0x50
[...]
WARNING: CPU: 0 PID: 1185 at drivers/gpu/drm/drm_vblank.c:1026 drm_vblank_put+0xb4/0xc8
[...]
 Call trace:
  drm_vblank_put+0xb4/0xc8
  drm_crtc_vblank_put+0x24/0x30
  drm_atomic_helper_wait_for_vblanks.part.9+0x130/0x2b8
  drm_atomic_helper_commit_tail+0x68/0x80
[...]

The issue is that vblank need to be enabled in any occurrence of :
- atomic_enable()
- atomic_begin() and state->enable == true, which was not the case

Moving the CRTC enable code to a common function and calling in one of
these occurrence solves this race condition and makes sure vblank is
enabled in each call to _atomic_begin() from the HPD event leading to
drm_atomic_helper_commit_planes().

To Summarize :
- Make sure that the CRTC code will call the drm_crtc_vblank_on()/off()
- *Really* mask the Vsync IRQ
- Initialize and enable vblank at the first
  atomic_begin()/_atomic_enable()

Cc: stable@vger.kernel.org # 4.17+
Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Reviewed-by: Lyude Paul <lyude@redhat.com>
[fixed typos+added cc for stable]
Signed-off-by: Lyude Paul <lyude@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181122160103.10993-1-narmstrong@baylibre.com
Signed-off-by: Sean Paul <seanpaul@chromium.org>
---
 drivers/gpu/drm/meson/meson_crtc.c | 27 +++++++++++++++++++++++++--
 drivers/gpu/drm/meson/meson_venc.c |  3 +++
 2 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/meson/meson_crtc.c b/drivers/gpu/drm/meson/meson_crtc.c
index 05520202c9677..191b314f9e9e5 100644
--- a/drivers/gpu/drm/meson/meson_crtc.c
+++ b/drivers/gpu/drm/meson/meson_crtc.c
@@ -45,6 +45,7 @@ struct meson_crtc {
 	struct drm_crtc base;
 	struct drm_pending_vblank_event *event;
 	struct meson_drm *priv;
+	bool enabled;
 };
 #define to_meson_crtc(x) container_of(x, struct meson_crtc, base)
 
@@ -80,8 +81,7 @@ static const struct drm_crtc_funcs meson_crtc_funcs = {
 
 };
 
-static void meson_crtc_atomic_enable(struct drm_crtc *crtc,
-				     struct drm_crtc_state *old_state)
+static void meson_crtc_enable(struct drm_crtc *crtc)
 {
 	struct meson_crtc *meson_crtc = to_meson_crtc(crtc);
 	struct drm_crtc_state *crtc_state = crtc->state;
@@ -101,6 +101,22 @@ static void meson_crtc_atomic_enable(struct drm_crtc *crtc,
 	writel_bits_relaxed(VPP_POSTBLEND_ENABLE, VPP_POSTBLEND_ENABLE,
 			    priv->io_base + _REG(VPP_MISC));
 
+	drm_crtc_vblank_on(crtc);
+
+	meson_crtc->enabled = true;
+}
+
+static void meson_crtc_atomic_enable(struct drm_crtc *crtc,
+				     struct drm_crtc_state *old_state)
+{
+	struct meson_crtc *meson_crtc = to_meson_crtc(crtc);
+	struct meson_drm *priv = meson_crtc->priv;
+
+	DRM_DEBUG_DRIVER("\n");
+
+	if (!meson_crtc->enabled)
+		meson_crtc_enable(crtc);
+
 	priv->viu.osd1_enabled = true;
 }
 
@@ -110,6 +126,8 @@ static void meson_crtc_atomic_disable(struct drm_crtc *crtc,
 	struct meson_crtc *meson_crtc = to_meson_crtc(crtc);
 	struct meson_drm *priv = meson_crtc->priv;
 
+	drm_crtc_vblank_off(crtc);
+
 	priv->viu.osd1_enabled = false;
 	priv->viu.osd1_commit = false;
 
@@ -124,6 +142,8 @@ static void meson_crtc_atomic_disable(struct drm_crtc *crtc,
 
 		crtc->state->event = NULL;
 	}
+
+	meson_crtc->enabled = false;
 }
 
 static void meson_crtc_atomic_begin(struct drm_crtc *crtc,
@@ -132,6 +152,9 @@ static void meson_crtc_atomic_begin(struct drm_crtc *crtc,
 	struct meson_crtc *meson_crtc = to_meson_crtc(crtc);
 	unsigned long flags;
 
+	if (crtc->state->enable && !meson_crtc->enabled)
+		meson_crtc_enable(crtc);
+
 	if (crtc->state->event) {
 		WARN_ON(drm_crtc_vblank_get(crtc) != 0);
 
diff --git a/drivers/gpu/drm/meson/meson_venc.c b/drivers/gpu/drm/meson/meson_venc.c
index acbbad3e322ca..2f9c3814a2906 100644
--- a/drivers/gpu/drm/meson/meson_venc.c
+++ b/drivers/gpu/drm/meson/meson_venc.c
@@ -71,6 +71,7 @@
  */
 
 /* HHI Registers */
+#define HHI_GCLK_MPEG2		0x148 /* 0x52 offset in data sheet */
 #define HHI_VDAC_CNTL0		0x2F4 /* 0xbd offset in data sheet */
 #define HHI_VDAC_CNTL1		0x2F8 /* 0xbe offset in data sheet */
 #define HHI_HDMI_PHY_CNTL0	0x3a0 /* 0xe8 offset in data sheet */
@@ -1530,10 +1531,12 @@ unsigned int meson_venci_get_field(struct meson_drm *priv)
 void meson_venc_enable_vsync(struct meson_drm *priv)
 {
 	writel_relaxed(2, priv->io_base + _REG(VENC_INTCTRL));
+	regmap_update_bits(priv->hhi, HHI_GCLK_MPEG2, BIT(25), BIT(25));
 }
 
 void meson_venc_disable_vsync(struct meson_drm *priv)
 {
+	regmap_update_bits(priv->hhi, HHI_GCLK_MPEG2, BIT(25), 0);
 	writel_relaxed(0, priv->io_base + _REG(VENC_INTCTRL));
 }
 
-- 
GitLab


From 995b278e4723b26f8ebf0e7c119286d16c712747 Mon Sep 17 00:00:00 2001
From: Lyude Paul <lyude@redhat.com>
Date: Sat, 24 Nov 2018 14:12:38 -0500
Subject: [PATCH 1410/1890] drm/meson: Enable fast_io in
 meson_dw_hdmi_regmap_config

Seeing as we use this registermap in the context of our IRQ handlers, we
need to be using spinlocks for reading/writing registers so that we can
still read them from IRQ handlers without having to grab any mutexes and
accidentally sleep. We don't currently do this, as pointed out by
lockdep:

[   18.403770] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:908
[   18.406744] in_atomic(): 1, irqs_disabled(): 128, pid: 68, name: kworker/u17:0
[   18.413864] INFO: lockdep is turned off.
[   18.417675] irq event stamp: 12
[   18.420778] hardirqs last  enabled at (11): [<ffff000008a4f57c>] _raw_spin_unlock_irq+0x2c/0x60
[   18.429510] hardirqs last disabled at (12): [<ffff000008a48914>] __schedule+0xc4/0xa60
[   18.437345] softirqs last  enabled at (0): [<ffff0000080b55e0>] copy_process.isra.4.part.5+0x4d8/0x1c50
[   18.446684] softirqs last disabled at (0): [<0000000000000000>]           (null)
[   18.453979] CPU: 0 PID: 68 Comm: kworker/u17:0 Tainted: G        W  O      4.20.0-rc3Lyude-Test+ #9
[   18.469839] Hardware name: amlogic khadas-vim2/khadas-vim2, BIOS 2018.07-rc2-armbian 09/11/2018
[   18.480037] Workqueue: hci0 hci_power_on [bluetooth]
[   18.487138] Call trace:
[   18.494192]  dump_backtrace+0x0/0x1b8
[   18.501280]  show_stack+0x14/0x20
[   18.508361]  dump_stack+0xbc/0xf4
[   18.515427]  ___might_sleep+0x140/0x1d8
[   18.522515]  __might_sleep+0x50/0x88
[   18.529582]  __mutex_lock+0x60/0x870
[   18.536621]  mutex_lock_nested+0x1c/0x28
[   18.543660]  regmap_lock_mutex+0x10/0x18
[   18.550696]  regmap_read+0x38/0x70
[   18.557727]  dw_hdmi_hardirq+0x58/0x138 [dw_hdmi]
[   18.564804]  __handle_irq_event_percpu+0xac/0x410
[   18.571891]  handle_irq_event_percpu+0x34/0x88
[   18.578982]  handle_irq_event+0x48/0x78
[   18.586051]  handle_fasteoi_irq+0xac/0x160
[   18.593061]  generic_handle_irq+0x24/0x38
[   18.599989]  __handle_domain_irq+0x60/0xb8
[   18.606857]  gic_handle_irq+0x50/0xa0
[   18.613659]  el1_irq+0xb4/0x130
[   18.620394]  debug_lockdep_rcu_enabled+0x2c/0x30
[   18.627111]  schedule+0x38/0xa0
[   18.633781]  schedule_timeout+0x3a8/0x510
[   18.640389]  wait_for_common+0x15c/0x180
[   18.646905]  wait_for_completion+0x14/0x20
[   18.653319]  mmc_wait_for_req_done+0x28/0x168
[   18.659693]  mmc_wait_for_req+0xa8/0xe8
[   18.665978]  mmc_wait_for_cmd+0x64/0x98
[   18.672180]  mmc_io_rw_direct_host+0x94/0x130
[   18.678385]  mmc_io_rw_direct+0x10/0x18
[   18.684516]  sdio_enable_func+0xe8/0x1d0
[   18.690627]  btsdio_open+0x24/0xc0 [btsdio]
[   18.696821]  hci_dev_do_open+0x64/0x598 [bluetooth]
[   18.703025]  hci_power_on+0x50/0x270 [bluetooth]
[   18.709163]  process_one_work+0x2a0/0x6e0
[   18.715252]  worker_thread+0x40/0x448
[   18.721310]  kthread+0x12c/0x130
[   18.727326]  ret_from_fork+0x10/0x1c
[   18.735555] ------------[ cut here ]------------
[   18.741430] do not call blocking ops when !TASK_RUNNING; state=2 set at [<000000006265ec59>] wait_for_common+0x140/0x180
[   18.752417] WARNING: CPU: 0 PID: 68 at kernel/sched/core.c:6096 __might_sleep+0x7c/0x88
[   18.760553] Modules linked in: dm_mirror dm_region_hash dm_log dm_mod
btsdio bluetooth snd_soc_hdmi_codec dw_hdmi_i2s_audio ecdh_generic
brcmfmac brcmutil cfg80211 rfkill ir_nec_decoder meson_dw_hdmi(O)
dw_hdmi rc_geekbox meson_rng meson_ir ao_cec rng_core rc_core cec
leds_pwm efivars nfsd ip_tables x_tables crc32_generic f2fs uas
meson_gxbb_wdt pwm_meson efivarfs ipv6
[   18.799469] CPU: 0 PID: 68 Comm: kworker/u17:0 Tainted: G        W  O      4.20.0-rc3Lyude-Test+ #9
[   18.808858] Hardware name: amlogic khadas-vim2/khadas-vim2, BIOS 2018.07-rc2-armbian 09/11/2018
[   18.818045] Workqueue: hci0 hci_power_on [bluetooth]
[   18.824088] pstate: 80000085 (Nzcv daIf -PAN -UAO)
[   18.829891] pc : __might_sleep+0x7c/0x88
[   18.835722] lr : __might_sleep+0x7c/0x88
[   18.841256] sp : ffff000008003cb0
[   18.846751] x29: ffff000008003cb0 x28: 0000000000000000
[   18.852269] x27: ffff00000938e000 x26: ffff800010283000
[   18.857726] x25: ffff800010353280 x24: ffff00000868ef50
[   18.863166] x23: 0000000000000000 x22: 0000000000000000
[   18.868551] x21: 0000000000000000 x20: 000000000000038c
[   18.873850] x19: ffff000008cd08c0 x18: 0000000000000010
[   18.879081] x17: ffff000008a68cb0 x16: 0000000000000000
[   18.884197] x15: 0000000000aaaaaa x14: 0e200e200e200e20
[   18.889239] x13: 0000000000000001 x12: 00000000ffffffff
[   18.894261] x11: ffff000008adfa48 x10: 0000000000000001
[   18.899517] x9 : ffff0000092a0158 x8 : 0000000000000000
[   18.904674] x7 : ffff00000812136c x6 : 0000000000000000
[   18.909895] x5 : 0000000000000000 x4 : 0000000000000001
[   18.915080] x3 : 0000000000000007 x2 : 0000000000000007
[   18.920269] x1 : 99ab8e9ebb6c8500 x0 : 0000000000000000
[   18.925443] Call trace:
[   18.929904]  __might_sleep+0x7c/0x88
[   18.934311]  __mutex_lock+0x60/0x870
[   18.938687]  mutex_lock_nested+0x1c/0x28
[   18.943076]  regmap_lock_mutex+0x10/0x18
[   18.947453]  regmap_read+0x38/0x70
[   18.951842]  dw_hdmi_hardirq+0x58/0x138 [dw_hdmi]
[   18.956269]  __handle_irq_event_percpu+0xac/0x410
[   18.960712]  handle_irq_event_percpu+0x34/0x88
[   18.965176]  handle_irq_event+0x48/0x78
[   18.969612]  handle_fasteoi_irq+0xac/0x160
[   18.974058]  generic_handle_irq+0x24/0x38
[   18.978501]  __handle_domain_irq+0x60/0xb8
[   18.982938]  gic_handle_irq+0x50/0xa0
[   18.987351]  el1_irq+0xb4/0x130
[   18.991734]  debug_lockdep_rcu_enabled+0x2c/0x30
[   18.996180]  schedule+0x38/0xa0
[   19.000609]  schedule_timeout+0x3a8/0x510
[   19.005064]  wait_for_common+0x15c/0x180
[   19.009513]  wait_for_completion+0x14/0x20
[   19.013951]  mmc_wait_for_req_done+0x28/0x168
[   19.018402]  mmc_wait_for_req+0xa8/0xe8
[   19.022809]  mmc_wait_for_cmd+0x64/0x98
[   19.027177]  mmc_io_rw_direct_host+0x94/0x130
[   19.031563]  mmc_io_rw_direct+0x10/0x18
[   19.035922]  sdio_enable_func+0xe8/0x1d0
[   19.040294]  btsdio_open+0x24/0xc0 [btsdio]
[   19.044742]  hci_dev_do_open+0x64/0x598 [bluetooth]
[   19.049228]  hci_power_on+0x50/0x270 [bluetooth]
[   19.053687]  process_one_work+0x2a0/0x6e0
[   19.058143]  worker_thread+0x40/0x448
[   19.062608]  kthread+0x12c/0x130
[   19.067064]  ret_from_fork+0x10/0x1c
[   19.071513] irq event stamp: 12
[   19.075937] hardirqs last  enabled at (11): [<ffff000008a4f57c>] _raw_spin_unlock_irq+0x2c/0x60
[   19.083560] hardirqs last disabled at (12): [<ffff000008a48914>] __schedule+0xc4/0xa60
[   19.091401] softirqs last  enabled at (0): [<ffff0000080b55e0>] copy_process.isra.4.part.5+0x4d8/0x1c50
[   19.100801] softirqs last disabled at (0): [<0000000000000000>]           (null)
[   19.108135] ---[ end trace 38c4920787b88c75 ]---

So, fix this by enabling the fast_io option in our regmap config so that
regmap uses spinlocks for locking instead of mutexes.

Signed-off-by: Lyude Paul <lyude@redhat.com>
Fixes: 3f68be7d8e96 ("drm/meson: Add support for HDMI encoder and DW-HDMI bridge + PHY")
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Neil Armstrong <narmstrong@baylibre.com>
Cc: Carlo Caione <carlo@caione.org>
Cc: Kevin Hilman <khilman@baylibre.com>
Cc: dri-devel@lists.freedesktop.org
Cc: linux-amlogic@lists.infradead.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: <stable@vger.kernel.org> # v4.12+
Acked-by: Neil Armstrong <narmstrong@baylibre.com>
Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181124191238.28276-1-lyude@redhat.com
Signed-off-by: Sean Paul <seanpaul@chromium.org>
---
 drivers/gpu/drm/meson/meson_dw_hdmi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/meson/meson_dw_hdmi.c b/drivers/gpu/drm/meson/meson_dw_hdmi.c
index df7247cd93f98..2cb2ad26d7167 100644
--- a/drivers/gpu/drm/meson/meson_dw_hdmi.c
+++ b/drivers/gpu/drm/meson/meson_dw_hdmi.c
@@ -706,6 +706,7 @@ static const struct regmap_config meson_dw_hdmi_regmap_config = {
 	.reg_read = meson_dw_hdmi_reg_read,
 	.reg_write = meson_dw_hdmi_reg_write,
 	.max_register = 0x10000,
+	.fast_io = true,
 };
 
 static bool meson_hdmi_connector_is_available(struct device *dev)
-- 
GitLab


From 97b2a3180a559a33852ac0cd77904166069484fd Mon Sep 17 00:00:00 2001
From: Lyude Paul <lyude@redhat.com>
Date: Sat, 24 Nov 2018 20:21:17 -0500
Subject: [PATCH 1411/1890] drm/meson: Fix OOB memory accesses in
 meson_viu_set_osd_lut()

Currently on driver bringup with KASAN enabled, meson triggers an OOB
memory access as shown below:

[  117.904528] ==================================================================
[  117.904560] BUG: KASAN: global-out-of-bounds in meson_viu_set_osd_lut+0x7a0/0x890
[  117.904588] Read of size 4 at addr ffff20000a63ce24 by task systemd-udevd/498
[  117.904601]
[  118.083372] CPU: 4 PID: 498 Comm: systemd-udevd Not tainted 4.20.0-rc3Lyude-Test+ #20
[  118.091143] Hardware name: amlogic khadas-vim2/khadas-vim2, BIOS 2018.07-rc2-armbian 09/11/2018
[  118.099768] Call trace:
[  118.102181]  dump_backtrace+0x0/0x3e8
[  118.105796]  show_stack+0x14/0x20
[  118.109083]  dump_stack+0x130/0x1c4
[  118.112539]  print_address_description+0x60/0x25c
[  118.117214]  kasan_report+0x1b4/0x368
[  118.120851]  __asan_report_load4_noabort+0x18/0x20
[  118.125566]  meson_viu_set_osd_lut+0x7a0/0x890
[  118.129953]  meson_viu_init+0x10c/0x290
[  118.133741]  meson_drv_bind_master+0x474/0x748
[  118.138141]  meson_drv_bind+0x10/0x18
[  118.141760]  try_to_bring_up_master+0x3d8/0x768
[  118.146249]  component_add+0x214/0x570
[  118.149978]  meson_dw_hdmi_probe+0x18/0x20 [meson_dw_hdmi]
[  118.155404]  platform_drv_probe+0x98/0x138
[  118.159455]  really_probe+0x2a0/0xa70
[  118.163070]  driver_probe_device+0x1b4/0x2d8
[  118.167299]  __driver_attach+0x200/0x280
[  118.171189]  bus_for_each_dev+0x10c/0x1a8
[  118.175144]  driver_attach+0x38/0x50
[  118.178681]  bus_add_driver+0x330/0x608
[  118.182471]  driver_register+0x140/0x388
[  118.186361]  __platform_driver_register+0xc8/0x108
[  118.191117]  meson_dw_hdmi_platform_driver_init+0x1c/0x1000 [meson_dw_hdmi]
[  118.198022]  do_one_initcall+0x12c/0x3bc
[  118.201883]  do_init_module+0x1fc/0x638
[  118.205673]  load_module+0x4b4c/0x6808
[  118.209387]  __se_sys_init_module+0x2e8/0x3c0
[  118.213699]  __arm64_sys_init_module+0x68/0x98
[  118.218100]  el0_svc_common+0x104/0x210
[  118.221893]  el0_svc_handler+0x48/0xb8
[  118.225594]  el0_svc+0x8/0xc
[  118.228429]
[  118.229887] The buggy address belongs to the variable:
[  118.235007]  eotf_33_linear_mapping+0x84/0xc0
[  118.239301]
[  118.240752] Memory state around the buggy address:
[  118.245522]  ffff20000a63cd00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[  118.252695]  ffff20000a63cd80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[  118.259850] >ffff20000a63ce00: 00 00 00 00 04 fa fa fa fa fa fa fa 00 00 00 00
[  118.267000]                                ^
[  118.271222]  ffff20000a63ce80: 00 fa fa fa fa fa fa fa 00 00 00 00 00 00 00 00
[  118.278393]  ffff20000a63cf00: 00 00 00 00 00 00 00 00 00 00 00 00 04 fa fa fa
[  118.285542] ==================================================================
[  118.292699] Disabling lock debugging due to kernel taint

It seems that when looping through the OSD EOTF LUT maps, we use the
same max iterator for OETF: 20. This is wrong though, since 20*2 is 40,
which means that we'll stop out of bounds on the EOTF maps.

But, this whole thing is already confusing enough to read through as-is,
so let's just replace all of the hardcoded sizes with
OSD_(OETF/EOTF)_LUT_SIZE / 2.

Signed-off-by: Lyude Paul <lyude@redhat.com>
Fixes: bbbe775ec5b5 ("drm: Add support for Amlogic Meson Graphic Controller")
Cc: Neil Armstrong <narmstrong@baylibre.com>
Cc: Maxime Ripard <maxime.ripard@bootlin.com>
Cc: Carlo Caione <carlo@caione.org>
Cc: Kevin Hilman <khilman@baylibre.com>
Cc: dri-devel@lists.freedesktop.org
Cc: linux-amlogic@lists.infradead.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: <stable@vger.kernel.org> # v4.10+
Acked-by: Neil Armstrong <narmstrong@baylibre.com>
Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181125012117.31915-1-lyude@redhat.com
Signed-off-by: Sean Paul <seanpaul@chromium.org>
---
 drivers/gpu/drm/meson/meson_viu.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/meson/meson_viu.c b/drivers/gpu/drm/meson/meson_viu.c
index 6bcfa527c1801..26a0857878bfd 100644
--- a/drivers/gpu/drm/meson/meson_viu.c
+++ b/drivers/gpu/drm/meson/meson_viu.c
@@ -184,18 +184,18 @@ void meson_viu_set_osd_lut(struct meson_drm *priv, enum viu_lut_sel_e lut_sel,
 	if (lut_sel == VIU_LUT_OSD_OETF) {
 		writel(0, priv->io_base + _REG(addr_port));
 
-		for (i = 0; i < 20; i++)
+		for (i = 0; i < (OSD_OETF_LUT_SIZE / 2); i++)
 			writel(r_map[i * 2] | (r_map[i * 2 + 1] << 16),
 				priv->io_base + _REG(data_port));
 
 		writel(r_map[OSD_OETF_LUT_SIZE - 1] | (g_map[0] << 16),
 			priv->io_base + _REG(data_port));
 
-		for (i = 0; i < 20; i++)
+		for (i = 0; i < (OSD_OETF_LUT_SIZE / 2); i++)
 			writel(g_map[i * 2 + 1] | (g_map[i * 2 + 2] << 16),
 				priv->io_base + _REG(data_port));
 
-		for (i = 0; i < 20; i++)
+		for (i = 0; i < (OSD_OETF_LUT_SIZE / 2); i++)
 			writel(b_map[i * 2] | (b_map[i * 2 + 1] << 16),
 				priv->io_base + _REG(data_port));
 
@@ -211,18 +211,18 @@ void meson_viu_set_osd_lut(struct meson_drm *priv, enum viu_lut_sel_e lut_sel,
 	} else if (lut_sel == VIU_LUT_OSD_EOTF) {
 		writel(0, priv->io_base + _REG(addr_port));
 
-		for (i = 0; i < 20; i++)
+		for (i = 0; i < (OSD_EOTF_LUT_SIZE / 2); i++)
 			writel(r_map[i * 2] | (r_map[i * 2 + 1] << 16),
 				priv->io_base + _REG(data_port));
 
 		writel(r_map[OSD_EOTF_LUT_SIZE - 1] | (g_map[0] << 16),
 			priv->io_base + _REG(data_port));
 
-		for (i = 0; i < 20; i++)
+		for (i = 0; i < (OSD_EOTF_LUT_SIZE / 2); i++)
 			writel(g_map[i * 2 + 1] | (g_map[i * 2 + 2] << 16),
 				priv->io_base + _REG(data_port));
 
-		for (i = 0; i < 20; i++)
+		for (i = 0; i < (OSD_EOTF_LUT_SIZE / 2); i++)
 			writel(b_map[i * 2] | (b_map[i * 2 + 1] << 16),
 				priv->io_base + _REG(data_port));
 
-- 
GitLab


From 31e1ab494559fb46de304cc6c2aed1528f94b298 Mon Sep 17 00:00:00 2001
From: Christian Hewitt <christianshewitt@gmail.com>
Date: Wed, 21 Nov 2018 13:39:29 +0400
Subject: [PATCH 1412/1890] drm/meson: add support for 1080p25 mode

This essential mode for PAL users is missing, so add it.

Fixes: 335e3713afb87 ("drm/meson: Add support for HDMI venc modes and settings")
Signed-off-by: Christian Hewitt <christianshewitt@gmail.com>
Acked-by: Neil Armstrong <narmstrong@baylibre.com>
Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1542793169-13008-1-git-send-email-christianshewitt@gmail.com
Signed-off-by: Sean Paul <seanpaul@chromium.org>
---
 drivers/gpu/drm/meson/meson_venc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/meson/meson_venc.c b/drivers/gpu/drm/meson/meson_venc.c
index 2f9c3814a2906..be76f3d64bf2e 100644
--- a/drivers/gpu/drm/meson/meson_venc.c
+++ b/drivers/gpu/drm/meson/meson_venc.c
@@ -715,6 +715,7 @@ struct meson_hdmi_venc_vic_mode {
 	{ 5, &meson_hdmi_encp_mode_1080i60 },
 	{ 20, &meson_hdmi_encp_mode_1080i50 },
 	{ 32, &meson_hdmi_encp_mode_1080p24 },
+	{ 33, &meson_hdmi_encp_mode_1080p50 },
 	{ 34, &meson_hdmi_encp_mode_1080p30 },
 	{ 31, &meson_hdmi_encp_mode_1080p50 },
 	{ 16, &meson_hdmi_encp_mode_1080p60 },
-- 
GitLab


From 8114865ff82e200b383e46821c25cb0625b842b5 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Sun, 18 Nov 2018 17:10:15 -0500
Subject: [PATCH 1413/1890] function_graph: Create function_graph_enter() to
 consolidate architecture code

Currently all the architectures do basically the same thing in preparing the
function graph tracer on entry to a function. This code can be pulled into a
generic location and then this will allow the function graph tracer to be
fixed, as well as extended.

Create a new function graph helper function_graph_enter() that will call the
hook function (ftrace_graph_entry) and the shadow stack operation
(ftrace_push_return_trace), and remove the need of the architecture code to
manage the shadow stack.

This is needed to prepare for a fix of a design bug on how the curr_ret_stack
is used.

Cc: stable@kernel.org
Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback")
Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/ftrace.h               |  3 +++
 kernel/trace/trace_functions_graph.c | 16 ++++++++++++++++
 2 files changed, 19 insertions(+)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index a397907e8d727..5717e8f81c595 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -779,6 +779,9 @@ extern void return_to_handler(void);
 extern int
 ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
 			 unsigned long frame_pointer, unsigned long *retp);
+extern int
+function_graph_enter(unsigned long ret, unsigned long func,
+		     unsigned long frame_pointer, unsigned long *retp);
 
 unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
 				    unsigned long ret, unsigned long *retp);
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 169b3c44ee97f..28f2602435d05 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -182,6 +182,22 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
 	return 0;
 }
 
+int function_graph_enter(unsigned long ret, unsigned long func,
+			 unsigned long frame_pointer, unsigned long *retp)
+{
+	struct ftrace_graph_ent trace;
+
+	trace.func = func;
+	trace.depth = current->curr_ret_stack + 1;
+
+	/* Only trace if the calling function expects to */
+	if (!ftrace_graph_entry(&trace))
+		return -EBUSY;
+
+	return ftrace_push_return_trace(ret, func, &trace.depth,
+					frame_pointer, retp);
+}
+
 /* Retrieve a function return address to the trace stack on thread info.*/
 static void
 ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
-- 
GitLab


From de7243057e7cefa923fa5f467c0f1ec24eef41d2 Mon Sep 17 00:00:00 2001
From: Ye Yin <dbyin@tencent.com>
Date: Fri, 23 Nov 2018 09:55:18 -0800
Subject: [PATCH 1414/1890] fs/xfs: fix f_ffree value for statfs when project
 quota is set

When project is set, we should use inode limit minus the used count

Signed-off-by: Ye Yin <dbyin@tencent.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_qm_bhv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c
index 73a1d77ec187c..3091e4bc04efe 100644
--- a/fs/xfs/xfs_qm_bhv.c
+++ b/fs/xfs/xfs_qm_bhv.c
@@ -40,7 +40,7 @@ xfs_fill_statvfs_from_dquot(
 		statp->f_files = limit;
 		statp->f_ffree =
 			(statp->f_files > dqp->q_res_icount) ?
-			 (statp->f_ffree - dqp->q_res_icount) : 0;
+			 (statp->f_files - dqp->q_res_icount) : 0;
 	}
 }
 
-- 
GitLab


From 508b09046c0f21678652fb66fd1e9959d55591d2 Mon Sep 17 00:00:00 2001
From: Alin Nastac <alin.nastac@gmail.com>
Date: Wed, 21 Nov 2018 14:00:30 +0100
Subject: [PATCH 1415/1890] netfilter: ipv6: Preserve link scope traffic
 original oif

When ip6_route_me_harder is invoked, it resets outgoing interface of:
  - link-local scoped packets sent by neighbor discovery
  - multicast packets sent by MLD host
  - multicast packets send by MLD proxy daemon that sets outgoing
    interface through IPV6_PKTINFO ipi6_ifindex

Link-local and multicast packets must keep their original oif after
ip6_route_me_harder is called.

Signed-off-by: Alin Nastac <alin.nastac@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv6/netfilter.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 5ae8e1c51079c..8b075f0bc3516 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -24,7 +24,8 @@ int ip6_route_me_harder(struct net *net, struct sk_buff *skb)
 	unsigned int hh_len;
 	struct dst_entry *dst;
 	struct flowi6 fl6 = {
-		.flowi6_oif = sk ? sk->sk_bound_dev_if : 0,
+		.flowi6_oif = sk && sk->sk_bound_dev_if ? sk->sk_bound_dev_if :
+			rt6_need_strict(&iph->daddr) ? skb_dst(skb)->dev->ifindex : 0,
 		.flowi6_mark = skb->mark,
 		.flowi6_uid = sock_net_uid(net, sk),
 		.daddr = iph->daddr,
-- 
GitLab


From 4d5422a309deecec906c491f8aea77593a46321d Mon Sep 17 00:00:00 2001
From: Artemy Kovalyov <artemyko@mellanox.com>
Date: Sun, 25 Nov 2018 20:34:23 +0200
Subject: [PATCH 1416/1890] IB/mlx5: Skip non-ODP MR when handling a page fault

It is possible that we call pagefault_single_data_segment() with a MKey
that belongs to a memory region which is not on demand (i.e. pinned
pages). This can happen if, for instance, a WQE that points to multiple
MRs where some of them are ODP MRs and some are not.  In this case we
don't need to handle this MR in the ODP context besides reporting success.

Otherwise the code will call pagefault_mr() which will do to_ib_umem_odp()
on a non-ODP MR and thus access out of bounds.

Fixes: 7bdf65d411c1 ("IB/mlx5: Handle page faults")
Signed-off-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Moni Shoua <monis@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/hw/mlx5/odp.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index b04eb67753261..2a0743808bd83 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -674,6 +674,14 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
 			goto srcu_unlock;
 		}
 
+		if (!mr->umem->is_odp) {
+			mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
+				    key);
+			if (bytes_mapped)
+				*bytes_mapped += bcnt;
+			goto srcu_unlock;
+		}
+
 		ret = pagefault_mr(dev, mr, io_virt, bcnt, bytes_mapped);
 		if (ret < 0)
 			goto srcu_unlock;
-- 
GitLab


From 605728e65ad303a1b639bcae7c0abd2e24e6a930 Mon Sep 17 00:00:00 2001
From: Artemy Kovalyov <artemyko@mellanox.com>
Date: Sun, 25 Nov 2018 20:34:25 +0200
Subject: [PATCH 1417/1890] IB/umem: Set correct address to the invalidation
 function

The invalidate range was using PAGE_SIZE instead of the computed 'end',
and had the wrong transformation of page_index due the weird
construction. This can trigger during error unwind and would cause
malfunction.

Inline the code and correct the math.

Fixes: 403cd12e2cf7 ("IB/umem: Add contiguous ODP support")
Signed-off-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Moni Shoua <monis@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/umem_odp.c | 20 ++++++--------------
 1 file changed, 6 insertions(+), 14 deletions(-)

diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 2b4c5e7dd5a17..676c1fd1119d8 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -137,15 +137,6 @@ static void ib_umem_notifier_release(struct mmu_notifier *mn,
 	up_read(&per_mm->umem_rwsem);
 }
 
-static int invalidate_page_trampoline(struct ib_umem_odp *item, u64 start,
-				      u64 end, void *cookie)
-{
-	ib_umem_notifier_start_account(item);
-	item->umem.context->invalidate_range(item, start, start + PAGE_SIZE);
-	ib_umem_notifier_end_account(item);
-	return 0;
-}
-
 static int invalidate_range_start_trampoline(struct ib_umem_odp *item,
 					     u64 start, u64 end, void *cookie)
 {
@@ -553,12 +544,13 @@ static int ib_umem_odp_map_dma_single_page(
 		put_page(page);
 
 	if (remove_existing_mapping && umem->context->invalidate_range) {
-		invalidate_page_trampoline(
+		ib_umem_notifier_start_account(umem_odp);
+		umem->context->invalidate_range(
 			umem_odp,
-			ib_umem_start(umem) + (page_index >> umem->page_shift),
-			ib_umem_start(umem) + ((page_index + 1) >>
-					       umem->page_shift),
-			NULL);
+			ib_umem_start(umem) + (page_index << umem->page_shift),
+			ib_umem_start(umem) +
+				((page_index + 1) << umem->page_shift));
+		ib_umem_notifier_end_account(umem_odp);
 		ret = -EAGAIN;
 	}
 
-- 
GitLab


From 75b7b86bdb0df37e08e44b6c1f99010967f81944 Mon Sep 17 00:00:00 2001
From: Artemy Kovalyov <artemyko@mellanox.com>
Date: Sun, 25 Nov 2018 20:34:26 +0200
Subject: [PATCH 1418/1890] IB/mlx5: Fix page fault handling for MW

Memory windows are implemented with an indirect MKey, when a page fault
event comes for a MW Mkey we need to find the MR at the end of the list of
the indirect MKeys by iterating on all items from the first to the last.

The offset calculated during this process has to be zeroed after the first
iteration or the next iteration will start from a wrong address, resulting
incorrect ODP faulting behavior.

Fixes: db570d7deafb ("IB/mlx5: Add ODP support to MW")
Signed-off-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Moni Shoua <monis@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/hw/mlx5/odp.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 2a0743808bd83..b711a0f3aa353 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -743,6 +743,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
 			head = frame;
 
 			bcnt -= frame->bcnt;
+			offset = 0;
 		}
 		break;
 
-- 
GitLab


From 584eab291c67894cb17cc87544b9d086228ea70f Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Thu, 22 Nov 2018 19:59:46 +0900
Subject: [PATCH 1419/1890] netfilter: add missing error handling code for
 register functions

register_{netdevice/inetaddr/inet6addr}_notifier may return an error
value, this patch adds the code to handle these error paths.

Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 .../net/netfilter/ipv4/nf_nat_masquerade.h    |  2 +-
 .../net/netfilter/ipv6/nf_nat_masquerade.h    |  2 +-
 net/ipv4/netfilter/ipt_MASQUERADE.c           |  7 ++--
 net/ipv4/netfilter/nf_nat_masquerade_ipv4.c   | 21 +++++++++---
 net/ipv4/netfilter/nft_masq_ipv4.c            |  4 ++-
 net/ipv6/netfilter/ip6t_MASQUERADE.c          |  8 +++--
 net/ipv6/netfilter/nf_nat_masquerade_ipv6.c   | 32 +++++++++++++------
 net/ipv6/netfilter/nft_masq_ipv6.c            |  4 ++-
 net/netfilter/nft_flow_offload.c              |  5 ++-
 9 files changed, 63 insertions(+), 22 deletions(-)

diff --git a/include/net/netfilter/ipv4/nf_nat_masquerade.h b/include/net/netfilter/ipv4/nf_nat_masquerade.h
index cd24be4c4a99b..13d55206bb9fc 100644
--- a/include/net/netfilter/ipv4/nf_nat_masquerade.h
+++ b/include/net/netfilter/ipv4/nf_nat_masquerade.h
@@ -9,7 +9,7 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
 		       const struct nf_nat_range2 *range,
 		       const struct net_device *out);
 
-void nf_nat_masquerade_ipv4_register_notifier(void);
+int nf_nat_masquerade_ipv4_register_notifier(void);
 void nf_nat_masquerade_ipv4_unregister_notifier(void);
 
 #endif /*_NF_NAT_MASQUERADE_IPV4_H_ */
diff --git a/include/net/netfilter/ipv6/nf_nat_masquerade.h b/include/net/netfilter/ipv6/nf_nat_masquerade.h
index 0c3b5ebf0bb8d..2917bf95c4370 100644
--- a/include/net/netfilter/ipv6/nf_nat_masquerade.h
+++ b/include/net/netfilter/ipv6/nf_nat_masquerade.h
@@ -5,7 +5,7 @@
 unsigned int
 nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
 		       const struct net_device *out);
-void nf_nat_masquerade_ipv6_register_notifier(void);
+int nf_nat_masquerade_ipv6_register_notifier(void);
 void nf_nat_masquerade_ipv6_unregister_notifier(void);
 
 #endif /* _NF_NAT_MASQUERADE_IPV6_H_ */
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index ce1512b02cb20..fd3f9e8a74daf 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -81,9 +81,12 @@ static int __init masquerade_tg_init(void)
 	int ret;
 
 	ret = xt_register_target(&masquerade_tg_reg);
+	if (ret)
+		return ret;
 
-	if (ret == 0)
-		nf_nat_masquerade_ipv4_register_notifier();
+	ret = nf_nat_masquerade_ipv4_register_notifier();
+	if (ret)
+		xt_unregister_target(&masquerade_tg_reg);
 
 	return ret;
 }
diff --git a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
index a9d5e013e5556..c7d7fa4fc369b 100644
--- a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
@@ -149,16 +149,29 @@ static struct notifier_block masq_inet_notifier = {
 
 static atomic_t masquerade_notifier_refcount = ATOMIC_INIT(0);
 
-void nf_nat_masquerade_ipv4_register_notifier(void)
+int nf_nat_masquerade_ipv4_register_notifier(void)
 {
+	int ret;
+
 	/* check if the notifier was already set */
 	if (atomic_inc_return(&masquerade_notifier_refcount) > 1)
-		return;
+		return 0;
 
 	/* Register for device down reports */
-	register_netdevice_notifier(&masq_dev_notifier);
+	ret = register_netdevice_notifier(&masq_dev_notifier);
+	if (ret)
+		goto err_dec;
 	/* Register IP address change reports */
-	register_inetaddr_notifier(&masq_inet_notifier);
+	ret = register_inetaddr_notifier(&masq_inet_notifier);
+	if (ret)
+		goto err_unregister;
+
+	return ret;
+err_unregister:
+	unregister_netdevice_notifier(&masq_dev_notifier);
+err_dec:
+	atomic_dec(&masquerade_notifier_refcount);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_register_notifier);
 
diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c
index f1193e1e928aa..6847de1d1db8a 100644
--- a/net/ipv4/netfilter/nft_masq_ipv4.c
+++ b/net/ipv4/netfilter/nft_masq_ipv4.c
@@ -69,7 +69,9 @@ static int __init nft_masq_ipv4_module_init(void)
 	if (ret < 0)
 		return ret;
 
-	nf_nat_masquerade_ipv4_register_notifier();
+	ret = nf_nat_masquerade_ipv4_register_notifier();
+	if (ret)
+		nft_unregister_expr(&nft_masq_ipv4_type);
 
 	return ret;
 }
diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c
index 491f808e356a6..29c7f1915a96c 100644
--- a/net/ipv6/netfilter/ip6t_MASQUERADE.c
+++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c
@@ -58,8 +58,12 @@ static int __init masquerade_tg6_init(void)
 	int err;
 
 	err = xt_register_target(&masquerade_tg6_reg);
-	if (err == 0)
-		nf_nat_masquerade_ipv6_register_notifier();
+	if (err)
+		return err;
+
+	err = nf_nat_masquerade_ipv6_register_notifier();
+	if (err)
+		xt_unregister_target(&masquerade_tg6_reg);
 
 	return err;
 }
diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
index 3e4bf2286abea..7afd1e63d2dba 100644
--- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
@@ -132,8 +132,8 @@ static void iterate_cleanup_work(struct work_struct *work)
  * of ipv6 addresses being deleted), we also need to add an upper
  * limit to the number of queued work items.
  */
-static int masq_inet_event(struct notifier_block *this,
-			   unsigned long event, void *ptr)
+static int masq_inet6_event(struct notifier_block *this,
+			    unsigned long event, void *ptr)
 {
 	struct inet6_ifaddr *ifa = ptr;
 	const struct net_device *dev;
@@ -171,20 +171,34 @@ static int masq_inet_event(struct notifier_block *this,
 	return NOTIFY_DONE;
 }
 
-static struct notifier_block masq_inet_notifier = {
-	.notifier_call	= masq_inet_event,
+static struct notifier_block masq_inet6_notifier = {
+	.notifier_call	= masq_inet6_event,
 };
 
 static atomic_t masquerade_notifier_refcount = ATOMIC_INIT(0);
 
-void nf_nat_masquerade_ipv6_register_notifier(void)
+int nf_nat_masquerade_ipv6_register_notifier(void)
 {
+	int ret;
+
 	/* check if the notifier is already set */
 	if (atomic_inc_return(&masquerade_notifier_refcount) > 1)
-		return;
+		return 0;
 
-	register_netdevice_notifier(&masq_dev_notifier);
-	register_inet6addr_notifier(&masq_inet_notifier);
+	ret = register_netdevice_notifier(&masq_dev_notifier);
+	if (ret)
+		goto err_dec;
+
+	ret = register_inet6addr_notifier(&masq_inet6_notifier);
+	if (ret)
+		goto err_unregister;
+
+	return ret;
+err_unregister:
+	unregister_netdevice_notifier(&masq_dev_notifier);
+err_dec:
+	atomic_dec(&masquerade_notifier_refcount);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_register_notifier);
 
@@ -194,7 +208,7 @@ void nf_nat_masquerade_ipv6_unregister_notifier(void)
 	if (atomic_dec_return(&masquerade_notifier_refcount) > 0)
 		return;
 
-	unregister_inet6addr_notifier(&masq_inet_notifier);
+	unregister_inet6addr_notifier(&masq_inet6_notifier);
 	unregister_netdevice_notifier(&masq_dev_notifier);
 }
 EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_unregister_notifier);
diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c
index dd0122f3cffea..e06c82e9dfcdf 100644
--- a/net/ipv6/netfilter/nft_masq_ipv6.c
+++ b/net/ipv6/netfilter/nft_masq_ipv6.c
@@ -70,7 +70,9 @@ static int __init nft_masq_ipv6_module_init(void)
 	if (ret < 0)
 		return ret;
 
-	nf_nat_masquerade_ipv6_register_notifier();
+	ret = nf_nat_masquerade_ipv6_register_notifier();
+	if (ret)
+		nft_unregister_expr(&nft_masq_ipv6_type);
 
 	return ret;
 }
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index e82d9a966c45a..974525eb92df7 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -214,7 +214,9 @@ static int __init nft_flow_offload_module_init(void)
 {
 	int err;
 
-	register_netdevice_notifier(&flow_offload_netdev_notifier);
+	err = register_netdevice_notifier(&flow_offload_netdev_notifier);
+	if (err)
+		goto err;
 
 	err = nft_register_expr(&nft_flow_offload_type);
 	if (err < 0)
@@ -224,6 +226,7 @@ static int __init nft_flow_offload_module_init(void)
 
 register_expr:
 	unregister_netdevice_notifier(&flow_offload_netdev_notifier);
+err:
 	return err;
 }
 
-- 
GitLab


From 095faf45e64be00bff4da2d6182dface3d69c9b7 Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Thu, 22 Nov 2018 19:59:57 +0900
Subject: [PATCH 1420/1890] netfilter: nat: fix double register in masquerade
 modules

There is a reference counter to ensure that masquerade modules register
notifiers only once. However, the existing reference counter approach is
not safe, test commands are:

   while :
   do
   	   modprobe ip6t_MASQUERADE &
	   modprobe nft_masq_ipv6 &
	   modprobe -rv ip6t_MASQUERADE &
	   modprobe -rv nft_masq_ipv6 &
   done

numbers below represent the reference counter.
--------------------------------------------------------
CPU0        CPU1        CPU2        CPU3        CPU4
[insmod]    [insmod]    [rmmod]     [rmmod]     [insmod]
--------------------------------------------------------
0->1
register    1->2
            returns     2->1
			returns     1->0
                                                0->1
                                                register <--
                                    unregister
--------------------------------------------------------

The unregistation of CPU3 should be processed before the
registration of CPU4.

In order to fix this, use a mutex instead of reference counter.

splat looks like:
[  323.869557] watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [modprobe:1381]
[  323.869574] Modules linked in: nf_tables(+) nf_nat_ipv6(-) nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 n]
[  323.869574] irq event stamp: 194074
[  323.898930] hardirqs last  enabled at (194073): [<ffffffff90004a0d>] trace_hardirqs_on_thunk+0x1a/0x1c
[  323.898930] hardirqs last disabled at (194074): [<ffffffff90004a29>] trace_hardirqs_off_thunk+0x1a/0x1c
[  323.898930] softirqs last  enabled at (182132): [<ffffffff922006ec>] __do_softirq+0x6ec/0xa3b
[  323.898930] softirqs last disabled at (182109): [<ffffffff90193426>] irq_exit+0x1a6/0x1e0
[  323.898930] CPU: 0 PID: 1381 Comm: modprobe Not tainted 4.20.0-rc2+ #27
[  323.898930] RIP: 0010:raw_notifier_chain_register+0xea/0x240
[  323.898930] Code: 3c 03 0f 8e f2 00 00 00 44 3b 6b 10 7f 4d 49 bc 00 00 00 00 00 fc ff df eb 22 48 8d 7b 10 488
[  323.898930] RSP: 0018:ffff888101597218 EFLAGS: 00000206 ORIG_RAX: ffffffffffffff13
[  323.898930] RAX: 0000000000000000 RBX: ffffffffc04361c0 RCX: 0000000000000000
[  323.898930] RDX: 1ffffffff26132ae RSI: ffffffffc04aa3c0 RDI: ffffffffc04361d0
[  323.898930] RBP: ffffffffc04361c8 R08: 0000000000000000 R09: 0000000000000001
[  323.898930] R10: ffff8881015972b0 R11: fffffbfff26132c4 R12: dffffc0000000000
[  323.898930] R13: 0000000000000000 R14: 1ffff110202b2e44 R15: ffffffffc04aa3c0
[  323.898930] FS:  00007f813ed41540(0000) GS:ffff88811ae00000(0000) knlGS:0000000000000000
[  323.898930] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  323.898930] CR2: 0000559bf2c9f120 CR3: 000000010bc80000 CR4: 00000000001006f0
[  323.898930] Call Trace:
[  323.898930]  ? atomic_notifier_chain_register+0x2d0/0x2d0
[  323.898930]  ? down_read+0x150/0x150
[  323.898930]  ? sched_clock_cpu+0x126/0x170
[  323.898930]  ? nf_tables_core_module_init+0xe4/0xe4 [nf_tables]
[  323.898930]  ? nf_tables_core_module_init+0xe4/0xe4 [nf_tables]
[  323.898930]  register_netdevice_notifier+0xbb/0x790
[  323.898930]  ? __dev_close_many+0x2d0/0x2d0
[  323.898930]  ? __mutex_unlock_slowpath+0x17f/0x740
[  323.898930]  ? wait_for_completion+0x710/0x710
[  323.898930]  ? nf_tables_core_module_init+0xe4/0xe4 [nf_tables]
[  323.898930]  ? up_write+0x6c/0x210
[  323.898930]  ? nf_tables_core_module_init+0xe4/0xe4 [nf_tables]
[  324.127073]  ? nf_tables_core_module_init+0xe4/0xe4 [nf_tables]
[  324.127073]  nft_chain_filter_init+0x1e/0xe8a [nf_tables]
[  324.127073]  nf_tables_module_init+0x37/0x92 [nf_tables]
[ ... ]

Fixes: 8dd33cc93ec9 ("netfilter: nf_nat: generalize IPv4 masquerading support for nf_tables")
Fixes: be6b635cd674 ("netfilter: nf_nat: generalize IPv6 masquerading support for nf_tables")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/nf_nat_masquerade_ipv4.c | 23 ++++++++++++++-------
 net/ipv6/netfilter/nf_nat_masquerade_ipv6.c | 23 ++++++++++++++-------
 2 files changed, 32 insertions(+), 14 deletions(-)

diff --git a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
index c7d7fa4fc369b..41327bb990932 100644
--- a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
@@ -147,15 +147,17 @@ static struct notifier_block masq_inet_notifier = {
 	.notifier_call	= masq_inet_event,
 };
 
-static atomic_t masquerade_notifier_refcount = ATOMIC_INIT(0);
+static int masq_refcnt;
+static DEFINE_MUTEX(masq_mutex);
 
 int nf_nat_masquerade_ipv4_register_notifier(void)
 {
-	int ret;
+	int ret = 0;
 
+	mutex_lock(&masq_mutex);
 	/* check if the notifier was already set */
-	if (atomic_inc_return(&masquerade_notifier_refcount) > 1)
-		return 0;
+	if (++masq_refcnt > 1)
+		goto out_unlock;
 
 	/* Register for device down reports */
 	ret = register_netdevice_notifier(&masq_dev_notifier);
@@ -166,22 +168,29 @@ int nf_nat_masquerade_ipv4_register_notifier(void)
 	if (ret)
 		goto err_unregister;
 
+	mutex_unlock(&masq_mutex);
 	return ret;
+
 err_unregister:
 	unregister_netdevice_notifier(&masq_dev_notifier);
 err_dec:
-	atomic_dec(&masquerade_notifier_refcount);
+	masq_refcnt--;
+out_unlock:
+	mutex_unlock(&masq_mutex);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_register_notifier);
 
 void nf_nat_masquerade_ipv4_unregister_notifier(void)
 {
+	mutex_lock(&masq_mutex);
 	/* check if the notifier still has clients */
-	if (atomic_dec_return(&masquerade_notifier_refcount) > 0)
-		return;
+	if (--masq_refcnt > 0)
+		goto out_unlock;
 
 	unregister_netdevice_notifier(&masq_dev_notifier);
 	unregister_inetaddr_notifier(&masq_inet_notifier);
+out_unlock:
+	mutex_unlock(&masq_mutex);
 }
 EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_unregister_notifier);
diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
index 7afd1e63d2dba..0ad0da5a26002 100644
--- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
@@ -175,15 +175,17 @@ static struct notifier_block masq_inet6_notifier = {
 	.notifier_call	= masq_inet6_event,
 };
 
-static atomic_t masquerade_notifier_refcount = ATOMIC_INIT(0);
+static int masq_refcnt;
+static DEFINE_MUTEX(masq_mutex);
 
 int nf_nat_masquerade_ipv6_register_notifier(void)
 {
-	int ret;
+	int ret = 0;
 
+	mutex_lock(&masq_mutex);
 	/* check if the notifier is already set */
-	if (atomic_inc_return(&masquerade_notifier_refcount) > 1)
-		return 0;
+	if (++masq_refcnt > 1)
+		goto out_unlock;
 
 	ret = register_netdevice_notifier(&masq_dev_notifier);
 	if (ret)
@@ -193,22 +195,29 @@ int nf_nat_masquerade_ipv6_register_notifier(void)
 	if (ret)
 		goto err_unregister;
 
+	mutex_unlock(&masq_mutex);
 	return ret;
+
 err_unregister:
 	unregister_netdevice_notifier(&masq_dev_notifier);
 err_dec:
-	atomic_dec(&masquerade_notifier_refcount);
+	masq_refcnt--;
+out_unlock:
+	mutex_unlock(&masq_mutex);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_register_notifier);
 
 void nf_nat_masquerade_ipv6_unregister_notifier(void)
 {
+	mutex_lock(&masq_mutex);
 	/* check if the notifier still has clients */
-	if (atomic_dec_return(&masquerade_notifier_refcount) > 0)
-		return;
+	if (--masq_refcnt > 0)
+		goto out_unlock;
 
 	unregister_inet6addr_notifier(&masq_inet6_notifier);
 	unregister_netdevice_notifier(&masq_dev_notifier);
+out_unlock:
+	mutex_unlock(&masq_mutex);
 }
 EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_unregister_notifier);
-- 
GitLab


From 53ca0f2fec39c80ccd19e6e3f30cc8daef174b70 Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Sun, 25 Nov 2018 18:47:13 +0900
Subject: [PATCH 1421/1890] netfilter: nf_conncount: remove wrong condition
 check routine

All lists that reach the tree_nodes_free() function have both zero
counter and true dead flag. The reason for this is that lists to be
release are selected by nf_conncount_gc_list() which already decrements
the list counter and sets on the dead flag. Therefore, this if statement
in tree_nodes_free() is unnecessary and wrong.

Fixes: 31568ec09ea0 ("netfilter: nf_conncount: fix list_del corruption in conn_free")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conncount.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index 8acae4a3e4c00..b6d0f6deea86c 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -323,11 +323,8 @@ static void tree_nodes_free(struct rb_root *root,
 	while (gc_count) {
 		rbconn = gc_nodes[--gc_count];
 		spin_lock(&rbconn->list.list_lock);
-		if (rbconn->list.count == 0 && rbconn->list.dead == false) {
-			rbconn->list.dead = true;
-			rb_erase(&rbconn->node, root);
-			call_rcu(&rbconn->rcu_head, __tree_nodes_free);
-		}
+		rb_erase(&rbconn->node, root);
+		call_rcu(&rbconn->rcu_head, __tree_nodes_free);
 		spin_unlock(&rbconn->list.list_lock);
 	}
 }
-- 
GitLab


From e2c95a61656d29ceaac97b6a975c8a1f26e26f15 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Mon, 26 Nov 2018 14:05:38 +0100
Subject: [PATCH 1422/1890] bpf, ppc64: generalize fetching subprog into
 bpf_jit_get_func_addr

Make fetching of the BPF call address from ppc64 JIT generic. ppc64
was using a slightly different variant rather than through the insns'
imm field encoding as the target address would not fit into that space.
Therefore, the target subprog number was encoded into the insns' offset
and fetched through fp->aux->func[off]->bpf_func instead. Given there
are other JITs with this issue and the mechanism of fetching the address
is JIT-generic, move it into the core as a helper instead. On the JIT
side, we get information on whether the retrieved address is a fixed
one, that is, not changing through JIT passes, or a dynamic one. For
the former, JITs can optimize their imm emission because this doesn't
change jump offsets throughout JIT process.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Sandipan Das <sandipan@linux.ibm.com>
Tested-by: Sandipan Das <sandipan@linux.ibm.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 arch/powerpc/net/bpf_jit_comp64.c | 57 ++++++++++++++++++++-----------
 include/linux/filter.h            |  4 +++
 kernel/bpf/core.c                 | 34 ++++++++++++++++++
 3 files changed, 76 insertions(+), 19 deletions(-)

diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 50b129785aeee..17482f5de3e26 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -166,7 +166,33 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
 	PPC_BLR();
 }
 
-static void bpf_jit_emit_func_call(u32 *image, struct codegen_context *ctx, u64 func)
+static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx,
+				       u64 func)
+{
+#ifdef PPC64_ELF_ABI_v1
+	/* func points to the function descriptor */
+	PPC_LI64(b2p[TMP_REG_2], func);
+	/* Load actual entry point from function descriptor */
+	PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0);
+	/* ... and move it to LR */
+	PPC_MTLR(b2p[TMP_REG_1]);
+	/*
+	 * Load TOC from function descriptor at offset 8.
+	 * We can clobber r2 since we get called through a
+	 * function pointer (so caller will save/restore r2)
+	 * and since we don't use a TOC ourself.
+	 */
+	PPC_BPF_LL(2, b2p[TMP_REG_2], 8);
+#else
+	/* We can clobber r12 */
+	PPC_FUNC_ADDR(12, func);
+	PPC_MTLR(12);
+#endif
+	PPC_BLRL();
+}
+
+static void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx,
+				       u64 func)
 {
 	unsigned int i, ctx_idx = ctx->idx;
 
@@ -273,7 +299,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
 {
 	const struct bpf_insn *insn = fp->insnsi;
 	int flen = fp->len;
-	int i;
+	int i, ret;
 
 	/* Start of epilogue code - will only be valid 2nd pass onwards */
 	u32 exit_addr = addrs[flen];
@@ -284,8 +310,9 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
 		u32 src_reg = b2p[insn[i].src_reg];
 		s16 off = insn[i].off;
 		s32 imm = insn[i].imm;
+		bool func_addr_fixed;
+		u64 func_addr;
 		u64 imm64;
-		u8 *func;
 		u32 true_cond;
 		u32 tmp_idx;
 
@@ -711,23 +738,15 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
 		case BPF_JMP | BPF_CALL:
 			ctx->seen |= SEEN_FUNC;
 
-			/* bpf function call */
-			if (insn[i].src_reg == BPF_PSEUDO_CALL)
-				if (!extra_pass)
-					func = NULL;
-				else if (fp->aux->func && off < fp->aux->func_cnt)
-					/* use the subprog id from the off
-					 * field to lookup the callee address
-					 */
-					func = (u8 *) fp->aux->func[off]->bpf_func;
-				else
-					return -EINVAL;
-			/* kernel helper call */
-			else
-				func = (u8 *) __bpf_call_base + imm;
-
-			bpf_jit_emit_func_call(image, ctx, (u64)func);
+			ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass,
+						    &func_addr, &func_addr_fixed);
+			if (ret < 0)
+				return ret;
 
+			if (func_addr_fixed)
+				bpf_jit_emit_func_call_hlp(image, ctx, func_addr);
+			else
+				bpf_jit_emit_func_call_rel(image, ctx, func_addr);
 			/* move return value from r3 to BPF_REG_0 */
 			PPC_MR(b2p[BPF_REG_0], 3);
 			break;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index de629b706d1d7..448dcc448f1fe 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -866,6 +866,10 @@ void bpf_jit_binary_free(struct bpf_binary_header *hdr);
 
 void bpf_jit_free(struct bpf_prog *fp);
 
+int bpf_jit_get_func_addr(const struct bpf_prog *prog,
+			  const struct bpf_insn *insn, bool extra_pass,
+			  u64 *func_addr, bool *func_addr_fixed);
+
 struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *fp);
 void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other);
 
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 1a796e0799ec4..b1a3545d0ec89 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -672,6 +672,40 @@ void __weak bpf_jit_free(struct bpf_prog *fp)
 	bpf_prog_unlock_free(fp);
 }
 
+int bpf_jit_get_func_addr(const struct bpf_prog *prog,
+			  const struct bpf_insn *insn, bool extra_pass,
+			  u64 *func_addr, bool *func_addr_fixed)
+{
+	s16 off = insn->off;
+	s32 imm = insn->imm;
+	u8 *addr;
+
+	*func_addr_fixed = insn->src_reg != BPF_PSEUDO_CALL;
+	if (!*func_addr_fixed) {
+		/* Place-holder address till the last pass has collected
+		 * all addresses for JITed subprograms in which case we
+		 * can pick them up from prog->aux.
+		 */
+		if (!extra_pass)
+			addr = NULL;
+		else if (prog->aux->func &&
+			 off >= 0 && off < prog->aux->func_cnt)
+			addr = (u8 *)prog->aux->func[off]->bpf_func;
+		else
+			return -EINVAL;
+	} else {
+		/* Address of a BPF helper call. Since part of the core
+		 * kernel, it's always at a fixed location. __bpf_call_base
+		 * and the helper with imm relative to it are both in core
+		 * kernel.
+		 */
+		addr = (u8 *)__bpf_call_base + imm;
+	}
+
+	*func_addr = (unsigned long)addr;
+	return 0;
+}
+
 static int bpf_jit_blind_insn(const struct bpf_insn *from,
 			      const struct bpf_insn *aux,
 			      struct bpf_insn *to_buff)
-- 
GitLab


From 8c11ea5ce13da0252fc92f91e90b0cb0c8fe5619 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Mon, 26 Nov 2018 14:05:39 +0100
Subject: [PATCH 1423/1890] bpf, arm64: fix getting subprog addr from aux for
 calls

The arm64 JIT has the same issue as ppc64 JIT in that the relative BPF
to BPF call offset can be too far away from core kernel in that relative
encoding into imm is not sufficient and could potentially be truncated,
see also fd045f6cd98e ("arm64: add support for module PLTs") which adds
spill-over space for module_alloc() and therefore bpf_jit_binary_alloc().
Therefore, use the recently added bpf_jit_get_func_addr() helper for
properly fetching the address through prog->aux->func[off]->bpf_func
instead. This also has the benefit to optimize normal helper calls since
their address can use the optimized emission. Tested on Cavium ThunderX
CN8890.

Fixes: db496944fdaa ("bpf: arm64: add JIT support for multi-function programs")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 arch/arm64/net/bpf_jit_comp.c | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index a6fdaea07c633..89198017e8e68 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -351,7 +351,8 @@ static void build_epilogue(struct jit_ctx *ctx)
  * >0 - successfully JITed a 16-byte eBPF instruction.
  * <0 - failed to JIT.
  */
-static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
+static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
+		      bool extra_pass)
 {
 	const u8 code = insn->code;
 	const u8 dst = bpf2a64[insn->dst_reg];
@@ -625,12 +626,19 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 	case BPF_JMP | BPF_CALL:
 	{
 		const u8 r0 = bpf2a64[BPF_REG_0];
-		const u64 func = (u64)__bpf_call_base + imm;
+		bool func_addr_fixed;
+		u64 func_addr;
+		int ret;
 
-		if (ctx->prog->is_func)
-			emit_addr_mov_i64(tmp, func, ctx);
+		ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass,
+					    &func_addr, &func_addr_fixed);
+		if (ret < 0)
+			return ret;
+		if (func_addr_fixed)
+			/* We can use optimized emission here. */
+			emit_a64_mov_i64(tmp, func_addr, ctx);
 		else
-			emit_a64_mov_i64(tmp, func, ctx);
+			emit_addr_mov_i64(tmp, func_addr, ctx);
 		emit(A64_BLR(tmp), ctx);
 		emit(A64_MOV(1, r0, A64_R(0)), ctx);
 		break;
@@ -753,7 +761,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 	return 0;
 }
 
-static int build_body(struct jit_ctx *ctx)
+static int build_body(struct jit_ctx *ctx, bool extra_pass)
 {
 	const struct bpf_prog *prog = ctx->prog;
 	int i;
@@ -762,7 +770,7 @@ static int build_body(struct jit_ctx *ctx)
 		const struct bpf_insn *insn = &prog->insnsi[i];
 		int ret;
 
-		ret = build_insn(insn, ctx);
+		ret = build_insn(insn, ctx, extra_pass);
 		if (ret > 0) {
 			i++;
 			if (ctx->image == NULL)
@@ -858,7 +866,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	/* 1. Initial fake pass to compute ctx->idx. */
 
 	/* Fake pass to fill in ctx->offset. */
-	if (build_body(&ctx)) {
+	if (build_body(&ctx, extra_pass)) {
 		prog = orig_prog;
 		goto out_off;
 	}
@@ -888,7 +896,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 
 	build_prologue(&ctx, was_classic);
 
-	if (build_body(&ctx)) {
+	if (build_body(&ctx, extra_pass)) {
 		bpf_jit_binary_free(header);
 		prog = orig_prog;
 		goto out_off;
-- 
GitLab


From c44768a33da81b4a0986e79bbf0588f1a0651dec Mon Sep 17 00:00:00 2001
From: David Miller <davem@davemloft.net>
Date: Mon, 26 Nov 2018 13:03:46 -0800
Subject: [PATCH 1424/1890] sparc: Fix JIT fused branch convergance.

On T4 and later sparc64 cpus we can use the fused compare and branch
instruction.

However, it can only be used if the branch destination is in the range
of a signed 10-bit immediate offset.  This amounts to 1024
instructions forwards or backwards.

After the commit referenced in the Fixes: tag, the largest possible
size program seen by the JIT explodes by a significant factor.

As a result of this convergance takes many more passes since the
expanded "BPF_LDX | BPF_MSH | BPF_B" code sequence, for example,
contains several embedded branch on condition instructions.

On each pass, as suddenly new fused compare and branch instances
become valid, this makes thousands more in range for the next pass.
And so on and so forth.

This is most greatly exemplified by "BPF_MAXINSNS: exec all MSH" which
takes 35 passes to converge, and shrinks the image by about 64K.

To decrease the cost of this number of convergance passes, do the
convergance pass before we have the program image allocated, just like
other JITs (such as x86) do.

Fixes: e0cea7ce988c ("bpf: implement ld_abs/ld_ind in native bpf")
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 arch/sparc/net/bpf_jit_comp_64.c | 77 ++++++++++++++++++++------------
 1 file changed, 49 insertions(+), 28 deletions(-)

diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
index 222785af550b4..7217d63596431 100644
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -1425,12 +1425,12 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	struct bpf_prog *tmp, *orig_prog = prog;
 	struct sparc64_jit_data *jit_data;
 	struct bpf_binary_header *header;
+	u32 prev_image_size, image_size;
 	bool tmp_blinded = false;
 	bool extra_pass = false;
 	struct jit_ctx ctx;
-	u32 image_size;
 	u8 *image_ptr;
-	int pass;
+	int pass, i;
 
 	if (!prog->jit_requested)
 		return orig_prog;
@@ -1461,61 +1461,82 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 		header = jit_data->header;
 		extra_pass = true;
 		image_size = sizeof(u32) * ctx.idx;
+		prev_image_size = image_size;
+		pass = 1;
 		goto skip_init_ctx;
 	}
 
 	memset(&ctx, 0, sizeof(ctx));
 	ctx.prog = prog;
 
-	ctx.offset = kcalloc(prog->len, sizeof(unsigned int), GFP_KERNEL);
+	ctx.offset = kmalloc_array(prog->len, sizeof(unsigned int), GFP_KERNEL);
 	if (ctx.offset == NULL) {
 		prog = orig_prog;
 		goto out_off;
 	}
 
-	/* Fake pass to detect features used, and get an accurate assessment
-	 * of what the final image size will be.
+	/* Longest sequence emitted is for bswap32, 12 instructions.  Pre-cook
+	 * the offset array so that we converge faster.
 	 */
-	if (build_body(&ctx)) {
-		prog = orig_prog;
-		goto out_off;
-	}
-	build_prologue(&ctx);
-	build_epilogue(&ctx);
+	for (i = 0; i < prog->len; i++)
+		ctx.offset[i] = i * (12 * 4);
 
-	/* Now we know the actual image size. */
-	image_size = sizeof(u32) * ctx.idx;
-	header = bpf_jit_binary_alloc(image_size, &image_ptr,
-				      sizeof(u32), jit_fill_hole);
-	if (header == NULL) {
-		prog = orig_prog;
-		goto out_off;
-	}
-
-	ctx.image = (u32 *)image_ptr;
-skip_init_ctx:
-	for (pass = 1; pass < 3; pass++) {
+	prev_image_size = ~0U;
+	for (pass = 1; pass < 40; pass++) {
 		ctx.idx = 0;
 
 		build_prologue(&ctx);
-
 		if (build_body(&ctx)) {
-			bpf_jit_binary_free(header);
 			prog = orig_prog;
 			goto out_off;
 		}
-
 		build_epilogue(&ctx);
 
 		if (bpf_jit_enable > 1)
-			pr_info("Pass %d: shrink = %d, seen = [%c%c%c%c%c%c]\n", pass,
-				image_size - (ctx.idx * 4),
+			pr_info("Pass %d: size = %u, seen = [%c%c%c%c%c%c]\n", pass,
+				ctx.idx * 4,
 				ctx.tmp_1_used ? '1' : ' ',
 				ctx.tmp_2_used ? '2' : ' ',
 				ctx.tmp_3_used ? '3' : ' ',
 				ctx.saw_frame_pointer ? 'F' : ' ',
 				ctx.saw_call ? 'C' : ' ',
 				ctx.saw_tail_call ? 'T' : ' ');
+
+		if (ctx.idx * 4 == prev_image_size)
+			break;
+		prev_image_size = ctx.idx * 4;
+		cond_resched();
+	}
+
+	/* Now we know the actual image size. */
+	image_size = sizeof(u32) * ctx.idx;
+	header = bpf_jit_binary_alloc(image_size, &image_ptr,
+				      sizeof(u32), jit_fill_hole);
+	if (header == NULL) {
+		prog = orig_prog;
+		goto out_off;
+	}
+
+	ctx.image = (u32 *)image_ptr;
+skip_init_ctx:
+	ctx.idx = 0;
+
+	build_prologue(&ctx);
+
+	if (build_body(&ctx)) {
+		bpf_jit_binary_free(header);
+		prog = orig_prog;
+		goto out_off;
+	}
+
+	build_epilogue(&ctx);
+
+	if (ctx.idx * 4 != prev_image_size) {
+		pr_err("bpf_jit: Failed to converge, prev_size=%u size=%d\n",
+		       prev_image_size, ctx.idx * 4);
+		bpf_jit_binary_free(header);
+		prog = orig_prog;
+		goto out_off;
 	}
 
 	if (bpf_jit_enable > 1)
-- 
GitLab


From e2ac579a7a18bcd9e8cf14cf42eac0b8a2ba6c4b Mon Sep 17 00:00:00 2001
From: David Miller <davem@davemloft.net>
Date: Mon, 26 Nov 2018 14:52:18 -0800
Subject: [PATCH 1425/1890] sparc: Correct ctx->saw_frame_pointer logic.

We need to initialize the frame pointer register not just if it is
seen as a source operand, but also if it is seen as the destination
operand of a store or an atomic instruction (which effectively is a
source operand).

This is exercised by test_verifier's "non-invalid fp arithmetic"

Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 arch/sparc/net/bpf_jit_comp_64.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
index 7217d63596431..ec4da4dc98f12 100644
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -1270,6 +1270,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 		const u8 tmp2 = bpf2sparc[TMP_REG_2];
 		u32 opcode = 0, rs2;
 
+		if (insn->dst_reg == BPF_REG_FP)
+			ctx->saw_frame_pointer = true;
+
 		ctx->tmp_2_used = true;
 		emit_loadimm(imm, tmp2, ctx);
 
@@ -1308,6 +1311,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 		const u8 tmp = bpf2sparc[TMP_REG_1];
 		u32 opcode = 0, rs2;
 
+		if (insn->dst_reg == BPF_REG_FP)
+			ctx->saw_frame_pointer = true;
+
 		switch (BPF_SIZE(code)) {
 		case BPF_W:
 			opcode = ST32;
@@ -1340,6 +1346,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 		const u8 tmp2 = bpf2sparc[TMP_REG_2];
 		const u8 tmp3 = bpf2sparc[TMP_REG_3];
 
+		if (insn->dst_reg == BPF_REG_FP)
+			ctx->saw_frame_pointer = true;
+
 		ctx->tmp_1_used = true;
 		ctx->tmp_2_used = true;
 		ctx->tmp_3_used = true;
@@ -1360,6 +1369,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 		const u8 tmp2 = bpf2sparc[TMP_REG_2];
 		const u8 tmp3 = bpf2sparc[TMP_REG_3];
 
+		if (insn->dst_reg == BPF_REG_FP)
+			ctx->saw_frame_pointer = true;
+
 		ctx->tmp_1_used = true;
 		ctx->tmp_2_used = true;
 		ctx->tmp_3_used = true;
-- 
GitLab


From fa1e0c9690bfa19eadd2adbaf511fb4fef9270e6 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 27 Nov 2018 01:21:00 +0100
Subject: [PATCH 1426/1890] bpf, doc: add entries of who looks over which jits

Make the high-level BPF JIT entry a general 'catch-all' and add
architecture specific entries to make it more clear who actively
maintains which BPF JIT compiler. The list (L) address implies
that this eventually lands in the bpf patchwork bucket. Goal is
that this set of responsible developers listed here is always up
to date and a point of contact for helping out in e.g. feature
development, fixes, review or testing patches in order to help
long-term in ensuring quality of the BPF JITs and therefore BPF
core under a given architecture. Every new JIT in future /must/
have an entry here as well.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Acked-by: Sandipan Das <sandipan@linux.ibm.com>
Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Zi Shen Lim <zlim.lnx@gmail.com>
Acked-by: Paul Burton <paul.burton@mips.com>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Wang YanQing <udknight@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 MAINTAINERS | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 62 insertions(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 03c46f4831433..bfaa411199021 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2801,7 +2801,7 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git
 Q:	https://patchwork.ozlabs.org/project/netdev/list/?delegate=77147
 S:	Supported
-F:	arch/x86/net/bpf_jit*
+F:	arch/*/net/*
 F:	Documentation/networking/filter.txt
 F:	Documentation/bpf/
 F:	include/linux/bpf*
@@ -2821,6 +2821,67 @@ F:	tools/bpf/
 F:	tools/lib/bpf/
 F:	tools/testing/selftests/bpf/
 
+BPF JIT for ARM
+M:	Shubham Bansal <illusionist.neo@gmail.com>
+L:	netdev@vger.kernel.org
+S:	Maintained
+F:	arch/arm/net/
+
+BPF JIT for ARM64
+M:	Daniel Borkmann <daniel@iogearbox.net>
+M:	Alexei Starovoitov <ast@kernel.org>
+M:	Zi Shen Lim <zlim.lnx@gmail.com>
+L:	netdev@vger.kernel.org
+S:	Supported
+F:	arch/arm64/net/
+
+BPF JIT for MIPS (32-BIT AND 64-BIT)
+M:	Paul Burton <paul.burton@mips.com>
+L:	netdev@vger.kernel.org
+S:	Maintained
+F:	arch/mips/net/
+
+BPF JIT for NFP NICs
+M:	Jakub Kicinski <jakub.kicinski@netronome.com>
+L:	netdev@vger.kernel.org
+S:	Supported
+F:	drivers/net/ethernet/netronome/nfp/bpf/
+
+BPF JIT for POWERPC (32-BIT AND 64-BIT)
+M:	Naveen N. Rao <naveen.n.rao@linux.ibm.com>
+M:	Sandipan Das <sandipan@linux.ibm.com>
+L:	netdev@vger.kernel.org
+S:	Maintained
+F:	arch/powerpc/net/
+
+BPF JIT for S390
+M:	Martin Schwidefsky <schwidefsky@de.ibm.com>
+M:	Heiko Carstens <heiko.carstens@de.ibm.com>
+L:	netdev@vger.kernel.org
+S:	Maintained
+F:	arch/s390/net/
+X:	arch/s390/net/pnet.c
+
+BPF JIT for SPARC (32-BIT AND 64-BIT)
+M:	David S. Miller <davem@davemloft.net>
+L:	netdev@vger.kernel.org
+S:	Maintained
+F:	arch/sparc/net/
+
+BPF JIT for X86 32-BIT
+M:	Wang YanQing <udknight@gmail.com>
+L:	netdev@vger.kernel.org
+S:	Maintained
+F:	arch/x86/net/bpf_jit_comp32.c
+
+BPF JIT for X86 64-BIT
+M:	Alexei Starovoitov <ast@kernel.org>
+M:	Daniel Borkmann <daniel@iogearbox.net>
+L:	netdev@vger.kernel.org
+S:	Supported
+F:	arch/x86/net/
+X:	arch/x86/net/bpf_jit_comp32.c
+
 BROADCOM B44 10/100 ETHERNET DRIVER
 M:	Michael Chan <michael.chan@broadcom.com>
 L:	netdev@vger.kernel.org
-- 
GitLab


From ef78e5ec9214376c5cb989f5da70b02d0c117b66 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= <mb@lightnvm.io>
Date: Mon, 26 Nov 2018 11:27:03 -0800
Subject: [PATCH 1427/1890] ia64: export node_distance function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The numa_slit variable used by node_distance is available to a
module as long as it is linked at compile-time. However, it is
not available to loadable modules. Leading to errors such as:

  ERROR: "numa_slit" [drivers/nvme/host/nvme-core.ko] undefined!

The error above is caused by the nvme multipath code that makes
use of node_distance for its path calculation. When the patch was
added, the lightnvm subsystem would select nvme and always compile
it in, leading to the node_distance call to always succeed.
However, when this requirement was removed, nvme could be compiled
in as a module, which exposed this bug.

This patch extracts node_distance to a function and exports it.
Since ACPI is depending on node_distance being a simple lookup to
numa_slit, the previous behavior is exposed as slit_distance and its
users updated.

Fixes: f333444708f82 "nvme: take node locality into account when selecting a path"
Fixes: 73569e11032f "lightnvm: remove dependencies on BLK_DEV_NVME and PCI"
Signed-off-by: Matias BjÃ¸ring <mb@lightnvm.io>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/include/asm/numa.h | 4 +++-
 arch/ia64/kernel/acpi.c      | 6 +++---
 arch/ia64/mm/numa.c          | 6 ++++++
 3 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/arch/ia64/include/asm/numa.h b/arch/ia64/include/asm/numa.h
index ebef7f40aabbe..c5c253cb9bd63 100644
--- a/arch/ia64/include/asm/numa.h
+++ b/arch/ia64/include/asm/numa.h
@@ -59,7 +59,9 @@ extern struct node_cpuid_s node_cpuid[NR_CPUS];
  */
 
 extern u8 numa_slit[MAX_NUMNODES * MAX_NUMNODES];
-#define node_distance(from,to) (numa_slit[(from) * MAX_NUMNODES + (to)])
+#define slit_distance(from,to) (numa_slit[(from) * MAX_NUMNODES + (to)])
+extern int __node_distance(int from, int to);
+#define node_distance(from,to) __node_distance(from, to)
 
 extern int paddr_to_nid(unsigned long paddr);
 
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 1dacbf5e9e09a..41eb281709da1 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -578,8 +578,8 @@ void __init acpi_numa_fixup(void)
 	if (!slit_table) {
 		for (i = 0; i < MAX_NUMNODES; i++)
 			for (j = 0; j < MAX_NUMNODES; j++)
-				node_distance(i, j) = i == j ? LOCAL_DISTANCE :
-							REMOTE_DISTANCE;
+				slit_distance(i, j) = i == j ?
+					LOCAL_DISTANCE : REMOTE_DISTANCE;
 		return;
 	}
 
@@ -592,7 +592,7 @@ void __init acpi_numa_fixup(void)
 			if (!pxm_bit_test(j))
 				continue;
 			node_to = pxm_to_node(j);
-			node_distance(node_from, node_to) =
+			slit_distance(node_from, node_to) =
 			    slit_table->entry[i * slit_table->locality_count + j];
 		}
 	}
diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c
index 3861d6e32d5ff..a03803506b0c0 100644
--- a/arch/ia64/mm/numa.c
+++ b/arch/ia64/mm/numa.c
@@ -36,6 +36,12 @@ struct node_cpuid_s node_cpuid[NR_CPUS] =
  */
 u8 numa_slit[MAX_NUMNODES * MAX_NUMNODES];
 
+int __node_distance(int from, int to)
+{
+	return slit_distance(from, to);
+}
+EXPORT_SYMBOL(__node_distance);
+
 /* Identify which cnode a physical address resides on */
 int
 paddr_to_nid(unsigned long paddr)
-- 
GitLab


From 2958b66694e018c552be0b60521fec27e8d12988 Mon Sep 17 00:00:00 2001
From: Max Filippov <jcmvbkbc@gmail.com>
Date: Mon, 26 Nov 2018 13:29:41 -0800
Subject: [PATCH 1428/1890] xtensa: enable coprocessors that are being flushed

coprocessor_flush_all may be called from a context of a thread that is
different from the thread being flushed. In that case contents of the
cpenable special register may not match ti->cpenable of the target
thread, resulting in unhandled coprocessor exception in the kernel
context.
Set cpenable special register to the ti->cpenable of the target register
for the duration of the flush and restore it afterwards.
This fixes the following crash caused by coprocessor register inspection
in native gdb:

  (gdb) p/x $w0
  Illegal instruction in kernel: sig: 9 [#1] PREEMPT
  Call Trace:
    ___might_sleep+0x184/0x1a4
    __might_sleep+0x41/0xac
    exit_signals+0x14/0x218
    do_exit+0xc9/0x8b8
    die+0x99/0xa0
    do_illegal_instruction+0x18/0x6c
    common_exception+0x77/0x77
    coprocessor_flush+0x16/0x3c
    arch_ptrace+0x46c/0x674
    sys_ptrace+0x2ce/0x3b4
    system_call+0x54/0x80
    common_exception+0x77/0x77
  note: gdb[100] exited with preempt_count 1
  Killed

Cc: stable@vger.kernel.org
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 arch/xtensa/kernel/process.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c
index 483dcfb6e681d..4bb68133a72af 100644
--- a/arch/xtensa/kernel/process.c
+++ b/arch/xtensa/kernel/process.c
@@ -94,18 +94,21 @@ void coprocessor_release_all(struct thread_info *ti)
 
 void coprocessor_flush_all(struct thread_info *ti)
 {
-	unsigned long cpenable;
+	unsigned long cpenable, old_cpenable;
 	int i;
 
 	preempt_disable();
 
+	RSR_CPENABLE(old_cpenable);
 	cpenable = ti->cpenable;
+	WSR_CPENABLE(cpenable);
 
 	for (i = 0; i < XCHAL_CP_MAX; i++) {
 		if ((cpenable & 1) != 0 && coprocessor_owner[i] == ti)
 			coprocessor_flush(ti, i);
 		cpenable >>= 1;
 	}
+	WSR_CPENABLE(old_cpenable);
 
 	preempt_enable();
 }
-- 
GitLab


From 03bc996af0cc71c7f30c384d8ce7260172423b34 Mon Sep 17 00:00:00 2001
From: Max Filippov <jcmvbkbc@gmail.com>
Date: Mon, 26 Nov 2018 15:18:26 -0800
Subject: [PATCH 1429/1890] xtensa: fix coprocessor context offset definitions

Coprocessor context offsets are used by the assembly code that moves
coprocessor context between the individual fields of the
thread_info::xtregs_cp structure and coprocessor registers.
This fixes coprocessor context clobbering on flushing and reloading
during normal user code execution and user process debugging in the
presence of more than one coprocessor in the core configuration.

Cc: stable@vger.kernel.org
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 arch/xtensa/kernel/asm-offsets.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/xtensa/kernel/asm-offsets.c b/arch/xtensa/kernel/asm-offsets.c
index 67904f55f1884..120dd746a1475 100644
--- a/arch/xtensa/kernel/asm-offsets.c
+++ b/arch/xtensa/kernel/asm-offsets.c
@@ -94,14 +94,14 @@ int main(void)
 	DEFINE(THREAD_SP, offsetof (struct task_struct, thread.sp));
 	DEFINE(THREAD_CPENABLE, offsetof (struct thread_info, cpenable));
 #if XTENSA_HAVE_COPROCESSORS
-	DEFINE(THREAD_XTREGS_CP0, offsetof (struct thread_info, xtregs_cp));
-	DEFINE(THREAD_XTREGS_CP1, offsetof (struct thread_info, xtregs_cp));
-	DEFINE(THREAD_XTREGS_CP2, offsetof (struct thread_info, xtregs_cp));
-	DEFINE(THREAD_XTREGS_CP3, offsetof (struct thread_info, xtregs_cp));
-	DEFINE(THREAD_XTREGS_CP4, offsetof (struct thread_info, xtregs_cp));
-	DEFINE(THREAD_XTREGS_CP5, offsetof (struct thread_info, xtregs_cp));
-	DEFINE(THREAD_XTREGS_CP6, offsetof (struct thread_info, xtregs_cp));
-	DEFINE(THREAD_XTREGS_CP7, offsetof (struct thread_info, xtregs_cp));
+	DEFINE(THREAD_XTREGS_CP0, offsetof(struct thread_info, xtregs_cp.cp0));
+	DEFINE(THREAD_XTREGS_CP1, offsetof(struct thread_info, xtregs_cp.cp1));
+	DEFINE(THREAD_XTREGS_CP2, offsetof(struct thread_info, xtregs_cp.cp2));
+	DEFINE(THREAD_XTREGS_CP3, offsetof(struct thread_info, xtregs_cp.cp3));
+	DEFINE(THREAD_XTREGS_CP4, offsetof(struct thread_info, xtregs_cp.cp4));
+	DEFINE(THREAD_XTREGS_CP5, offsetof(struct thread_info, xtregs_cp.cp5));
+	DEFINE(THREAD_XTREGS_CP6, offsetof(struct thread_info, xtregs_cp.cp6));
+	DEFINE(THREAD_XTREGS_CP7, offsetof(struct thread_info, xtregs_cp.cp7));
 #endif
 	DEFINE(THREAD_XTREGS_USER, offsetof (struct thread_info, xtregs_user));
 	DEFINE(XTREGS_USER_SIZE, sizeof(xtregs_user_t));
-- 
GitLab


From 38a35a78c5e270cbe53c4fef6b0d3c2da90dd849 Mon Sep 17 00:00:00 2001
From: Max Filippov <jcmvbkbc@gmail.com>
Date: Mon, 26 Nov 2018 18:06:01 -0800
Subject: [PATCH 1430/1890] xtensa: fix coprocessor part of
 ptrace_{get,set}xregs

Layout of coprocessor registers in the elf_xtregs_t and
xtregs_coprocessor_t may be different due to alignment. Thus it is not
always possible to copy data between the xtregs_coprocessor_t structure
and the elf_xtregs_t and get correct values for all registers.
Use a table of offsets and sizes of individual coprocessor register
groups to do coprocessor context copying in the ptrace_getxregs and
ptrace_setxregs.
This fixes incorrect coprocessor register values reading from the user
process by the native gdb on an xtensa core with multiple coprocessors
and registers with high alignment requirements.

Cc: stable@vger.kernel.org
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 arch/xtensa/kernel/ptrace.c | 42 +++++++++++++++++++++++++++++++++----
 1 file changed, 38 insertions(+), 4 deletions(-)

diff --git a/arch/xtensa/kernel/ptrace.c b/arch/xtensa/kernel/ptrace.c
index c0845cb1cbb99..d9541be0605ad 100644
--- a/arch/xtensa/kernel/ptrace.c
+++ b/arch/xtensa/kernel/ptrace.c
@@ -127,12 +127,37 @@ static int ptrace_setregs(struct task_struct *child, void __user *uregs)
 }
 
 
+#if XTENSA_HAVE_COPROCESSORS
+#define CP_OFFSETS(cp) \
+	{ \
+		.elf_xtregs_offset = offsetof(elf_xtregs_t, cp), \
+		.ti_offset = offsetof(struct thread_info, xtregs_cp.cp), \
+		.sz = sizeof(xtregs_ ## cp ## _t), \
+	}
+
+static const struct {
+	size_t elf_xtregs_offset;
+	size_t ti_offset;
+	size_t sz;
+} cp_offsets[] = {
+	CP_OFFSETS(cp0),
+	CP_OFFSETS(cp1),
+	CP_OFFSETS(cp2),
+	CP_OFFSETS(cp3),
+	CP_OFFSETS(cp4),
+	CP_OFFSETS(cp5),
+	CP_OFFSETS(cp6),
+	CP_OFFSETS(cp7),
+};
+#endif
+
 static int ptrace_getxregs(struct task_struct *child, void __user *uregs)
 {
 	struct pt_regs *regs = task_pt_regs(child);
 	struct thread_info *ti = task_thread_info(child);
 	elf_xtregs_t __user *xtregs = uregs;
 	int ret = 0;
+	int i __maybe_unused;
 
 	if (!access_ok(VERIFY_WRITE, uregs, sizeof(elf_xtregs_t)))
 		return -EIO;
@@ -140,8 +165,13 @@ static int ptrace_getxregs(struct task_struct *child, void __user *uregs)
 #if XTENSA_HAVE_COPROCESSORS
 	/* Flush all coprocessor registers to memory. */
 	coprocessor_flush_all(ti);
-	ret |= __copy_to_user(&xtregs->cp0, &ti->xtregs_cp,
-			      sizeof(xtregs_coprocessor_t));
+
+	for (i = 0; i < ARRAY_SIZE(cp_offsets); ++i)
+		ret |= __copy_to_user((char __user *)xtregs +
+				      cp_offsets[i].elf_xtregs_offset,
+				      (const char *)ti +
+				      cp_offsets[i].ti_offset,
+				      cp_offsets[i].sz);
 #endif
 	ret |= __copy_to_user(&xtregs->opt, &regs->xtregs_opt,
 			      sizeof(xtregs->opt));
@@ -157,6 +187,7 @@ static int ptrace_setxregs(struct task_struct *child, void __user *uregs)
 	struct pt_regs *regs = task_pt_regs(child);
 	elf_xtregs_t *xtregs = uregs;
 	int ret = 0;
+	int i __maybe_unused;
 
 	if (!access_ok(VERIFY_READ, uregs, sizeof(elf_xtregs_t)))
 		return -EFAULT;
@@ -166,8 +197,11 @@ static int ptrace_setxregs(struct task_struct *child, void __user *uregs)
 	coprocessor_flush_all(ti);
 	coprocessor_release_all(ti);
 
-	ret |= __copy_from_user(&ti->xtregs_cp, &xtregs->cp0,
-				sizeof(xtregs_coprocessor_t));
+	for (i = 0; i < ARRAY_SIZE(cp_offsets); ++i)
+		ret |= __copy_from_user((char *)ti + cp_offsets[i].ti_offset,
+					(const char __user *)xtregs +
+					cp_offsets[i].elf_xtregs_offset,
+					cp_offsets[i].sz);
 #endif
 	ret |= __copy_from_user(&regs->xtregs_opt, &xtregs->opt,
 				sizeof(xtregs->opt));
-- 
GitLab


From d6a2b9535d1e52bea269c138614c4801469d10e1 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Mon, 26 Nov 2018 16:39:47 -0700
Subject: [PATCH 1431/1890] nvme: Free ctrl device name on init failure

Free the kobject name that was allocated for the controller device on
failure rather than its parent.

Fixes: d22524a4782a9 ("nvme: switch controller refcounting to use struct device")
Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 559d567693b8d..5afda6fe5ae90 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3585,7 +3585,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 
 	return 0;
 out_free_name:
-	kfree_const(dev->kobj.name);
+	kfree_const(ctrl->device->kobj.name);
 out_release_instance:
 	ida_simple_remove(&nvme_instance_ida, ctrl->instance);
 out:
-- 
GitLab


From 6484a677294aa5d08c0210f2f387ebb9be646115 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Wed, 14 Nov 2018 01:57:03 +0000
Subject: [PATCH 1432/1890] misc: mic/scif: fix copy-paste error in
 scif_create_remote_lookup

gcc '-Wunused-but-set-variable' warning:

drivers/misc/mic/scif/scif_rma.c: In function 'scif_create_remote_lookup':
drivers/misc/mic/scif/scif_rma.c:373:25: warning:
 variable 'vmalloc_num_pages' set but not used [-Wunused-but-set-variable]

'vmalloc_num_pages' should be used to determine if the address is
within the vmalloc range.

Fixes: ba612aa8b487 ("misc: mic: SCIF memory registration and unregistration")
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/mic/scif/scif_rma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/mic/scif/scif_rma.c b/drivers/misc/mic/scif/scif_rma.c
index c824329f7012a..0e4193cb08cf1 100644
--- a/drivers/misc/mic/scif/scif_rma.c
+++ b/drivers/misc/mic/scif/scif_rma.c
@@ -416,7 +416,7 @@ static int scif_create_remote_lookup(struct scif_dev *remote_dev,
 		if (err)
 			goto error_window;
 		err = scif_map_page(&window->num_pages_lookup.lookup[j],
-				    vmalloc_dma_phys ?
+				    vmalloc_num_pages ?
 				    vmalloc_to_page(&window->num_pages[i]) :
 				    virt_to_page(&window->num_pages[i]),
 				    remote_dev);
-- 
GitLab


From a6ca633e13f888cbb0a92309a1e090818cffcc86 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Wed, 21 Nov 2018 10:35:21 +0100
Subject: [PATCH 1433/1890] ARM: davinci: dm355: set the GPIO base to 0

Commit 587f7a694f01 ("gpio: davinci: Use dev name for label and
automatic base selection") broke the GPIO support on DaVinci boards
in legacy mode by allowing gpiolib to set the GPIO base automatically.

DaVinci board files use the legacy GPIO API with hard-coded GPIO line
numbers. Use the new fields in struct davinci_gpio_platform_data to
manually set the GPIO base to 0.

Fixes: 587f7a694f01 ("gpio: davinci: Use dev name for label and automatic base selection")
Cc: stable@vger.kernel.org
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
---
 arch/arm/mach-davinci/dm355.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm/mach-davinci/dm355.c b/arch/arm/mach-davinci/dm355.c
index 2b0f5d97ab7c1..4c6e0bef45092 100644
--- a/arch/arm/mach-davinci/dm355.c
+++ b/arch/arm/mach-davinci/dm355.c
@@ -584,6 +584,8 @@ static struct resource dm355_gpio_resources[] = {
 };
 
 static struct davinci_gpio_platform_data dm355_gpio_platform_data = {
+	.no_auto_base	= true,
+	.base		= 0,
 	.ngpio		= 104,
 };
 
-- 
GitLab


From 55a891d0d0bd3db9303c580857147c37e2c76a5a Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Wed, 21 Nov 2018 10:35:22 +0100
Subject: [PATCH 1434/1890] ARM: davinci: da830: set the GPIO base to 0

Commit 587f7a694f01 ("gpio: davinci: Use dev name for label and
automatic base selection") broke the GPIO support on DaVinci boards
in legacy mode by allowing gpiolib to set the GPIO base automatically.

DaVinci board files use the legacy GPIO API with hard-coded GPIO line
numbers. Use the new fields in struct davinci_gpio_platform_data to
manually set the GPIO base to 0.

Fixes: 587f7a694f01 ("gpio: davinci: Use dev name for label and automatic base selection")
Cc: stable@vger.kernel.org
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
---
 arch/arm/mach-davinci/da830.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/arm/mach-davinci/da830.c b/arch/arm/mach-davinci/da830.c
index 0bc5bd2665df8..2cc9fe4c3a911 100644
--- a/arch/arm/mach-davinci/da830.c
+++ b/arch/arm/mach-davinci/da830.c
@@ -759,7 +759,9 @@ static struct davinci_id da830_ids[] = {
 };
 
 static struct davinci_gpio_platform_data da830_gpio_platform_data = {
-	.ngpio = 128,
+	.no_auto_base	= true,
+	.base		= 0,
+	.ngpio		= 128,
 };
 
 int __init da830_register_gpio(void)
-- 
GitLab


From 27df7977099c2b8d32399cd1752f527c5a343dfa Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Wed, 21 Nov 2018 10:35:23 +0100
Subject: [PATCH 1435/1890] ARM: davinci: dm644x: set the GPIO base to 0

Commit 587f7a694f01 ("gpio: davinci: Use dev name for label and
automatic base selection") broke the GPIO support on DaVinci boards
in legacy mode by allowing gpiolib to set the GPIO base automatically.

DaVinci board files use the legacy GPIO API with hard-coded GPIO line
numbers. Use the new fields in struct davinci_gpio_platform_data to
manually set the GPIO base to 0.

Fixes: 587f7a694f01 ("gpio: davinci: Use dev name for label and automatic base selection")
Cc: stable@vger.kernel.org
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
---
 arch/arm/mach-davinci/dm644x.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm/mach-davinci/dm644x.c b/arch/arm/mach-davinci/dm644x.c
index de1ec6dc01e94..38f92b7d413ef 100644
--- a/arch/arm/mach-davinci/dm644x.c
+++ b/arch/arm/mach-davinci/dm644x.c
@@ -518,6 +518,8 @@ static struct resource dm644_gpio_resources[] = {
 };
 
 static struct davinci_gpio_platform_data dm644_gpio_platform_data = {
+	.no_auto_base	= true,
+	.base		= 0,
 	.ngpio		= 71,
 };
 
-- 
GitLab


From 2b9034b5eaddc09c0e9529b93446eb975f97f814 Mon Sep 17 00:00:00 2001
From: David Miller <davem@davemloft.net>
Date: Mon, 26 Nov 2018 21:55:09 -0800
Subject: [PATCH 1436/1890] sparc: Adjust bpf JIT prologue for PSEUDO calls.

Move all arguments into output registers from input registers.

This path is exercised by test_verifier.c's "calls: two calls with
args" test.  Adjust BPF_TAILCALL_PROLOGUE_SKIP as needed.

Let's also make the prologue length a constant size regardless of
the combination of ->saw_frame_pointer and ->saw_tail_call
settings.

Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 arch/sparc/net/bpf_jit_comp_64.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
index ec4da4dc98f12..5fda4f7bf15d1 100644
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -791,7 +791,7 @@ static int emit_compare_and_branch(const u8 code, const u8 dst, u8 src,
 }
 
 /* Just skip the save instruction and the ctx register move.  */
-#define BPF_TAILCALL_PROLOGUE_SKIP	16
+#define BPF_TAILCALL_PROLOGUE_SKIP	32
 #define BPF_TAILCALL_CNT_SP_OFF		(STACK_BIAS + 128)
 
 static void build_prologue(struct jit_ctx *ctx)
@@ -824,9 +824,15 @@ static void build_prologue(struct jit_ctx *ctx)
 		const u8 vfp = bpf2sparc[BPF_REG_FP];
 
 		emit(ADD | IMMED | RS1(FP) | S13(STACK_BIAS) | RD(vfp), ctx);
+	} else {
+		emit_nop(ctx);
 	}
 
 	emit_reg_move(I0, O0, ctx);
+	emit_reg_move(I1, O1, ctx);
+	emit_reg_move(I2, O2, ctx);
+	emit_reg_move(I3, O3, ctx);
+	emit_reg_move(I4, O4, ctx);
 	/* If you add anything here, adjust BPF_TAILCALL_PROLOGUE_SKIP above. */
 }
 
-- 
GitLab


From 2a48602615e0a2f563549c7d5c8d507f904cf96e Mon Sep 17 00:00:00 2001
From: Chanho Park <parkch98@gmail.com>
Date: Thu, 22 Nov 2018 18:23:47 +0900
Subject: [PATCH 1437/1890] tty: do not set TTY_IO_ERROR flag if console port

Since Commit 761ed4a94582 ('tty: serial_core: convert uart_close to use
tty_port_close') and Commit 4dda864d7307 ('tty: serial_core: Fix serial
console crash on port shutdown), a serial port which is used as
console can be stuck when logging out if there is a remained process.
After logged out, agetty will try to grab the serial port but it will
be failed because the previous process did not release the port
correctly. To fix this, TTY_IO_ERROR bit should not be enabled of
tty_port_close if the port is console port.

Reproduce step:
- Run background processes from serial console
$ while true; do sleep 10; done &

- Log out
$ logout
-> Stuck

- Read journal log by journalctl | tail
Jan 28 16:07:01 ubuntu systemd[1]: Stopped Serial Getty on ttyAMA0.
Jan 28 16:07:01 ubuntu systemd[1]: Started Serial Getty on ttyAMA0.
Jan 28 16:07:02 ubuntu agetty[1643]: /dev/ttyAMA0: not a tty

Fixes: 761ed4a94582 ("tty: serial_core: convert uart_close to use tty_port_close")
Cc: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: Rob Herring <robh@kernel.org>
Cc: Jiri Slaby <jslaby@suse.com>
Signed-off-by: Chanho Park <parkch98@gmail.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/tty_port.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/tty/tty_port.c b/drivers/tty/tty_port.c
index cb6075096a5b4..044c3cbdcfa40 100644
--- a/drivers/tty/tty_port.c
+++ b/drivers/tty/tty_port.c
@@ -633,7 +633,8 @@ void tty_port_close(struct tty_port *port, struct tty_struct *tty,
 	if (tty_port_close_start(port, tty, filp) == 0)
 		return;
 	tty_port_shutdown(port, tty);
-	set_bit(TTY_IO_ERROR, &tty->flags);
+	if (!port->console)
+		set_bit(TTY_IO_ERROR, &tty->flags);
 	tty_port_close_end(port, tty);
 	tty_port_tty_set(port, NULL);
 }
-- 
GitLab


From e5f5b717983bccfa033282e9886811635602510e Mon Sep 17 00:00:00 2001
From: xingaopeng <xingaopeng@huawei.com>
Date: Sat, 24 Nov 2018 19:21:59 +0800
Subject: [PATCH 1438/1890] ext2: initialize opts.s_mount_opt as zero before
 using it

We need to initialize opts.s_mount_opt as zero before using it, else we
may get some unexpected mount options.

Fixes: 088519572ca8 ("ext2: Parse mount options into a dedicated structure")
CC: stable@vger.kernel.org
Signed-off-by: xingaopeng <xingaopeng@huawei.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext2/super.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index cb91baa4275d8..eb11502e3fcd4 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -892,6 +892,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
 	if (sb->s_magic != EXT2_SUPER_MAGIC)
 		goto cantfind_ext2;
 
+	opts.s_mount_opt = 0;
 	/* Set defaults before we parse the mount options */
 	def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
 	if (def_mount_opts & EXT2_DEFM_DEBUG)
-- 
GitLab


From ecebf55d27a11538ea84aee0be643dd953f830d5 Mon Sep 17 00:00:00 2001
From: Pan Bian <bianpan2016@163.com>
Date: Sun, 25 Nov 2018 08:58:02 +0800
Subject: [PATCH 1439/1890] ext2: fix potential use after free

The function ext2_xattr_set calls brelse(bh) to drop the reference count
of bh. After that, bh may be freed. However, following brelse(bh),
it reads bh->b_data via macro HDR(bh). This may result in a
use-after-free bug. This patch moves brelse(bh) after reading field.

CC: stable@vger.kernel.org
Signed-off-by: Pan Bian <bianpan2016@163.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext2/xattr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 62d9a659a8ff4..dd8f10db82e99 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -612,9 +612,9 @@ bad_block:		ext2_error(sb, "ext2_xattr_set",
 	}
 
 cleanup:
-	brelse(bh);
 	if (!(bh && header == HDR(bh)))
 		kfree(header);
+	brelse(bh);
 	up_write(&EXT2_I(inode)->xattr_sem);
 
 	return error;
-- 
GitLab


From 104f708fd1241b22f808bdf066ab67dc5a051de5 Mon Sep 17 00:00:00 2001
From: Harald Freudenberger <freude@linux.ibm.com>
Date: Fri, 9 Nov 2018 14:59:24 +0100
Subject: [PATCH 1440/1890] s390/zcrypt: reinit ap queue state machine during
 device probe

Until the vfio-ap driver came into live there was a well known
agreement about the way how ap devices are initialized and their
states when the driver's probe function is called.

However, the vfio device driver when receiving an ap queue device does
additional resets thereby removing the registration for interrupts for
the ap device done by the ap bus core code. So when later the vfio
driver releases the device and one of the default zcrypt drivers takes
care of the device the interrupt registration needs to get
renewed. The current code does no renew and result is that requests
send into such a queue will never see a reply processed - the
application hangs.

This patch adds a function which resets the aq queue state machine for
the ap queue device and triggers the walk through the initial states
(which are reset and registration for interrupts). This function is
now called before the driver's probe function is invoked.

When the association between driver and device is released, the
driver's remove function is called. The current implementation calls a
ap queue function ap_queue_remove(). This invokation has been moved to
the ap bus function to make the probe / remove pair for ap bus and
drivers more symmetric.

Fixes: 7e0bdbe5c21c ("s390/zcrypt: AP bus support for alternate driver(s)")
Cc: stable@vger.kernel.org # 4.19+
Signed-off-by: Harald Freudenberger <freude@linux.ibm.com>
Reviewd-by: Tony Krowiak <akrowiak@linux.ibm.com>
Reviewd-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/crypto/ap_bus.c       |  8 ++++----
 drivers/s390/crypto/ap_bus.h       |  1 +
 drivers/s390/crypto/ap_queue.c     | 15 +++++++++++++++
 drivers/s390/crypto/zcrypt_cex2a.c |  1 -
 drivers/s390/crypto/zcrypt_cex2c.c |  1 -
 drivers/s390/crypto/zcrypt_cex4.c  |  1 -
 6 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c
index 048665e4f13d4..9f5a201c4c878 100644
--- a/drivers/s390/crypto/ap_bus.c
+++ b/drivers/s390/crypto/ap_bus.c
@@ -775,6 +775,8 @@ static int ap_device_probe(struct device *dev)
 		drvres = ap_drv->flags & AP_DRIVER_FLAG_DEFAULT;
 		if (!!devres != !!drvres)
 			return -ENODEV;
+		/* (re-)init queue's state machine */
+		ap_queue_reinit_state(to_ap_queue(dev));
 	}
 
 	/* Add queue/card to list of active queues/cards */
@@ -807,6 +809,8 @@ static int ap_device_remove(struct device *dev)
 	struct ap_device *ap_dev = to_ap_dev(dev);
 	struct ap_driver *ap_drv = ap_dev->drv;
 
+	if (is_queue_dev(dev))
+		ap_queue_remove(to_ap_queue(dev));
 	if (ap_drv->remove)
 		ap_drv->remove(ap_dev);
 
@@ -1444,10 +1448,6 @@ static void ap_scan_bus(struct work_struct *unused)
 			aq->ap_dev.device.parent = &ac->ap_dev.device;
 			dev_set_name(&aq->ap_dev.device,
 				     "%02x.%04x", id, dom);
-			/* Start with a device reset */
-			spin_lock_bh(&aq->lock);
-			ap_wait(ap_sm_event(aq, AP_EVENT_POLL));
-			spin_unlock_bh(&aq->lock);
 			/* Register device */
 			rc = device_register(&aq->ap_dev.device);
 			if (rc) {
diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h
index 3eed1b36c876d..bfc66e4a9de14 100644
--- a/drivers/s390/crypto/ap_bus.h
+++ b/drivers/s390/crypto/ap_bus.h
@@ -254,6 +254,7 @@ struct ap_queue *ap_queue_create(ap_qid_t qid, int device_type);
 void ap_queue_remove(struct ap_queue *aq);
 void ap_queue_suspend(struct ap_device *ap_dev);
 void ap_queue_resume(struct ap_device *ap_dev);
+void ap_queue_reinit_state(struct ap_queue *aq);
 
 struct ap_card *ap_card_create(int id, int queue_depth, int raw_device_type,
 			       int comp_device_type, unsigned int functions);
diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c
index 66f7334bcb032..0aa4b3ccc948c 100644
--- a/drivers/s390/crypto/ap_queue.c
+++ b/drivers/s390/crypto/ap_queue.c
@@ -718,5 +718,20 @@ void ap_queue_remove(struct ap_queue *aq)
 {
 	ap_flush_queue(aq);
 	del_timer_sync(&aq->timeout);
+
+	/* reset with zero, also clears irq registration */
+	spin_lock_bh(&aq->lock);
+	ap_zapq(aq->qid);
+	aq->state = AP_STATE_BORKED;
+	spin_unlock_bh(&aq->lock);
 }
 EXPORT_SYMBOL(ap_queue_remove);
+
+void ap_queue_reinit_state(struct ap_queue *aq)
+{
+	spin_lock_bh(&aq->lock);
+	aq->state = AP_STATE_RESET_START;
+	ap_wait(ap_sm_event(aq, AP_EVENT_POLL));
+	spin_unlock_bh(&aq->lock);
+}
+EXPORT_SYMBOL(ap_queue_reinit_state);
diff --git a/drivers/s390/crypto/zcrypt_cex2a.c b/drivers/s390/crypto/zcrypt_cex2a.c
index 146f54f5cbb81..c50f3e86cc748 100644
--- a/drivers/s390/crypto/zcrypt_cex2a.c
+++ b/drivers/s390/crypto/zcrypt_cex2a.c
@@ -196,7 +196,6 @@ static void zcrypt_cex2a_queue_remove(struct ap_device *ap_dev)
 	struct ap_queue *aq = to_ap_queue(&ap_dev->device);
 	struct zcrypt_queue *zq = aq->private;
 
-	ap_queue_remove(aq);
 	if (zq)
 		zcrypt_queue_unregister(zq);
 }
diff --git a/drivers/s390/crypto/zcrypt_cex2c.c b/drivers/s390/crypto/zcrypt_cex2c.c
index 546f676767348..35c7c6672713b 100644
--- a/drivers/s390/crypto/zcrypt_cex2c.c
+++ b/drivers/s390/crypto/zcrypt_cex2c.c
@@ -251,7 +251,6 @@ static void zcrypt_cex2c_queue_remove(struct ap_device *ap_dev)
 	struct ap_queue *aq = to_ap_queue(&ap_dev->device);
 	struct zcrypt_queue *zq = aq->private;
 
-	ap_queue_remove(aq);
 	if (zq)
 		zcrypt_queue_unregister(zq);
 }
diff --git a/drivers/s390/crypto/zcrypt_cex4.c b/drivers/s390/crypto/zcrypt_cex4.c
index f9d4c6c7521d7..582ffa7e0f187 100644
--- a/drivers/s390/crypto/zcrypt_cex4.c
+++ b/drivers/s390/crypto/zcrypt_cex4.c
@@ -275,7 +275,6 @@ static void zcrypt_cex4_queue_remove(struct ap_device *ap_dev)
 	struct ap_queue *aq = to_ap_queue(&ap_dev->device);
 	struct zcrypt_queue *zq = aq->private;
 
-	ap_queue_remove(aq);
 	if (zq)
 		zcrypt_queue_unregister(zq);
 }
-- 
GitLab


From e8ed64b08eddc05043e556832616a478bbe4bb00 Mon Sep 17 00:00:00 2001
From: Girija Kumar Kasinadhuni <gkumar@neverware.com>
Date: Mon, 26 Nov 2018 13:40:46 -0500
Subject: [PATCH 1441/1890] ALSA: hda/realtek - Add auto-mute quirk for HP
 Spectre x360 laptop

This device makes a loud buzzing sound when a headphone is inserted while
playing audio at full volume through the speaker.

Fixes: bbf8ff6b1d2a ("ALSA: hda/realtek - Fixup for HP x360 laptops with B&O speakers")
Signed-off-by: Girija Kumar Kasinadhuni <gkumar@neverware.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index e66da22272fdf..7d16dbb18e416 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -5506,6 +5506,7 @@ enum {
 	ALC295_FIXUP_HP_X360,
 	ALC221_FIXUP_HP_HEADSET_MIC,
 	ALC285_FIXUP_LENOVO_HEADPHONE_NOISE,
+	ALC295_FIXUP_HP_AUTO_MUTE,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -5670,6 +5671,8 @@ static const struct hda_fixup alc269_fixups[] = {
 	[ALC269_FIXUP_HP_MUTE_LED_MIC3] = {
 		.type = HDA_FIXUP_FUNC,
 		.v.func = alc269_fixup_hp_mute_led_mic3,
+		.chained = true,
+		.chain_id = ALC295_FIXUP_HP_AUTO_MUTE
 	},
 	[ALC269_FIXUP_HP_GPIO_LED] = {
 		.type = HDA_FIXUP_FUNC,
@@ -6377,6 +6380,10 @@ static const struct hda_fixup alc269_fixups[] = {
 		.type = HDA_FIXUP_FUNC,
 		.v.func = alc285_fixup_invalidate_dacs,
 	},
+	[ALC295_FIXUP_HP_AUTO_MUTE] = {
+		.type = HDA_FIXUP_FUNC,
+		.v.func = alc_fixup_auto_mute_via_amp,
+	},
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
-- 
GitLab


From 1078bef0cd9291355a20369b21cd823026ab8eaa Mon Sep 17 00:00:00 2001
From: Kailang Yang <kailang@realtek.com>
Date: Thu, 8 Nov 2018 16:36:15 +0800
Subject: [PATCH 1442/1890] ALSA: hda/realtek - Support ALC300

This patch will enable ALC300.

[ It's almost equivalent with other ALC269-compatible ones, and
  apparently has no loopback mixer -- tiwai ]

Signed-off-by: Kailang Yang <kailang@realtek.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 7d16dbb18e416..06f93032d0ccf 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -388,6 +388,7 @@ static void alc_fill_eapd_coef(struct hda_codec *codec)
 	case 0x10ec0285:
 	case 0x10ec0298:
 	case 0x10ec0289:
+	case 0x10ec0300:
 		alc_update_coef_idx(codec, 0x10, 1<<9, 0);
 		break;
 	case 0x10ec0275:
@@ -2830,6 +2831,7 @@ enum {
 	ALC269_TYPE_ALC215,
 	ALC269_TYPE_ALC225,
 	ALC269_TYPE_ALC294,
+	ALC269_TYPE_ALC300,
 	ALC269_TYPE_ALC700,
 };
 
@@ -2864,6 +2866,7 @@ static int alc269_parse_auto_config(struct hda_codec *codec)
 	case ALC269_TYPE_ALC215:
 	case ALC269_TYPE_ALC225:
 	case ALC269_TYPE_ALC294:
+	case ALC269_TYPE_ALC300:
 	case ALC269_TYPE_ALC700:
 		ssids = alc269_ssids;
 		break;
@@ -7323,6 +7326,10 @@ static int patch_alc269(struct hda_codec *codec)
 		spec->gen.mixer_nid = 0; /* ALC2x4 does not have any loopback mixer path */
 		alc_update_coef_idx(codec, 0x6b, 0x0018, (1<<4) | (1<<3)); /* UAJ MIC Vref control by verb */
 		break;
+	case 0x10ec0300:
+		spec->codec_variant = ALC269_TYPE_ALC300;
+		spec->gen.mixer_nid = 0; /* no loopback on ALC300 */
+		break;
 	case 0x10ec0700:
 	case 0x10ec0701:
 	case 0x10ec0703:
@@ -8433,6 +8440,7 @@ static const struct hda_device_id snd_hda_id_realtek[] = {
 	HDA_CODEC_ENTRY(0x10ec0295, "ALC295", patch_alc269),
 	HDA_CODEC_ENTRY(0x10ec0298, "ALC298", patch_alc269),
 	HDA_CODEC_ENTRY(0x10ec0299, "ALC299", patch_alc269),
+	HDA_CODEC_ENTRY(0x10ec0300, "ALC300", patch_alc269),
 	HDA_CODEC_REV_ENTRY(0x10ec0861, 0x100340, "ALC660", patch_alc861),
 	HDA_CODEC_ENTRY(0x10ec0660, "ALC660-VD", patch_alc861vd),
 	HDA_CODEC_ENTRY(0x10ec0861, "ALC861", patch_alc861),
-- 
GitLab


From 15cb127e3c8f6232096d5dba6a5b4046bc292d70 Mon Sep 17 00:00:00 2001
From: Gustavo Pimentel <gustavo.pimentel@synopsys.com>
Date: Fri, 23 Nov 2018 18:00:21 +0100
Subject: [PATCH 1443/1890] PCI: dwc: Fix MSI-X EP framework address
 calculation bug

Fix an error caused by 3-bit right rotation on offset address
calculation of MSI-X table in dw_pcie_ep_raise_msix_irq().

The initial testing code was setting by default the offset address of
MSI-X table to zero, so that even with a 3-bit right rotation the
computed result would still be zero and valid, therefore this bug went
unnoticed.

Fixes: beb4641a787d ("PCI: dwc: Add MSI-X callbacks handler")
Signed-off-by: Gustavo Pimentel <gustavo.pimentel@synopsys.com>
[lorenzo.pieralisi@arm.com: updated commit log]
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Cc: stable@vger.kernel.org
---
 drivers/pci/controller/dwc/pcie-designware-ep.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c
index 1e7b02221eac9..de8635af4cde2 100644
--- a/drivers/pci/controller/dwc/pcie-designware-ep.c
+++ b/drivers/pci/controller/dwc/pcie-designware-ep.c
@@ -440,7 +440,6 @@ int dw_pcie_ep_raise_msix_irq(struct dw_pcie_ep *ep, u8 func_no,
 	tbl_offset = dw_pcie_readl_dbi(pci, reg);
 	bir = (tbl_offset & PCI_MSIX_TABLE_BIR);
 	tbl_offset &= PCI_MSIX_TABLE_OFFSET;
-	tbl_offset >>= 3;
 
 	reg = PCI_BASE_ADDRESS_0 + (4 * bir);
 	bar_addr_upper = 0;
-- 
GitLab


From a87c99e61236ba8ca962ce97a19fab5ebd588d35 Mon Sep 17 00:00:00 2001
From: Luiz Capitulino <lcapitulino@redhat.com>
Date: Fri, 23 Nov 2018 12:02:14 -0500
Subject: [PATCH 1444/1890] KVM: VMX: re-add ple_gap module parameter

Apparently, the ple_gap parameter was accidentally removed
by commit c8e88717cfc6b36bedea22368d97667446318291. Add it
back.

Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
Cc: stable@vger.kernel.org
Fixes: c8e88717cfc6b36bedea22368d97667446318291
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 4555077d69ce2..be6f13f1c25f7 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -174,6 +174,7 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
  * refer SDM volume 3b section 21.6.13 & 22.1.3.
  */
 static unsigned int ple_gap = KVM_DEFAULT_PLE_GAP;
+module_param(ple_gap, uint, 0444);
 
 static unsigned int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
 module_param(ple_window, uint, 0444);
-- 
GitLab


From 38ab012f109caf10f471db1adf284e620dd8d701 Mon Sep 17 00:00:00 2001
From: Wanpeng Li <kernellwp@gmail.com>
Date: Tue, 20 Nov 2018 09:39:30 +0800
Subject: [PATCH 1445/1890] KVM: LAPIC: Fix pv ipis use-before-initialization
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reported by syzkaller:

 BUG: unable to handle kernel NULL pointer dereference at 0000000000000014
 PGD 800000040410c067 P4D 800000040410c067 PUD 40410d067 PMD 0
 Oops: 0000 [#1] PREEMPT SMP PTI
 CPU: 3 PID: 2567 Comm: poc Tainted: G           OE     4.19.0-rc5 #16
 RIP: 0010:kvm_pv_send_ipi+0x94/0x350 [kvm]
 Call Trace:
  kvm_emulate_hypercall+0x3cc/0x700 [kvm]
  handle_vmcall+0xe/0x10 [kvm_intel]
  vmx_handle_exit+0xc1/0x11b0 [kvm_intel]
  vcpu_enter_guest+0x9fb/0x1910 [kvm]
  kvm_arch_vcpu_ioctl_run+0x35c/0x610 [kvm]
  kvm_vcpu_ioctl+0x3e9/0x6d0 [kvm]
  do_vfs_ioctl+0xa5/0x690
  ksys_ioctl+0x6d/0x80
  __x64_sys_ioctl+0x1a/0x20
  do_syscall_64+0x83/0x6e0
  entry_SYSCALL_64_after_hwframe+0x49/0xbe

The reason is that the apic map has not yet been initialized, the testcase
triggers pv_send_ipi interface by vmcall which results in kvm->arch.apic_map
is dereferenced. This patch fixes it by checking whether or not apic map is
NULL and bailing out immediately if that is the case.

Fixes: 4180bf1b65 (KVM: X86: Implement "send IPI" hypercall)
Reported-by: Wei Wu <ww9210@gmail.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim KrÄmÃ¡Å™ <rkrcmar@redhat.com>
Cc: Wei Wu <ww9210@gmail.com>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/lapic.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 89db20f8cb707..02f2291dcf7e7 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -576,6 +576,11 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
 	rcu_read_lock();
 	map = rcu_dereference(kvm->arch.apic_map);
 
+	if (unlikely(!map)) {
+		count = -EOPNOTSUPP;
+		goto out;
+	}
+
 	if (min > map->max_apic_id)
 		goto out;
 	/* Bits above cluster_size are masked in the caller.  */
-- 
GitLab


From e97f852fd4561e77721bb9a4e0ea9d98305b1e93 Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpengli@tencent.com>
Date: Tue, 20 Nov 2018 16:34:18 +0800
Subject: [PATCH 1446/1890] KVM: X86: Fix scan ioapic use-before-initialization
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reported by syzkaller:

 BUG: unable to handle kernel NULL pointer dereference at 00000000000001c8
 PGD 80000003ec4da067 P4D 80000003ec4da067 PUD 3f7bfa067 PMD 0
 Oops: 0000 [#1] PREEMPT SMP PTI
 CPU: 7 PID: 5059 Comm: debug Tainted: G           OE     4.19.0-rc5 #16
 RIP: 0010:__lock_acquire+0x1a6/0x1990
 Call Trace:
  lock_acquire+0xdb/0x210
  _raw_spin_lock+0x38/0x70
  kvm_ioapic_scan_entry+0x3e/0x110 [kvm]
  vcpu_enter_guest+0x167e/0x1910 [kvm]
  kvm_arch_vcpu_ioctl_run+0x35c/0x610 [kvm]
  kvm_vcpu_ioctl+0x3e9/0x6d0 [kvm]
  do_vfs_ioctl+0xa5/0x690
  ksys_ioctl+0x6d/0x80
  __x64_sys_ioctl+0x1a/0x20
  do_syscall_64+0x83/0x6e0
  entry_SYSCALL_64_after_hwframe+0x49/0xbe

The reason is that the testcase writes hyperv synic HV_X64_MSR_SINT6 msr
and triggers scan ioapic logic to load synic vectors into EOI exit bitmap.
However, irqchip is not initialized by this simple testcase, ioapic/apic
objects should not be accessed.
This can be triggered by the following program:

    #define _GNU_SOURCE

    #include <endian.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <sys/types.h>
    #include <unistd.h>

    uint64_t r[3] = {0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff};

    int main(void)
    {
    	syscall(__NR_mmap, 0x20000000, 0x1000000, 3, 0x32, -1, 0);
    	long res = 0;
    	memcpy((void*)0x20000040, "/dev/kvm", 9);
    	res = syscall(__NR_openat, 0xffffffffffffff9c, 0x20000040, 0, 0);
    	if (res != -1)
    		r[0] = res;
    	res = syscall(__NR_ioctl, r[0], 0xae01, 0);
    	if (res != -1)
    		r[1] = res;
    	res = syscall(__NR_ioctl, r[1], 0xae41, 0);
    	if (res != -1)
    		r[2] = res;
    	memcpy(
    			(void*)0x20000080,
    			"\x01\x00\x00\x00\x00\x5b\x61\xbb\x96\x00\x00\x40\x00\x00\x00\x00\x01\x00"
    			"\x08\x00\x00\x00\x00\x00\x0b\x77\xd1\x78\x4d\xd8\x3a\xed\xb1\x5c\x2e\x43"
    			"\xaa\x43\x39\xd6\xff\xf5\xf0\xa8\x98\xf2\x3e\x37\x29\x89\xde\x88\xc6\x33"
    			"\xfc\x2a\xdb\xb7\xe1\x4c\xac\x28\x61\x7b\x9c\xa9\xbc\x0d\xa0\x63\xfe\xfe"
    			"\xe8\x75\xde\xdd\x19\x38\xdc\x34\xf5\xec\x05\xfd\xeb\x5d\xed\x2e\xaf\x22"
    			"\xfa\xab\xb7\xe4\x42\x67\xd0\xaf\x06\x1c\x6a\x35\x67\x10\x55\xcb",
    			106);
    	syscall(__NR_ioctl, r[2], 0x4008ae89, 0x20000080);
    	syscall(__NR_ioctl, r[2], 0xae80, 0);
    	return 0;
    }

This patch fixes it by bailing out scan ioapic if ioapic is not initialized in
kernel.

Reported-by: Wei Wu <ww9210@gmail.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim KrÄmÃ¡Å™ <rkrcmar@redhat.com>
Cc: Wei Wu <ww9210@gmail.com>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5cd5647120f2b..64cae03b2c20a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7455,7 +7455,8 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
 	else {
 		if (vcpu->arch.apicv_active)
 			kvm_x86_ops->sync_pir_to_irr(vcpu);
-		kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
+		if (ioapic_in_kernel(vcpu->kvm))
+			kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
 	}
 
 	if (is_guest_mode(vcpu))
-- 
GitLab


From 30510387a5e45bfcf8190e03ec7aa15b295828e2 Mon Sep 17 00:00:00 2001
From: Wei Wang <wawei@amazon.de>
Date: Mon, 12 Nov 2018 12:23:14 +0000
Subject: [PATCH 1447/1890] svm: Add mutex_lock to protect
 apic_access_page_done on AMD systems

There is a race condition when accessing kvm->arch.apic_access_page_done.
Due to it, x86_set_memory_region will fail when creating the second vcpu
for a svm guest.

Add a mutex_lock to serialize the accesses to apic_access_page_done.
This lock is also used by vmx for the same purpose.

Signed-off-by: Wei Wang <wawei@amazon.de>
Signed-off-by: Amadeusz Juskowiak <ajusk@amazon.de>
Signed-off-by: Julian Stecklina <jsteckli@amazon.de>
Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Reviewed-by: Joerg Roedel <jroedel@suse.de>
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 0e21ccc46792f..033ec01661b93 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1664,20 +1664,23 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
 static int avic_init_access_page(struct kvm_vcpu *vcpu)
 {
 	struct kvm *kvm = vcpu->kvm;
-	int ret;
+	int ret = 0;
 
+	mutex_lock(&kvm->slots_lock);
 	if (kvm->arch.apic_access_page_done)
-		return 0;
+		goto out;
 
-	ret = x86_set_memory_region(kvm,
-				    APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
-				    APIC_DEFAULT_PHYS_BASE,
-				    PAGE_SIZE);
+	ret = __x86_set_memory_region(kvm,
+				      APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
+				      APIC_DEFAULT_PHYS_BASE,
+				      PAGE_SIZE);
 	if (ret)
-		return ret;
+		goto out;
 
 	kvm->arch.apic_access_page_done = true;
-	return 0;
+out:
+	mutex_unlock(&kvm->slots_lock);
+	return ret;
 }
 
 static int avic_init_backing_page(struct kvm_vcpu *vcpu)
-- 
GitLab


From 7f9ad1dfa3c768d1116c2dbacd7a09f9a871534e Mon Sep 17 00:00:00 2001
From: Liran Alon <liran.alon@oracle.com>
Date: Sat, 17 Nov 2018 14:05:06 +0200
Subject: [PATCH 1448/1890] KVM: nVMX: Fix kernel info-leak when enabling
 KVM_CAP_HYPERV_ENLIGHTENED_VMCS more than once

Consider the case that userspace enables KVM_CAP_HYPERV_ENLIGHTENED_VMCS twice:
1) kvm_vcpu_ioctl_enable_cap() is called to enable
KVM_CAP_HYPERV_ENLIGHTENED_VMCS which calls nested_enable_evmcs().
2) nested_enable_evmcs() sets enlightened_vmcs_enabled to true and fills
vmcs_version which is then copied to userspace.
3) kvm_vcpu_ioctl_enable_cap() is called again to enable
KVM_CAP_HYPERV_ENLIGHTENED_VMCS which calls nested_enable_evmcs().
4) This time nested_enable_evmcs() just returns 0 as
enlightened_vmcs_enabled is already true. *Without filling
vmcs_version*.
5) kvm_vcpu_ioctl_enable_cap() continues as usual and copies
*uninitialized* vmcs_version to userspace which leads to kernel info-leak.

Fix this issue by simply changing nested_enable_evmcs() to always fill
vmcs_version output argument. Even when enlightened_vmcs_enabled is
already set to true.

Note that SVM's nested_enable_evmcs() should not be modified because it
always returns a non-zero value (-ENODEV) which results in
kvm_vcpu_ioctl_enable_cap() skipping the copy of vmcs_version to
userspace (as it should).

Fixes: 57b119da3594 ("KVM: nVMX: add KVM_CAP_HYPERV_ENLIGHTENED_VMCS capability")
Reported-by: syzbot+cfbc368e283d381f8cef@syzkaller.appspotmail.com
Reviewed-by: Krish Sadhukhan <krish.sadhukhan@oracle.com>
Reviewed-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Liran Alon <liran.alon@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index be6f13f1c25f7..065f1df740004 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1611,12 +1611,6 @@ static int nested_enable_evmcs(struct kvm_vcpu *vcpu,
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
-	/* We don't support disabling the feature for simplicity. */
-	if (vmx->nested.enlightened_vmcs_enabled)
-		return 0;
-
-	vmx->nested.enlightened_vmcs_enabled = true;
-
 	/*
 	 * vmcs_version represents the range of supported Enlightened VMCS
 	 * versions: lower 8 bits is the minimal version, higher 8 bits is the
@@ -1626,6 +1620,12 @@ static int nested_enable_evmcs(struct kvm_vcpu *vcpu,
 	if (vmcs_version)
 		*vmcs_version = (KVM_EVMCS_VERSION << 8) | 1;
 
+	/* We don't support disabling the feature for simplicity. */
+	if (vmx->nested.enlightened_vmcs_enabled)
+		return 0;
+
+	vmx->nested.enlightened_vmcs_enabled = true;
+
 	vmx->nested.msrs.pinbased_ctls_high &= ~EVMCS1_UNSUPPORTED_PINCTRL;
 	vmx->nested.msrs.entry_ctls_high &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL;
 	vmx->nested.msrs.exit_ctls_high &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL;
-- 
GitLab


From bcbfbd8ec21096027f1ee13ce6c185e8175166f6 Mon Sep 17 00:00:00 2001
From: Liran Alon <liran.alon@oracle.com>
Date: Thu, 8 Nov 2018 00:43:06 +0200
Subject: [PATCH 1449/1890] KVM: x86: Fix kernel info-leak in
 KVM_HC_CLOCK_PAIRING hypercall

kvm_pv_clock_pairing() allocates local var
"struct kvm_clock_pairing clock_pairing" on stack and initializes
all it's fields besides padding (clock_pairing.pad[]).

Because clock_pairing var is written completely (including padding)
to guest memory, failure to init struct padding results in kernel
info-leak.

Fix the issue by making sure to also init the padding with zeroes.

Fixes: 55dd00a73a51 ("KVM: x86: add KVM_HC_CLOCK_PAIRING hypercall")
Reported-by: syzbot+a8ef68d71211ba264f56@syzkaller.appspotmail.com
Reviewed-by: Mark Kanda <mark.kanda@oracle.com>
Signed-off-by: Liran Alon <liran.alon@oracle.com>
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 64cae03b2c20a..7e4be1f2f253d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6918,6 +6918,7 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr,
 	clock_pairing.nsec = ts.tv_nsec;
 	clock_pairing.tsc = kvm_read_l1_tsc(vcpu, cycle);
 	clock_pairing.flags = 0;
+	memset(&clock_pairing.pad, 0, sizeof(clock_pairing.pad));
 
 	ret = 0;
 	if (kvm_write_guest(vcpu->kvm, paddr, &clock_pairing,
-- 
GitLab


From f48b4711dd6e1cf282f9dfd159c14a305909c97c Mon Sep 17 00:00:00 2001
From: Liran Alon <liran.alon@oracle.com>
Date: Tue, 20 Nov 2018 18:03:25 +0200
Subject: [PATCH 1450/1890] KVM: VMX: Update shared MSRs to be saved/restored
 on MSR_EFER.LMA changes

When guest transitions from/to long-mode by modifying MSR_EFER.LMA,
the list of shared MSRs to be saved/restored on guest<->host
transitions is updated (See vmx_set_efer() call to setup_msrs()).

On every entry to guest, vcpu_enter_guest() calls
vmx_prepare_switch_to_guest(). This function should also take care
of setting the shared MSRs to be saved/restored. However, the
function does nothing in case we are already running with loaded
guest state (vmx->loaded_cpu_state != NULL).

This means that even when guest modifies MSR_EFER.LMA which results
in updating the list of shared MSRs, it isn't being taken into account
by vmx_prepare_switch_to_guest() because it happens while we are
running with loaded guest state.

To fix above mentioned issue, add a flag to mark that the list of
shared MSRs has been updated and modify vmx_prepare_switch_to_guest()
to set shared MSRs when running with host state *OR* list of shared
MSRs has been updated.

Note that this issue was mistakenly introduced by commit
678e315e78a7 ("KVM: vmx: add dedicated utility to access guest's
kernel_gs_base") because previously vmx_set_efer() always called
vmx_load_host_state() which resulted in vmx_prepare_switch_to_guest() to
set shared MSRs.

Fixes: 678e315e78a7 ("KVM: vmx: add dedicated utility to access guest's kernel_gs_base")
Reported-by: Eyal Moscovici <eyal.moscovici@oracle.com>
Reviewed-by: Mihai Carabas <mihai.carabas@oracle.com>
Reviewed-by: Liam Merwick <liam.merwick@oracle.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Liran Alon <liran.alon@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx.c | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 065f1df740004..d09d673100125 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -985,6 +985,7 @@ struct vcpu_vmx {
 	struct shared_msr_entry *guest_msrs;
 	int                   nmsrs;
 	int                   save_nmsrs;
+	bool                  guest_msrs_dirty;
 	unsigned long	      host_idt_base;
 #ifdef CONFIG_X86_64
 	u64 		      msr_host_kernel_gs_base;
@@ -2898,6 +2899,20 @@ static void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
 
 	vmx->req_immediate_exit = false;
 
+	/*
+	 * Note that guest MSRs to be saved/restored can also be changed
+	 * when guest state is loaded. This happens when guest transitions
+	 * to/from long-mode by setting MSR_EFER.LMA.
+	 */
+	if (!vmx->loaded_cpu_state || vmx->guest_msrs_dirty) {
+		vmx->guest_msrs_dirty = false;
+		for (i = 0; i < vmx->save_nmsrs; ++i)
+			kvm_set_shared_msr(vmx->guest_msrs[i].index,
+					   vmx->guest_msrs[i].data,
+					   vmx->guest_msrs[i].mask);
+
+	}
+
 	if (vmx->loaded_cpu_state)
 		return;
 
@@ -2958,11 +2973,6 @@ static void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
 		vmcs_writel(HOST_GS_BASE, gs_base);
 		host_state->gs_base = gs_base;
 	}
-
-	for (i = 0; i < vmx->save_nmsrs; ++i)
-		kvm_set_shared_msr(vmx->guest_msrs[i].index,
-				   vmx->guest_msrs[i].data,
-				   vmx->guest_msrs[i].mask);
 }
 
 static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx)
@@ -3437,6 +3447,7 @@ static void setup_msrs(struct vcpu_vmx *vmx)
 		move_msr_up(vmx, index, save_nmsrs++);
 
 	vmx->save_nmsrs = save_nmsrs;
+	vmx->guest_msrs_dirty = true;
 
 	if (cpu_has_vmx_msr_bitmap())
 		vmx_update_msr_bitmap(&vmx->vcpu);
-- 
GitLab


From 354cb410d87314e2eda344feea84809e4261570a Mon Sep 17 00:00:00 2001
From: Yi Wang <wang.yi59@zte.com.cn>
Date: Thu, 8 Nov 2018 16:48:36 +0800
Subject: [PATCH 1451/1890] KVM: x86: fix empty-body warnings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We get the following warnings about empty statements when building
with 'W=1':

arch/x86/kvm/lapic.c:632:53: warning: suggest braces around empty body in an â€˜ifâ€™ statement [-Wempty-body]
arch/x86/kvm/lapic.c:1907:42: warning: suggest braces around empty body in an â€˜ifâ€™ statement [-Wempty-body]
arch/x86/kvm/lapic.c:1936:65: warning: suggest braces around empty body in an â€˜ifâ€™ statement [-Wempty-body]
arch/x86/kvm/lapic.c:1975:44: warning: suggest braces around empty body in an â€˜ifâ€™ statement [-Wempty-body]

Rework the debug helper macro to get rid of these warnings.

Signed-off-by: Yi Wang <wang.yi59@zte.com.cn>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/lapic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 02f2291dcf7e7..c4533d05c214b 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -55,7 +55,7 @@
 #define PRIo64 "o"
 
 /* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
-#define apic_debug(fmt, arg...)
+#define apic_debug(fmt, arg...) do {} while (0)
 
 /* 14 is the version for Xeon and Pentium 8.4.8*/
 #define APIC_VERSION			(0x14UL | ((KVM_APIC_LVT_NUM - 1) << 16))
-- 
GitLab


From 1e4329ee2c52692ea42cc677fb2133519718b34a Mon Sep 17 00:00:00 2001
From: Yi Wang <wang.yi59@zte.com.cn>
Date: Thu, 8 Nov 2018 11:22:21 +0800
Subject: [PATCH 1452/1890] x86/kvm/vmx: fix old-style function declaration
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The inline keyword which is not at the beginning of the function
declaration may trigger the following build warnings, so let's fix it:

arch/x86/kvm/vmx.c:1309:1: warning: â€˜inlineâ€™ is not at beginning of declaration [-Wold-style-declaration]
arch/x86/kvm/vmx.c:5947:1: warning: â€˜inlineâ€™ is not at beginning of declaration [-Wold-style-declaration]
arch/x86/kvm/vmx.c:5985:1: warning: â€˜inlineâ€™ is not at beginning of declaration [-Wold-style-declaration]
arch/x86/kvm/vmx.c:6023:1: warning: â€˜inlineâ€™ is not at beginning of declaration [-Wold-style-declaration]

Signed-off-by: Yi Wang <wang.yi59@zte.com.cn>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d09d673100125..5f43fcfc225bb 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1308,7 +1308,7 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked);
 static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
 					    u16 error_code);
 static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu);
-static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
+static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
 							  u32 msr, int type);
 
 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
@@ -5956,7 +5956,7 @@ static void free_vpid(int vpid)
 	spin_unlock(&vmx_vpid_lock);
 }
 
-static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
+static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
 							  u32 msr, int type)
 {
 	int f = sizeof(unsigned long);
@@ -5994,7 +5994,7 @@ static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bit
 	}
 }
 
-static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
+static __always_inline void vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
 							 u32 msr, int type)
 {
 	int f = sizeof(unsigned long);
@@ -6032,7 +6032,7 @@ static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitm
 	}
 }
 
-static void __always_inline vmx_set_intercept_for_msr(unsigned long *msr_bitmap,
+static __always_inline void vmx_set_intercept_for_msr(unsigned long *msr_bitmap,
 			     			      u32 msr, int type, bool value)
 {
 	if (value)
-- 
GitLab


From 326e742533bf0a23f0127d8ea62fb558ba665f08 Mon Sep 17 00:00:00 2001
From: Leonid Shatz <leonid.shatz@oracle.com>
Date: Tue, 6 Nov 2018 12:14:25 +0200
Subject: [PATCH 1453/1890] KVM: nVMX/nSVM: Fix bug which sets
 vcpu->arch.tsc_offset to L1 tsc_offset

Since commit e79f245ddec1 ("X86/KVM: Properly update 'tsc_offset' to
represent the running guest"), vcpu->arch.tsc_offset meaning was
changed to always reflect the tsc_offset value set on active VMCS.
Regardless if vCPU is currently running L1 or L2.

However, above mentioned commit failed to also change
kvm_vcpu_write_tsc_offset() to set vcpu->arch.tsc_offset correctly.
This is because vmx_write_tsc_offset() could set the tsc_offset value
in active VMCS to given offset parameter *plus vmcs12->tsc_offset*.
However, kvm_vcpu_write_tsc_offset() just sets vcpu->arch.tsc_offset
to given offset parameter. Without taking into account the possible
addition of vmcs12->tsc_offset. (Same is true for SVM case).

Fix this issue by changing kvm_x86_ops->write_tsc_offset() to return
actually set tsc_offset in active VMCS and modify
kvm_vcpu_write_tsc_offset() to set returned value in
vcpu->arch.tsc_offset.
In addition, rename write_tsc_offset() callback to write_l1_tsc_offset()
to make it clear that it is meant to set L1 TSC offset.

Fixes: e79f245ddec1 ("X86/KVM: Properly update 'tsc_offset' to represent the running guest")
Reviewed-by: Liran Alon <liran.alon@oracle.com>
Reviewed-by: Mihai Carabas <mihai.carabas@oracle.com>
Reviewed-by: Krish Sadhukhan <krish.sadhukhan@oracle.com>
Signed-off-by: Leonid Shatz <leonid.shatz@oracle.com>
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  3 ++-
 arch/x86/kvm/svm.c              |  5 +++--
 arch/x86/kvm/vmx.c              | 21 +++++++++------------
 arch/x86/kvm/x86.c              |  6 +++---
 4 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 55e51ff7e421f..fbda5a917c5b7 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1094,7 +1094,8 @@ struct kvm_x86_ops {
 	bool (*has_wbinvd_exit)(void);
 
 	u64 (*read_l1_tsc_offset)(struct kvm_vcpu *vcpu);
-	void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
+	/* Returns actual tsc_offset set in active VMCS */
+	u64 (*write_l1_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
 
 	void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2);
 
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 033ec01661b93..a24733aade4c7 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1446,7 +1446,7 @@ static u64 svm_read_l1_tsc_offset(struct kvm_vcpu *vcpu)
 	return vcpu->arch.tsc_offset;
 }
 
-static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
+static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 	u64 g_tsc_offset = 0;
@@ -1464,6 +1464,7 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 	svm->vmcb->control.tsc_offset = offset + g_tsc_offset;
 
 	mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
+	return svm->vmcb->control.tsc_offset;
 }
 
 static void avic_init_vmcb(struct vcpu_svm *svm)
@@ -7152,7 +7153,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
 	.has_wbinvd_exit = svm_has_wbinvd_exit,
 
 	.read_l1_tsc_offset = svm_read_l1_tsc_offset,
-	.write_tsc_offset = svm_write_tsc_offset,
+	.write_l1_tsc_offset = svm_write_l1_tsc_offset,
 
 	.set_tdp_cr3 = set_tdp_cr3,
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5f43fcfc225bb..764c23dc444fa 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3464,11 +3464,9 @@ static u64 vmx_read_l1_tsc_offset(struct kvm_vcpu *vcpu)
 	return vcpu->arch.tsc_offset;
 }
 
-/*
- * writes 'offset' into guest's timestamp counter offset register
- */
-static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
+static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 {
+	u64 active_offset = offset;
 	if (is_guest_mode(vcpu)) {
 		/*
 		 * We're here if L1 chose not to trap WRMSR to TSC. According
@@ -3476,17 +3474,16 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 		 * set for L2 remains unchanged, and still needs to be added
 		 * to the newly set TSC to get L2's TSC.
 		 */
-		struct vmcs12 *vmcs12;
-		/* recalculate vmcs02.TSC_OFFSET: */
-		vmcs12 = get_vmcs12(vcpu);
-		vmcs_write64(TSC_OFFSET, offset +
-			(nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETING) ?
-			 vmcs12->tsc_offset : 0));
+		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+		if (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETING))
+			active_offset += vmcs12->tsc_offset;
 	} else {
 		trace_kvm_write_tsc_offset(vcpu->vcpu_id,
 					   vmcs_read64(TSC_OFFSET), offset);
-		vmcs_write64(TSC_OFFSET, offset);
 	}
+
+	vmcs_write64(TSC_OFFSET, active_offset);
+	return active_offset;
 }
 
 /*
@@ -15074,7 +15071,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 	.has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
 
 	.read_l1_tsc_offset = vmx_read_l1_tsc_offset,
-	.write_tsc_offset = vmx_write_tsc_offset,
+	.write_l1_tsc_offset = vmx_write_l1_tsc_offset,
 
 	.set_tdp_cr3 = vmx_set_cr3,
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7e4be1f2f253d..d02937760c3ba 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1665,8 +1665,7 @@ EXPORT_SYMBOL_GPL(kvm_read_l1_tsc);
 
 static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 {
-	kvm_x86_ops->write_tsc_offset(vcpu, offset);
-	vcpu->arch.tsc_offset = offset;
+	vcpu->arch.tsc_offset = kvm_x86_ops->write_l1_tsc_offset(vcpu, offset);
 }
 
 static inline bool kvm_check_tsc_unstable(void)
@@ -1794,7 +1793,8 @@ EXPORT_SYMBOL_GPL(kvm_write_tsc);
 static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
 					   s64 adjustment)
 {
-	kvm_vcpu_write_tsc_offset(vcpu, vcpu->arch.tsc_offset + adjustment);
+	u64 tsc_offset = kvm_x86_ops->read_l1_tsc_offset(vcpu);
+	kvm_vcpu_write_tsc_offset(vcpu, tsc_offset + adjustment);
 }
 
 static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment)
-- 
GitLab


From 72aeb60c52bf74a0eeec77d6b41ce40145697d76 Mon Sep 17 00:00:00 2001
From: Liran Alon <liran.alon@oracle.com>
Date: Thu, 1 Nov 2018 10:57:39 +0200
Subject: [PATCH 1454/1890] KVM: nVMX: Verify eVMCS revision id match supported
 eVMCS version on eVMCS VMPTRLD

According to TLFS section 16.11.2 Enlightened VMCS, the first u32
field of eVMCS should specify eVMCS VersionNumber.

This version should be in the range of supported eVMCS versions exposed
to guest via CPUID.0x4000000A.EAX[0:15].
The range which KVM expose to guest in this CPUID field should be the
same as the value returned in vmcs_version by nested_enable_evmcs().

According to the above, eVMCS VMPTRLD should verify that version specified
in given eVMCS is in the supported range. However, current code
mistakenly verfies this field against VMCS12_REVISION.

One can also see that when KVM use eVMCS, it makes sure that
alloc_vmcs_cpu() sets allocated eVMCS revision_id to KVM_EVMCS_VERSION.

Obvious fix should just change eVMCS VMPTRLD to verify first u32 field
of eVMCS is equal to KVM_EVMCS_VERSION.
However, it turns out that Microsoft Hyper-V fails to comply to their
own invented interface: When Hyper-V use eVMCS, it just sets first u32
field of eVMCS to revision_id specified in MSR_IA32_VMX_BASIC (In our
case: VMCS12_REVISION). Instead of used eVMCS version number which is
one of the supported versions specified in CPUID.0x4000000A.EAX[0:15].
To overcome Hyper-V bug, we accept either a supported eVMCS version
or VMCS12_REVISION as valid values for first u32 field of eVMCS.

Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Reviewed-by: Nikita Leshenko <nikita.leshchenko@oracle.com>
Reviewed-by: Mark Kanda <mark.kanda@oracle.com>
Signed-off-by: Liran Alon <liran.alon@oracle.com>
Reviewed-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx.c | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 764c23dc444fa..0ffd8b2dbfe2a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -9378,7 +9378,30 @@ static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu,
 
 		vmx->nested.hv_evmcs = kmap(vmx->nested.hv_evmcs_page);
 
-		if (vmx->nested.hv_evmcs->revision_id != VMCS12_REVISION) {
+		/*
+		 * Currently, KVM only supports eVMCS version 1
+		 * (== KVM_EVMCS_VERSION) and thus we expect guest to set this
+		 * value to first u32 field of eVMCS which should specify eVMCS
+		 * VersionNumber.
+		 *
+		 * Guest should be aware of supported eVMCS versions by host by
+		 * examining CPUID.0x4000000A.EAX[0:15]. Host userspace VMM is
+		 * expected to set this CPUID leaf according to the value
+		 * returned in vmcs_version from nested_enable_evmcs().
+		 *
+		 * However, it turns out that Microsoft Hyper-V fails to comply
+		 * to their own invented interface: When Hyper-V use eVMCS, it
+		 * just sets first u32 field of eVMCS to revision_id specified
+		 * in MSR_IA32_VMX_BASIC. Instead of used eVMCS version number
+		 * which is one of the supported versions specified in
+		 * CPUID.0x4000000A.EAX[0:15].
+		 *
+		 * To overcome Hyper-V bug, we accept here either a supported
+		 * eVMCS version or VMCS12 revision_id as valid values for first
+		 * u32 field of eVMCS.
+		 */
+		if ((vmx->nested.hv_evmcs->revision_id != KVM_EVMCS_VERSION) &&
+		    (vmx->nested.hv_evmcs->revision_id != VMCS12_REVISION)) {
 			nested_release_evmcs(vcpu);
 			return 0;
 		}
-- 
GitLab


From 52ad7eb3d668867f9ee2e34d715cfb4860d36a93 Mon Sep 17 00:00:00 2001
From: Liran Alon <liran.alon@oracle.com>
Date: Tue, 13 Nov 2018 17:44:46 +0200
Subject: [PATCH 1455/1890] KVM: nVMX: vmcs12 revision_id is always
 VMCS12_REVISION even when copied from eVMCS

vmcs12 represents the per-CPU cache of L1 active vmcs12.

This cache can be loaded by one of the following:
1) Guest making a vmcs12 active by exeucting VMPTRLD
2) Guest specifying eVMCS in VP assist page and executing
VMLAUNCH/VMRESUME.

Either way, vmcs12 should have revision_id of VMCS12_REVISION.
Which is not equal to eVMCS revision_id which specifies used
VersionNumber of eVMCS struct (e.g. KVM_EVMCS_VERSION).

Specifically, this causes an issue in restoring a nested VM state
because vmx_set_nested_state() verifies that vmcs12->revision_id
is equal to VMCS12_REVISION which was not true in case vmcs12
was populated from an eVMCS by vmx_get_nested_state() which calls
copy_enlightened_to_vmcs12().

Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
Signed-off-by: Liran Alon <liran.alon@oracle.com>
Reviewed-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 0ffd8b2dbfe2a..02edd9960e9d9 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8673,8 +8673,6 @@ static int copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx)
 	struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
 	struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
 
-	vmcs12->hdr.revision_id = evmcs->revision_id;
-
 	/* HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE */
 	vmcs12->tpr_threshold = evmcs->tpr_threshold;
 	vmcs12->guest_rip = evmcs->guest_rip;
@@ -9422,9 +9420,11 @@ static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu,
 		 * present in struct hv_enlightened_vmcs, ...). Make sure there
 		 * are no leftovers.
 		 */
-		if (from_launch)
-			memset(vmx->nested.cached_vmcs12, 0,
-			       sizeof(*vmx->nested.cached_vmcs12));
+		if (from_launch) {
+			struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+			memset(vmcs12, 0, sizeof(*vmcs12));
+			vmcs12->hdr.revision_id = VMCS12_REVISION;
+		}
 
 	}
 	return 1;
-- 
GitLab


From 0e0fee5c539b61fdd098332e0e2cc375d9073706 Mon Sep 17 00:00:00 2001
From: Junaid Shahid <junaids@google.com>
Date: Wed, 31 Oct 2018 14:53:57 -0700
Subject: [PATCH 1456/1890] kvm: mmu: Fix race in emulated page table writes

When a guest page table is updated via an emulated write,
kvm_mmu_pte_write() is called to update the shadow PTE using the just
written guest PTE value. But if two emulated guest PTE writes happened
concurrently, it is possible that the guest PTE and the shadow PTE end
up being out of sync. Emulated writes do not mark the shadow page as
unsync-ed, so this inconsistency will not be resolved even by a guest TLB
flush (unless the page was marked as unsync-ed at some other point).

This is fixed by re-reading the current value of the guest PTE after the
MMU lock has been acquired instead of just using the value that was
written prior to calling kvm_mmu_pte_write().

Signed-off-by: Junaid Shahid <junaids@google.com>
Reviewed-by: Wanpeng Li <wanpengli@tencent.com>
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu.c | 27 +++++++++------------------
 1 file changed, 9 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index cf5f572f23052..7c03c0f35444f 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -5074,9 +5074,9 @@ static bool need_remote_flush(u64 old, u64 new)
 }
 
 static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
-				    const u8 *new, int *bytes)
+				    int *bytes)
 {
-	u64 gentry;
+	u64 gentry = 0;
 	int r;
 
 	/*
@@ -5088,22 +5088,12 @@ static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
 		/* Handle a 32-bit guest writing two halves of a 64-bit gpte */
 		*gpa &= ~(gpa_t)7;
 		*bytes = 8;
-		r = kvm_vcpu_read_guest(vcpu, *gpa, &gentry, 8);
-		if (r)
-			gentry = 0;
-		new = (const u8 *)&gentry;
 	}
 
-	switch (*bytes) {
-	case 4:
-		gentry = *(const u32 *)new;
-		break;
-	case 8:
-		gentry = *(const u64 *)new;
-		break;
-	default:
-		gentry = 0;
-		break;
+	if (*bytes == 4 || *bytes == 8) {
+		r = kvm_vcpu_read_guest_atomic(vcpu, *gpa, &gentry, *bytes);
+		if (r)
+			gentry = 0;
 	}
 
 	return gentry;
@@ -5207,8 +5197,6 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 
 	pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
 
-	gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, new, &bytes);
-
 	/*
 	 * No need to care whether allocation memory is successful
 	 * or not since pte prefetch is skiped if it does not have
@@ -5217,6 +5205,9 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	mmu_topup_memory_caches(vcpu);
 
 	spin_lock(&vcpu->kvm->mmu_lock);
+
+	gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, &bytes);
+
 	++vcpu->kvm->stat.mmu_pte_write;
 	kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
 
-- 
GitLab


From fd65d3142f734bc4376053c8d75670041903134d Mon Sep 17 00:00:00 2001
From: Jim Mattson <jmattson@google.com>
Date: Tue, 22 May 2018 09:54:20 -0700
Subject: [PATCH 1457/1890] kvm: svm: Ensure an IBPB on all affected CPUs when
 freeing a vmcb

Previously, we only called indirect_branch_prediction_barrier on the
logical CPU that freed a vmcb. This function should be called on all
logical CPUs that last loaded the vmcb in question.

Fixes: 15d45071523d ("KVM/x86: Add IBPB support")
Reported-by: Neel Natu <neelnatu@google.com>
Signed-off-by: Jim Mattson <jmattson@google.com>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index a24733aade4c7..cc6467b35a85f 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2193,21 +2193,31 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
 	return ERR_PTR(err);
 }
 
+static void svm_clear_current_vmcb(struct vmcb *vmcb)
+{
+	int i;
+
+	for_each_online_cpu(i)
+		cmpxchg(&per_cpu(svm_data, i)->current_vmcb, vmcb, NULL);
+}
+
 static void svm_free_vcpu(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
+	/*
+	 * The vmcb page can be recycled, causing a false negative in
+	 * svm_vcpu_load(). So, ensure that no logical CPU has this
+	 * vmcb page recorded as its current vmcb.
+	 */
+	svm_clear_current_vmcb(svm->vmcb);
+
 	__free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT));
 	__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
 	__free_page(virt_to_page(svm->nested.hsave));
 	__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
 	kvm_vcpu_uninit(vcpu);
 	kmem_cache_free(kvm_vcpu_cache, svm);
-	/*
-	 * The vmcb page can be recycled, causing a false negative in
-	 * svm_vcpu_load(). So do a full IBPB now.
-	 */
-	indirect_branch_prediction_barrier();
 }
 
 static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
-- 
GitLab


From 976b489120cdab2b1b3a41ffa14661db43d58190 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Fri, 23 Nov 2018 22:51:32 +0100
Subject: [PATCH 1458/1890] efi: Prevent GICv3 WARN() by mapping the memreserve
 table before first use

Mapping the MEMRESERVE EFI configuration table from an early initcall
is too late: the GICv3 ITS code that creates persistent reservations
for the boot CPU's LPI tables is invoked from init_IRQ(), which runs
much earlier than the handling of the initcalls. This results in a
WARN() splat because the LPI tables cannot be reserved persistently,
which will result in silent memory corruption after a kexec reboot.

So instead, invoke the initialization performed by the initcall from
efi_mem_reserve_persistent() itself as well, but keep the initcall so
that the init is guaranteed to have been called before SMP boot.

Tested-by: Marc Zyngier <marc.zyngier@arm.com>
Tested-by: Jan Glauber <jglauber@cavium.com>
Tested-by: John Garry <john.garry@huawei.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-efi@vger.kernel.org
Fixes: 63eb322d89c8 ("efi: Permit calling efi_mem_reserve_persistent() ...")
Link: http://lkml.kernel.org/r/20181123215132.7951-2-ard.biesheuvel@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 drivers/firmware/efi/efi.c | 36 ++++++++++++++++++++++++++----------
 1 file changed, 26 insertions(+), 10 deletions(-)

diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index fad7c62cfc0e4..415849bab2339 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -969,13 +969,33 @@ bool efi_is_table_address(unsigned long phys_addr)
 static DEFINE_SPINLOCK(efi_mem_reserve_persistent_lock);
 static struct linux_efi_memreserve *efi_memreserve_root __ro_after_init;
 
-int efi_mem_reserve_persistent(phys_addr_t addr, u64 size)
+static int __init efi_memreserve_map_root(void)
+{
+	if (efi.mem_reserve == EFI_INVALID_TABLE_ADDR)
+		return -ENODEV;
+
+	efi_memreserve_root = memremap(efi.mem_reserve,
+				       sizeof(*efi_memreserve_root),
+				       MEMREMAP_WB);
+	if (WARN_ON_ONCE(!efi_memreserve_root))
+		return -ENOMEM;
+	return 0;
+}
+
+int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size)
 {
 	struct linux_efi_memreserve *rsv;
+	int rc;
 
-	if (!efi_memreserve_root)
+	if (efi_memreserve_root == (void *)ULONG_MAX)
 		return -ENODEV;
 
+	if (!efi_memreserve_root) {
+		rc = efi_memreserve_map_root();
+		if (rc)
+			return rc;
+	}
+
 	rsv = kmalloc(sizeof(*rsv), GFP_ATOMIC);
 	if (!rsv)
 		return -ENOMEM;
@@ -993,14 +1013,10 @@ int efi_mem_reserve_persistent(phys_addr_t addr, u64 size)
 
 static int __init efi_memreserve_root_init(void)
 {
-	if (efi.mem_reserve == EFI_INVALID_TABLE_ADDR)
-		return -ENODEV;
-
-	efi_memreserve_root = memremap(efi.mem_reserve,
-				       sizeof(*efi_memreserve_root),
-				       MEMREMAP_WB);
-	if (!efi_memreserve_root)
-		return -ENOMEM;
+	if (efi_memreserve_root)
+		return 0;
+	if (efi_memreserve_map_root())
+		efi_memreserve_root = (void *)ULONG_MAX;
 	return 0;
 }
 early_initcall(efi_memreserve_root_init);
-- 
GitLab


From 98be694ba25fa0e87811e7784b782c9498741b26 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Fri, 9 Nov 2018 18:02:16 +0100
Subject: [PATCH 1459/1890] i2c: nvidia-gpu: adhere to I2C fault codes

As described in Documentation/i2c/fault-codes.

Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Fixes: c71bcdcb42a7 ("i2c: add i2c bus driver for NVIDIA GPU")
Acked-by: Ajay Gupta <ajayg@nvidia.com>
Tested-by: Ajay Gupta <ajayg@nvidia.com>
---
 drivers/i2c/busses/i2c-nvidia-gpu.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/i2c/busses/i2c-nvidia-gpu.c b/drivers/i2c/busses/i2c-nvidia-gpu.c
index 8822357bca0c3..4307b42901a13 100644
--- a/drivers/i2c/busses/i2c-nvidia-gpu.c
+++ b/drivers/i2c/busses/i2c-nvidia-gpu.c
@@ -89,7 +89,7 @@ static int gpu_i2c_check_status(struct gpu_i2c_dev *i2cd)
 
 	if (time_is_before_jiffies(target)) {
 		dev_err(i2cd->dev, "i2c timeout error %x\n", val);
-		return -ETIME;
+		return -ETIMEDOUT;
 	}
 
 	val = readl(i2cd->regs + I2C_MST_CNTL);
@@ -97,9 +97,9 @@ static int gpu_i2c_check_status(struct gpu_i2c_dev *i2cd)
 	case I2C_MST_CNTL_STATUS_OKAY:
 		return 0;
 	case I2C_MST_CNTL_STATUS_NO_ACK:
-		return -EIO;
+		return -ENXIO;
 	case I2C_MST_CNTL_STATUS_TIMEOUT:
-		return -ETIME;
+		return -ETIMEDOUT;
 	default:
 		return 0;
 	}
-- 
GitLab


From 39129f28c9431522a6790e7b8c3283221a28447b Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Fri, 9 Nov 2018 18:07:32 +0100
Subject: [PATCH 1460/1890] i2c: nvidia-gpu: limit reads also for combined
 messages

If the controller can only do 4 byte reads, this needs to be applied for
the read-part of combined messages, too.

Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Fixes: c71bcdcb42a7 ("i2c: add i2c bus driver for NVIDIA GPU")
Acked-by: Ajay Gupta <ajayg@nvidia.com>
Tested-by: Ajay Gupta <ajayg@nvidia.com>
---
 drivers/i2c/busses/i2c-nvidia-gpu.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/i2c/busses/i2c-nvidia-gpu.c b/drivers/i2c/busses/i2c-nvidia-gpu.c
index 4307b42901a13..e99c3bb583513 100644
--- a/drivers/i2c/busses/i2c-nvidia-gpu.c
+++ b/drivers/i2c/busses/i2c-nvidia-gpu.c
@@ -218,6 +218,7 @@ static int gpu_i2c_master_xfer(struct i2c_adapter *adap,
 
 static const struct i2c_adapter_quirks gpu_i2c_quirks = {
 	.max_read_len = 4,
+	.max_comb_2nd_msg_len = 4,
 	.flags = I2C_AQ_COMB_WRITE_THEN_READ,
 };
 
-- 
GitLab


From 0b57436f15bf40e432487086c4f2d01fd3529393 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Tue, 13 Nov 2018 12:15:42 +0100
Subject: [PATCH 1461/1890] i2c: rcar: check bus state before reinitializing

We should check the bus state before reinitializing the IP core.
Otherwise, the internal bus busy state which also tracks multi-master
activity is lost.

Credits go to the Renesas BSP team for suggesting this change.

Reported-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Fixes: ae481cc13965 ("i2c: rcar: fix resume by always initializing registers before transfer")
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-rcar.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c
index 4aa7dde876f3f..254e6219e5389 100644
--- a/drivers/i2c/busses/i2c-rcar.c
+++ b/drivers/i2c/busses/i2c-rcar.c
@@ -779,6 +779,11 @@ static int rcar_i2c_master_xfer(struct i2c_adapter *adap,
 
 	pm_runtime_get_sync(dev);
 
+	/* Check bus state before init otherwise bus busy info will be lost */
+	ret = rcar_i2c_bus_barrier(priv);
+	if (ret < 0)
+		goto out;
+
 	/* Gen3 needs a reset before allowing RXDMA once */
 	if (priv->devtype == I2C_RCAR_GEN3) {
 		priv->flags |= ID_P_NO_RXDMA;
@@ -791,10 +796,6 @@ static int rcar_i2c_master_xfer(struct i2c_adapter *adap,
 
 	rcar_i2c_init(priv);
 
-	ret = rcar_i2c_bus_barrier(priv);
-	if (ret < 0)
-		goto out;
-
 	for (i = 0; i < num; i++)
 		rcar_i2c_request_dma(priv, msgs + i);
 
-- 
GitLab


From 814cedbc0b78d75e335c96da9b9391142eab5600 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Tue, 27 Nov 2018 14:04:04 +0100
Subject: [PATCH 1462/1890] s390/mm: correct pgtable_bytes on page table
 downgrade

The downgrade of a page table from 3 levels to 2 levels for a 31-bit compat
process removes a pmd table which has to be counted against pgtable_bytes.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/mm/pgalloc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 814f26520aa2c..6791562779eec 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -131,6 +131,7 @@ void crst_table_downgrade(struct mm_struct *mm)
 	}
 
 	pgd = mm->pgd;
+	mm_dec_nr_pmds(mm);
 	mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
 	mm->context.asce_limit = _REGION3_SIZE;
 	mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
-- 
GitLab


From ac26d1f74cfc19c8dc9d533b5f20e99dbee3d9bd Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Tue, 27 Nov 2018 14:32:00 +0100
Subject: [PATCH 1463/1890] x86/fpu: Use the correct exception table macro in
 the XSTATE_OP wrapper

Commit

  75045f77f7a7 ("x86/extable: Introduce _ASM_EXTABLE_UA for uaccess fixups")

incorrectly replaced the fixup entry for XSTATE_OP with a user-#PF-only
fixup. XRSTOR can also raise #GP if the xstate content is invalid,
and _ASM_EXTABLE_UA doesn't expect that. Change this fixup back to
_ASM_EXTABLE so that #GP gets fixed up.

Fixes: 75045f77f7a7 ("x86/extable: Introduce _ASM_EXTABLE_UA for uaccess fixups")
Reported-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Jann Horn <jannh@google.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Tested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: kernel-hardening@lists.openwall.com
Cc: x86-ml <x86@kernel.org>
Link: https://lkml.kernel.org/r/20181126165957.xhsyu2dhyy45mrjo@linutronix.de
Link: https://lkml.kernel.org/r/20181127133200.38322-1-jannh@google.com
---
 arch/x86/include/asm/fpu/internal.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 5f7290e6e954e..69dcdf195b611 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -226,7 +226,7 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu)
 		     "3: movl $-2,%[err]\n\t"				\
 		     "jmp 2b\n\t"					\
 		     ".popsection\n\t"					\
-		     _ASM_EXTABLE_UA(1b, 3b)				\
+		     _ASM_EXTABLE(1b, 3b)				\
 		     : [err] "=r" (err)					\
 		     : "D" (st), "m" (*st), "a" (lmask), "d" (hmask)	\
 		     : "memory")
-- 
GitLab


From dfa74422d604abc2e16763db12646583219806e4 Mon Sep 17 00:00:00 2001
From: "Ewan D. Milne" <emilne@redhat.com>
Date: Mon, 26 Nov 2018 12:01:30 -0500
Subject: [PATCH 1464/1890] nvme-fc: initialize nvme_req(rq)->ctrl after
 calling __nvme_fc_init_request()

__nvme_fc_init_request() invokes memset() on the nvme_fcp_op_w_sgl structure, which
NULLed-out the nvme_req(req)->ctrl field previously set by nvme_fc_init_request().
This apparently was not referenced until commit faf4a44fff ("nvme: support traffic
based keep-alive") which now results in a crash in nvme_complete_rq():

[ 8386.897130] RIP: 0010:panic+0x220/0x26c
[ 8386.901406] Code: 83 3d 6f ee 72 01 00 74 05 e8 e8 54 02 00 48 c7 c6 40 fd 5b b4 48 c7 c7 d8 8d c6 b3 31e
[ 8386.922359] RSP: 0018:ffff99650019fc40 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff13
[ 8386.930804] RAX: 0000000000000046 RBX: 0000000000000000 RCX: 0000000000000006
[ 8386.938764] RDX: 0000000000000000 RSI: 0000000000000082 RDI: ffff8e325f8168b0
[ 8386.946725] RBP: ffff99650019fcb0 R08: 0000000000000000 R09: 00000000000004f8
[ 8386.954687] R10: 0000000000000000 R11: ffff99650019f9b8 R12: ffffffffb3c55f3c
[ 8386.962648] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000001
[ 8386.970613]  oops_end+0xd1/0xe0
[ 8386.974116]  no_context+0x1b2/0x3c0
[ 8386.978006]  do_page_fault+0x32/0x140
[ 8386.982090]  page_fault+0x1e/0x30
[ 8386.985786] RIP: 0010:nvme_complete_rq+0x65/0x1d0 [nvme_core]
[ 8386.992195] Code: 41 bc 03 00 00 00 74 16 0f 86 c3 00 00 00 66 3d 83 00 41 bc 06 00 00 00 0f 85 e7 00 000
[ 8387.013147] RSP: 0018:ffff99650019fe18 EFLAGS: 00010246
[ 8387.018973] RAX: 0000000000000000 RBX: ffff8e322ae51280 RCX: 0000000000000001
[ 8387.026935] RDX: 0000000000000400 RSI: 0000000000000001 RDI: ffff8e322ae51280
[ 8387.034897] RBP: ffff8e322ae51280 R08: 0000000000000000 R09: ffffffffb2f0b890
[ 8387.042859] R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000000
[ 8387.050821] R13: 0000000000000100 R14: 0000000000000004 R15: ffff8e2b0446d990
[ 8387.058782]  ? swiotlb_unmap_page+0x40/0x40
[ 8387.063448]  nvme_fc_complete_rq+0x2d/0x70 [nvme_fc]
[ 8387.068986]  blk_done_softirq+0xa1/0xd0
[ 8387.073264]  __do_softirq+0xd6/0x2a9
[ 8387.077251]  run_ksoftirqd+0x26/0x40
[ 8387.081238]  smpboot_thread_fn+0x10e/0x160
[ 8387.085807]  kthread+0xf8/0x130
[ 8387.089309]  ? sort_range+0x20/0x20
[ 8387.093198]  ? kthread_stop+0x110/0x110
[ 8387.097475]  ret_from_fork+0x35/0x40
[ 8387.101462] ---[ end trace 7106b0adf5e422f8 ]---

Fixes: faf4a44fff ("nvme: support traffic based keep-alive")
Signed-off-by: Ewan D. Milne <emilne@redhat.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 54032c4666361..feb86b59170e4 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -1752,12 +1752,12 @@ nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq,
 	struct nvme_fc_queue *queue = &ctrl->queues[queue_idx];
 	int res;
 
-	nvme_req(rq)->ctrl = &ctrl->ctrl;
 	res = __nvme_fc_init_request(ctrl, queue, &op->op, rq, queue->rqcnt++);
 	if (res)
 		return res;
 	op->op.fcp_req.first_sgl = &op->sgl[0];
 	op->op.fcp_req.private = &op->priv[0];
+	nvme_req(rq)->ctrl = &ctrl->ctrl;
 	return res;
 }
 
-- 
GitLab


From 751a0cc0cd3a0d51e6aaf6fd3b8bd31f4ecfaf3e Mon Sep 17 00:00:00 2001
From: Igor Konopko <igor.j.konopko@intel.com>
Date: Fri, 23 Nov 2018 16:58:10 +0100
Subject: [PATCH 1465/1890] nvme-pci: fix surprise removal

When a PCIe NVMe device is not present, nvme_dev_remove_admin() calls
blk_cleanup_queue() on the admin queue, which frees the hctx for that
queue.  Moments later, on the same path nvme_kill_queues() calls
blk_mq_unquiesce_queue() on admin queue and tries to access hctx of it,
which leads to following OOPS:

Oops: 0000 [#1] SMP PTI
RIP: 0010:sbitmap_any_bit_set+0xb/0x40
Call Trace:
 blk_mq_run_hw_queue+0xd5/0x150
 blk_mq_run_hw_queues+0x3a/0x50
 nvme_kill_queues+0x26/0x50
 nvme_remove_namespaces+0xb2/0xc0
 nvme_remove+0x60/0x140
 pci_device_remove+0x3b/0xb0

Fixes: cb4bfda62afa2 ("nvme-pci: fix hot removal during error handling")
Signed-off-by: Igor Konopko <igor.j.konopko@intel.com>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 5afda6fe5ae90..bb39b91253c2d 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3607,7 +3607,7 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
 	down_read(&ctrl->namespaces_rwsem);
 
 	/* Forcibly unquiesce queues to avoid blocking dispatch */
-	if (ctrl->admin_q)
+	if (ctrl->admin_q && !blk_queue_dying(ctrl->admin_q))
 		blk_mq_unquiesce_queue(ctrl->admin_q);
 
 	list_for_each_entry(ns, &ctrl->namespaces, list)
-- 
GitLab


From 3d8e450f517cdb33da77827ec75929354753e9c0 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Tue, 20 Nov 2018 05:19:36 -0500
Subject: [PATCH 1466/1890] media: dvb-pll: fix tuner frequency ranges

Tuners should report frequencies in Hz. That works fine on most
drivers, but, in the case of dvb-pll, some settings are for
satellite tuners, while others are for terrestrial/cable ones.

The code was trying to solve it at probing time, but that doesn't
work, as, when _attach is called, the delivery system may be wrong.

Fix it by ensuring that all frequencies are in Hz at the per-tuner
max/min values.

While here, add a debug message, as this would help to debug any
issues there.

It partially fixes the following bug:
  https://bugzilla.opensuse.org/show_bug.cgi?id=1116374

Fixes: a3f90c75b833 ("media: dvb: convert tuner_info frequencies to Hz")
Reported-by: Stakanov Schufter <stakanov@eclipso.eu>
Reported-by: Takashi Iwai <tiwai@suse.de>
Cc: stable@vger.kernel.org # For 4.19
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/dvb-frontends/dvb-pll.c | 103 ++++++++++++--------------
 1 file changed, 48 insertions(+), 55 deletions(-)

diff --git a/drivers/media/dvb-frontends/dvb-pll.c b/drivers/media/dvb-frontends/dvb-pll.c
index 6d4b2eec67b4f..fff5816f6ec48 100644
--- a/drivers/media/dvb-frontends/dvb-pll.c
+++ b/drivers/media/dvb-frontends/dvb-pll.c
@@ -80,8 +80,8 @@ struct dvb_pll_desc {
 
 static const struct dvb_pll_desc dvb_pll_thomson_dtt7579 = {
 	.name  = "Thomson dtt7579",
-	.min   = 177000000,
-	.max   = 858000000,
+	.min   = 177 * MHz,
+	.max   = 858 * MHz,
 	.iffreq= 36166667,
 	.sleepdata = (u8[]){ 2, 0xb4, 0x03 },
 	.count = 4,
@@ -102,8 +102,8 @@ static void thomson_dtt759x_bw(struct dvb_frontend *fe, u8 *buf)
 
 static const struct dvb_pll_desc dvb_pll_thomson_dtt759x = {
 	.name  = "Thomson dtt759x",
-	.min   = 177000000,
-	.max   = 896000000,
+	.min   = 177 * MHz,
+	.max   = 896 * MHz,
 	.set   = thomson_dtt759x_bw,
 	.iffreq= 36166667,
 	.sleepdata = (u8[]){ 2, 0x84, 0x03 },
@@ -126,8 +126,8 @@ static void thomson_dtt7520x_bw(struct dvb_frontend *fe, u8 *buf)
 
 static const struct dvb_pll_desc dvb_pll_thomson_dtt7520x = {
 	.name  = "Thomson dtt7520x",
-	.min   = 185000000,
-	.max   = 900000000,
+	.min   = 185 * MHz,
+	.max   = 900 * MHz,
 	.set   = thomson_dtt7520x_bw,
 	.iffreq = 36166667,
 	.count = 7,
@@ -144,8 +144,8 @@ static const struct dvb_pll_desc dvb_pll_thomson_dtt7520x = {
 
 static const struct dvb_pll_desc dvb_pll_lg_z201 = {
 	.name  = "LG z201",
-	.min   = 174000000,
-	.max   = 862000000,
+	.min   = 174 * MHz,
+	.max   = 862 * MHz,
 	.iffreq= 36166667,
 	.sleepdata = (u8[]){ 2, 0xbc, 0x03 },
 	.count = 5,
@@ -160,8 +160,8 @@ static const struct dvb_pll_desc dvb_pll_lg_z201 = {
 
 static const struct dvb_pll_desc dvb_pll_unknown_1 = {
 	.name  = "unknown 1", /* used by dntv live dvb-t */
-	.min   = 174000000,
-	.max   = 862000000,
+	.min   = 174 * MHz,
+	.max   = 862 * MHz,
 	.iffreq= 36166667,
 	.count = 9,
 	.entries = {
@@ -182,8 +182,8 @@ static const struct dvb_pll_desc dvb_pll_unknown_1 = {
  */
 static const struct dvb_pll_desc dvb_pll_tua6010xs = {
 	.name  = "Infineon TUA6010XS",
-	.min   =  44250000,
-	.max   = 858000000,
+	.min   = 44250 * kHz,
+	.max   = 858 * MHz,
 	.iffreq= 36125000,
 	.count = 3,
 	.entries = {
@@ -196,8 +196,8 @@ static const struct dvb_pll_desc dvb_pll_tua6010xs = {
 /* Panasonic env57h1xd5 (some Philips PLL ?) */
 static const struct dvb_pll_desc dvb_pll_env57h1xd5 = {
 	.name  = "Panasonic ENV57H1XD5",
-	.min   =  44250000,
-	.max   = 858000000,
+	.min   = 44250 * kHz,
+	.max   = 858 * MHz,
 	.iffreq= 36125000,
 	.count = 4,
 	.entries = {
@@ -220,8 +220,8 @@ static void tda665x_bw(struct dvb_frontend *fe, u8 *buf)
 
 static const struct dvb_pll_desc dvb_pll_tda665x = {
 	.name  = "Philips TDA6650/TDA6651",
-	.min   =  44250000,
-	.max   = 858000000,
+	.min   = 44250 * kHz,
+	.max   = 858 * MHz,
 	.set   = tda665x_bw,
 	.iffreq= 36166667,
 	.initdata = (u8[]){ 4, 0x0b, 0xf5, 0x85, 0xab },
@@ -254,8 +254,8 @@ static void tua6034_bw(struct dvb_frontend *fe, u8 *buf)
 
 static const struct dvb_pll_desc dvb_pll_tua6034 = {
 	.name  = "Infineon TUA6034",
-	.min   =  44250000,
-	.max   = 858000000,
+	.min   = 44250 * kHz,
+	.max   = 858 * MHz,
 	.iffreq= 36166667,
 	.count = 3,
 	.set   = tua6034_bw,
@@ -278,8 +278,8 @@ static void tded4_bw(struct dvb_frontend *fe, u8 *buf)
 
 static const struct dvb_pll_desc dvb_pll_tded4 = {
 	.name = "ALPS TDED4",
-	.min = 47000000,
-	.max = 863000000,
+	.min =  47 * MHz,
+	.max = 863 * MHz,
 	.iffreq= 36166667,
 	.set   = tded4_bw,
 	.count = 4,
@@ -296,8 +296,8 @@ static const struct dvb_pll_desc dvb_pll_tded4 = {
  */
 static const struct dvb_pll_desc dvb_pll_tdhu2 = {
 	.name = "ALPS TDHU2",
-	.min = 54000000,
-	.max = 864000000,
+	.min =  54 * MHz,
+	.max = 864 * MHz,
 	.iffreq= 44000000,
 	.count = 4,
 	.entries = {
@@ -313,8 +313,8 @@ static const struct dvb_pll_desc dvb_pll_tdhu2 = {
  */
 static const struct dvb_pll_desc dvb_pll_samsung_tbmv = {
 	.name = "Samsung TBMV30111IN / TBMV30712IN1",
-	.min = 54000000,
-	.max = 860000000,
+	.min =  54 * MHz,
+	.max = 860 * MHz,
 	.iffreq= 44000000,
 	.count = 6,
 	.entries = {
@@ -332,8 +332,8 @@ static const struct dvb_pll_desc dvb_pll_samsung_tbmv = {
  */
 static const struct dvb_pll_desc dvb_pll_philips_sd1878_tda8261 = {
 	.name  = "Philips SD1878",
-	.min   =  950000,
-	.max   = 2150000,
+	.min   =  950 * MHz,
+	.max   = 2150 * MHz,
 	.iffreq= 249, /* zero-IF, offset 249 is to round up */
 	.count = 4,
 	.entries = {
@@ -398,8 +398,8 @@ static void opera1_bw(struct dvb_frontend *fe, u8 *buf)
 
 static const struct dvb_pll_desc dvb_pll_opera1 = {
 	.name  = "Opera Tuner",
-	.min   =  900000,
-	.max   = 2250000,
+	.min   =  900 * MHz,
+	.max   = 2250 * MHz,
 	.initdata = (u8[]){ 4, 0x08, 0xe5, 0xe1, 0x00 },
 	.initdata2 = (u8[]){ 4, 0x08, 0xe5, 0xe5, 0x00 },
 	.iffreq= 0,
@@ -445,8 +445,8 @@ static void samsung_dtos403ih102a_set(struct dvb_frontend *fe, u8 *buf)
 /* unknown pll used in Samsung DTOS403IH102A DVB-C tuner */
 static const struct dvb_pll_desc dvb_pll_samsung_dtos403ih102a = {
 	.name   = "Samsung DTOS403IH102A",
-	.min    =  44250000,
-	.max    = 858000000,
+	.min    = 44250 * kHz,
+	.max    = 858 * MHz,
 	.iffreq =  36125000,
 	.count  = 8,
 	.set    = samsung_dtos403ih102a_set,
@@ -465,8 +465,8 @@ static const struct dvb_pll_desc dvb_pll_samsung_dtos403ih102a = {
 /* Samsung TDTC9251DH0 DVB-T NIM, as used on AirStar 2 */
 static const struct dvb_pll_desc dvb_pll_samsung_tdtc9251dh0 = {
 	.name	= "Samsung TDTC9251DH0",
-	.min	=  48000000,
-	.max	= 863000000,
+	.min	=  48 * MHz,
+	.max	= 863 * MHz,
 	.iffreq	=  36166667,
 	.count	= 3,
 	.entries = {
@@ -479,8 +479,8 @@ static const struct dvb_pll_desc dvb_pll_samsung_tdtc9251dh0 = {
 /* Samsung TBDU18132 DVB-S NIM with TSA5059 PLL, used in SkyStar2 DVB-S 2.3 */
 static const struct dvb_pll_desc dvb_pll_samsung_tbdu18132 = {
 	.name = "Samsung TBDU18132",
-	.min	=  950000,
-	.max	= 2150000, /* guesses */
+	.min	=  950 * MHz,
+	.max	= 2150 * MHz, /* guesses */
 	.iffreq = 0,
 	.count = 2,
 	.entries = {
@@ -500,8 +500,8 @@ static const struct dvb_pll_desc dvb_pll_samsung_tbdu18132 = {
 /* Samsung TBMU24112 DVB-S NIM with SL1935 zero-IF tuner */
 static const struct dvb_pll_desc dvb_pll_samsung_tbmu24112 = {
 	.name = "Samsung TBMU24112",
-	.min	=  950000,
-	.max	= 2150000, /* guesses */
+	.min	=  950 * MHz,
+	.max	= 2150 * MHz, /* guesses */
 	.iffreq = 0,
 	.count = 2,
 	.entries = {
@@ -521,8 +521,8 @@ static const struct dvb_pll_desc dvb_pll_samsung_tbmu24112 = {
  * 822 - 862   1  *  0   0   1   0   0   0   0x88 */
 static const struct dvb_pll_desc dvb_pll_alps_tdee4 = {
 	.name = "ALPS TDEE4",
-	.min	=  47000000,
-	.max	= 862000000,
+	.min	=  47 * MHz,
+	.max	= 862 * MHz,
 	.iffreq	=  36125000,
 	.count = 4,
 	.entries = {
@@ -537,8 +537,8 @@ static const struct dvb_pll_desc dvb_pll_alps_tdee4 = {
 /* CP cur. 50uA, AGC takeover: 103dBuV, PORT3 on */
 static const struct dvb_pll_desc dvb_pll_tua6034_friio = {
 	.name   = "Infineon TUA6034 ISDB-T (Friio)",
-	.min    =  90000000,
-	.max    = 770000000,
+	.min    =  90 * MHz,
+	.max    = 770 * MHz,
 	.iffreq =  57000000,
 	.initdata = (u8[]){ 4, 0x9a, 0x50, 0xb2, 0x08 },
 	.sleepdata = (u8[]){ 4, 0x9a, 0x70, 0xb3, 0x0b },
@@ -553,8 +553,8 @@ static const struct dvb_pll_desc dvb_pll_tua6034_friio = {
 /* Philips TDA6651 ISDB-T, used in Earthsoft PT1 */
 static const struct dvb_pll_desc dvb_pll_tda665x_earth_pt1 = {
 	.name   = "Philips TDA6651 ISDB-T (EarthSoft PT1)",
-	.min    =  90000000,
-	.max    = 770000000,
+	.min    =  90 * MHz,
+	.max    = 770 * MHz,
 	.iffreq =  57000000,
 	.initdata = (u8[]){ 5, 0x0e, 0x7f, 0xc1, 0x80, 0x80 },
 	.count = 10,
@@ -799,7 +799,6 @@ struct dvb_frontend *dvb_pll_attach(struct dvb_frontend *fe, int pll_addr,
 	struct dvb_pll_priv *priv = NULL;
 	int ret;
 	const struct dvb_pll_desc *desc;
-	struct dtv_frontend_properties *c = &fe->dtv_property_cache;
 
 	b1 = kmalloc(1, GFP_KERNEL);
 	if (!b1)
@@ -845,18 +844,12 @@ struct dvb_frontend *dvb_pll_attach(struct dvb_frontend *fe, int pll_addr,
 
 	strncpy(fe->ops.tuner_ops.info.name, desc->name,
 		sizeof(fe->ops.tuner_ops.info.name));
-	switch (c->delivery_system) {
-	case SYS_DVBS:
-	case SYS_DVBS2:
-	case SYS_TURBO:
-	case SYS_ISDBS:
-		fe->ops.tuner_ops.info.frequency_min_hz = desc->min * kHz;
-		fe->ops.tuner_ops.info.frequency_max_hz = desc->max * kHz;
-		break;
-	default:
-		fe->ops.tuner_ops.info.frequency_min_hz = desc->min;
-		fe->ops.tuner_ops.info.frequency_max_hz = desc->max;
-	}
+
+	fe->ops.tuner_ops.info.frequency_min_hz = desc->min;
+	fe->ops.tuner_ops.info.frequency_max_hz = desc->max;
+
+	dprintk("%s tuner, frequency range: %u...%u\n",
+		desc->name, desc->min, desc->max);
 
 	if (!desc->initdata)
 		fe->ops.tuner_ops.init = NULL;
-- 
GitLab


From 3420f65cbbd0555049bd02394bed33a0ef74d860 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Fri, 23 Nov 2018 12:10:57 -0500
Subject: [PATCH 1467/1890] media: dvb-pll: don't re-validate tuner frequencies

The dvb_frontend core already checks for the frequencies. No
need for any additional check inside the driver.

It is part of the fixes for the following bug:
  https://bugzilla.opensuse.org/show_bug.cgi?id=1116374

Fixes: a3f90c75b833 ("media: dvb: convert tuner_info frequencies to Hz")
Reported-by: Stakanov Schufter <stakanov@eclipso.eu>
Reported-by: Takashi Iwai <tiwai@suse.de>
Cc: stable@vger.kernel.org # For 4.19
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/dvb-frontends/dvb-pll.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/media/dvb-frontends/dvb-pll.c b/drivers/media/dvb-frontends/dvb-pll.c
index fff5816f6ec48..29836c1a40e98 100644
--- a/drivers/media/dvb-frontends/dvb-pll.c
+++ b/drivers/media/dvb-frontends/dvb-pll.c
@@ -610,9 +610,6 @@ static int dvb_pll_configure(struct dvb_frontend *fe, u8 *buf,
 	u32 div;
 	int i;
 
-	if (frequency && (frequency < desc->min || frequency > desc->max))
-		return -EINVAL;
-
 	for (i = 0; i < desc->count; i++) {
 		if (frequency > desc->entries[i].limit)
 			continue;
-- 
GitLab


From a7c3a0d5f8d8cd5cdb32c06d4d68f5b4e4d2104b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Fri, 23 Nov 2018 07:18:12 -0500
Subject: [PATCH 1468/1890] media: mediactl docs: Fix licensing message

Right now, it mentions two SPDX headers that don't exist inside the Kernel:
	GFDL-1.1-or-later
And an exception:
	no-invariant-sections

While it would be trivial to add the first one, there's no way,
currently, to distinguish, with SPDX, between a free and a non-free
document under GFDL.

Free documents with GFDL should not have invariant sections.

There's an open issue at SPDX tree waiting for it to be solved.
While we don't have this issue closed, let's just replace by a
free-text license, and add a TODO note to remind us to revisit it
later.

Reviewed-by: Tomasz Figa <tfiga@chromium.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 .../uapi/mediactl/media-ioc-request-alloc.rst | 26 ++++++++++++++++++-
 .../uapi/mediactl/media-request-ioc-queue.rst | 26 ++++++++++++++++++-
 .../mediactl/media-request-ioc-reinit.rst     | 26 ++++++++++++++++++-
 .../media/uapi/mediactl/request-api.rst       | 26 ++++++++++++++++++-
 .../uapi/mediactl/request-func-close.rst      | 26 ++++++++++++++++++-
 .../uapi/mediactl/request-func-ioctl.rst      | 26 ++++++++++++++++++-
 .../media/uapi/mediactl/request-func-poll.rst | 26 ++++++++++++++++++-
 7 files changed, 175 insertions(+), 7 deletions(-)

diff --git a/Documentation/media/uapi/mediactl/media-ioc-request-alloc.rst b/Documentation/media/uapi/mediactl/media-ioc-request-alloc.rst
index 0f8b31874002c..de131f00c2496 100644
--- a/Documentation/media/uapi/mediactl/media-ioc-request-alloc.rst
+++ b/Documentation/media/uapi/mediactl/media-ioc-request-alloc.rst
@@ -1,4 +1,28 @@
-.. SPDX-License-Identifier: GPL-2.0 OR GFDL-1.1-or-later WITH no-invariant-sections
+.. This file is dual-licensed: you can use it either under the terms
+.. of the GPL or the GFDL 1.1+ license, at your option. Note that this
+.. dual licensing only applies to this file, and not this project as a
+.. whole.
+..
+.. a) This file is free software; you can redistribute it and/or
+..    modify it under the terms of the GNU General Public License as
+..    published by the Free Software Foundation; either version 2 of
+..    the License, or (at your option) any later version.
+..
+..    This file is distributed in the hope that it will be useful,
+..    but WITHOUT ANY WARRANTY; without even the implied warranty of
+..    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+..    GNU General Public License for more details.
+..
+.. Or, alternatively,
+..
+.. b) Permission is granted to copy, distribute and/or modify this
+..    document under the terms of the GNU Free Documentation License,
+..    Version 1.1 or any later version published by the Free Software
+..    Foundation, with no Invariant Sections, no Front-Cover Texts
+..    and no Back-Cover Texts. A copy of the license is included at
+..    Documentation/media/uapi/fdl-appendix.rst.
+..
+.. TODO: replace it to GPL-2.0 OR GFDL-1.1-or-later WITH no-invariant-sections
 
 .. _media_ioc_request_alloc:
 
diff --git a/Documentation/media/uapi/mediactl/media-request-ioc-queue.rst b/Documentation/media/uapi/mediactl/media-request-ioc-queue.rst
index 6dd2d7fea7144..5d2604345e191 100644
--- a/Documentation/media/uapi/mediactl/media-request-ioc-queue.rst
+++ b/Documentation/media/uapi/mediactl/media-request-ioc-queue.rst
@@ -1,4 +1,28 @@
-.. SPDX-License-Identifier: GPL-2.0 OR GFDL-1.1-or-later WITH no-invariant-sections
+.. This file is dual-licensed: you can use it either under the terms
+.. of the GPL or the GFDL 1.1+ license, at your option. Note that this
+.. dual licensing only applies to this file, and not this project as a
+.. whole.
+..
+.. a) This file is free software; you can redistribute it and/or
+..    modify it under the terms of the GNU General Public License as
+..    published by the Free Software Foundation; either version 2 of
+..    the License, or (at your option) any later version.
+..
+..    This file is distributed in the hope that it will be useful,
+..    but WITHOUT ANY WARRANTY; without even the implied warranty of
+..    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+..    GNU General Public License for more details.
+..
+.. Or, alternatively,
+..
+.. b) Permission is granted to copy, distribute and/or modify this
+..    document under the terms of the GNU Free Documentation License,
+..    Version 1.1 or any later version published by the Free Software
+..    Foundation, with no Invariant Sections, no Front-Cover Texts
+..    and no Back-Cover Texts. A copy of the license is included at
+..    Documentation/media/uapi/fdl-appendix.rst.
+..
+.. TODO: replace it to GPL-2.0 OR GFDL-1.1-or-later WITH no-invariant-sections
 
 .. _media_request_ioc_queue:
 
diff --git a/Documentation/media/uapi/mediactl/media-request-ioc-reinit.rst b/Documentation/media/uapi/mediactl/media-request-ioc-reinit.rst
index febe888494c8d..ec61960c81ce9 100644
--- a/Documentation/media/uapi/mediactl/media-request-ioc-reinit.rst
+++ b/Documentation/media/uapi/mediactl/media-request-ioc-reinit.rst
@@ -1,4 +1,28 @@
-.. SPDX-License-Identifier: GPL-2.0 OR GFDL-1.1-or-later WITH no-invariant-sections
+.. This file is dual-licensed: you can use it either under the terms
+.. of the GPL or the GFDL 1.1+ license, at your option. Note that this
+.. dual licensing only applies to this file, and not this project as a
+.. whole.
+..
+.. a) This file is free software; you can redistribute it and/or
+..    modify it under the terms of the GNU General Public License as
+..    published by the Free Software Foundation; either version 2 of
+..    the License, or (at your option) any later version.
+..
+..    This file is distributed in the hope that it will be useful,
+..    but WITHOUT ANY WARRANTY; without even the implied warranty of
+..    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+..    GNU General Public License for more details.
+..
+.. Or, alternatively,
+..
+.. b) Permission is granted to copy, distribute and/or modify this
+..    document under the terms of the GNU Free Documentation License,
+..    Version 1.1 or any later version published by the Free Software
+..    Foundation, with no Invariant Sections, no Front-Cover Texts
+..    and no Back-Cover Texts. A copy of the license is included at
+..    Documentation/media/uapi/fdl-appendix.rst.
+..
+.. TODO: replace it to GPL-2.0 OR GFDL-1.1-or-later WITH no-invariant-sections
 
 .. _media_request_ioc_reinit:
 
diff --git a/Documentation/media/uapi/mediactl/request-api.rst b/Documentation/media/uapi/mediactl/request-api.rst
index 5f4a23029c487..945113dcb2185 100644
--- a/Documentation/media/uapi/mediactl/request-api.rst
+++ b/Documentation/media/uapi/mediactl/request-api.rst
@@ -1,4 +1,28 @@
-.. SPDX-License-Identifier: GPL-2.0 OR GFDL-1.1-or-later WITH no-invariant-sections
+.. This file is dual-licensed: you can use it either under the terms
+.. of the GPL or the GFDL 1.1+ license, at your option. Note that this
+.. dual licensing only applies to this file, and not this project as a
+.. whole.
+..
+.. a) This file is free software; you can redistribute it and/or
+..    modify it under the terms of the GNU General Public License as
+..    published by the Free Software Foundation; either version 2 of
+..    the License, or (at your option) any later version.
+..
+..    This file is distributed in the hope that it will be useful,
+..    but WITHOUT ANY WARRANTY; without even the implied warranty of
+..    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+..    GNU General Public License for more details.
+..
+.. Or, alternatively,
+..
+.. b) Permission is granted to copy, distribute and/or modify this
+..    document under the terms of the GNU Free Documentation License,
+..    Version 1.1 or any later version published by the Free Software
+..    Foundation, with no Invariant Sections, no Front-Cover Texts
+..    and no Back-Cover Texts. A copy of the license is included at
+..    Documentation/media/uapi/fdl-appendix.rst.
+..
+.. TODO: replace it to GPL-2.0 OR GFDL-1.1-or-later WITH no-invariant-sections
 
 .. _media-request-api:
 
diff --git a/Documentation/media/uapi/mediactl/request-func-close.rst b/Documentation/media/uapi/mediactl/request-func-close.rst
index 098d7f2b95482..dcf3f35bcf176 100644
--- a/Documentation/media/uapi/mediactl/request-func-close.rst
+++ b/Documentation/media/uapi/mediactl/request-func-close.rst
@@ -1,4 +1,28 @@
-.. SPDX-License-Identifier: GPL-2.0 OR GFDL-1.1-or-later WITH no-invariant-sections
+.. This file is dual-licensed: you can use it either under the terms
+.. of the GPL or the GFDL 1.1+ license, at your option. Note that this
+.. dual licensing only applies to this file, and not this project as a
+.. whole.
+..
+.. a) This file is free software; you can redistribute it and/or
+..    modify it under the terms of the GNU General Public License as
+..    published by the Free Software Foundation; either version 2 of
+..    the License, or (at your option) any later version.
+..
+..    This file is distributed in the hope that it will be useful,
+..    but WITHOUT ANY WARRANTY; without even the implied warranty of
+..    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+..    GNU General Public License for more details.
+..
+.. Or, alternatively,
+..
+.. b) Permission is granted to copy, distribute and/or modify this
+..    document under the terms of the GNU Free Documentation License,
+..    Version 1.1 or any later version published by the Free Software
+..    Foundation, with no Invariant Sections, no Front-Cover Texts
+..    and no Back-Cover Texts. A copy of the license is included at
+..    Documentation/media/uapi/fdl-appendix.rst.
+..
+.. TODO: replace it to GPL-2.0 OR GFDL-1.1-or-later WITH no-invariant-sections
 
 .. _request-func-close:
 
diff --git a/Documentation/media/uapi/mediactl/request-func-ioctl.rst b/Documentation/media/uapi/mediactl/request-func-ioctl.rst
index ff7b072a69991..11a22f8878439 100644
--- a/Documentation/media/uapi/mediactl/request-func-ioctl.rst
+++ b/Documentation/media/uapi/mediactl/request-func-ioctl.rst
@@ -1,4 +1,28 @@
-.. SPDX-License-Identifier: GPL-2.0 OR GFDL-1.1-or-later WITH no-invariant-sections
+.. This file is dual-licensed: you can use it either under the terms
+.. of the GPL or the GFDL 1.1+ license, at your option. Note that this
+.. dual licensing only applies to this file, and not this project as a
+.. whole.
+..
+.. a) This file is free software; you can redistribute it and/or
+..    modify it under the terms of the GNU General Public License as
+..    published by the Free Software Foundation; either version 2 of
+..    the License, or (at your option) any later version.
+..
+..    This file is distributed in the hope that it will be useful,
+..    but WITHOUT ANY WARRANTY; without even the implied warranty of
+..    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+..    GNU General Public License for more details.
+..
+.. Or, alternatively,
+..
+.. b) Permission is granted to copy, distribute and/or modify this
+..    document under the terms of the GNU Free Documentation License,
+..    Version 1.1 or any later version published by the Free Software
+..    Foundation, with no Invariant Sections, no Front-Cover Texts
+..    and no Back-Cover Texts. A copy of the license is included at
+..    Documentation/media/uapi/fdl-appendix.rst.
+..
+.. TODO: replace it to GPL-2.0 OR GFDL-1.1-or-later WITH no-invariant-sections
 
 .. _request-func-ioctl:
 
diff --git a/Documentation/media/uapi/mediactl/request-func-poll.rst b/Documentation/media/uapi/mediactl/request-func-poll.rst
index 85191254f381a..2609fd54d519c 100644
--- a/Documentation/media/uapi/mediactl/request-func-poll.rst
+++ b/Documentation/media/uapi/mediactl/request-func-poll.rst
@@ -1,4 +1,28 @@
-.. SPDX-License-Identifier: GPL-2.0 OR GFDL-1.1-or-later WITH no-invariant-sections
+.. This file is dual-licensed: you can use it either under the terms
+.. of the GPL or the GFDL 1.1+ license, at your option. Note that this
+.. dual licensing only applies to this file, and not this project as a
+.. whole.
+..
+.. a) This file is free software; you can redistribute it and/or
+..    modify it under the terms of the GNU General Public License as
+..    published by the Free Software Foundation; either version 2 of
+..    the License, or (at your option) any later version.
+..
+..    This file is distributed in the hope that it will be useful,
+..    but WITHOUT ANY WARRANTY; without even the implied warranty of
+..    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+..    GNU General Public License for more details.
+..
+.. Or, alternatively,
+..
+.. b) Permission is granted to copy, distribute and/or modify this
+..    document under the terms of the GNU Free Documentation License,
+..    Version 1.1 or any later version published by the Free Software
+..    Foundation, with no Invariant Sections, no Front-Cover Texts
+..    and no Back-Cover Texts. A copy of the license is included at
+..    Documentation/media/uapi/fdl-appendix.rst.
+..
+.. TODO: replace it to GPL-2.0 OR GFDL-1.1-or-later WITH no-invariant-sections
 
 .. _request-func-poll:
 
-- 
GitLab


From 45611c61dd503454b2edae00aabe1e429ec49ebe Mon Sep 17 00:00:00 2001
From: Bernd Eckstein <3erndeckstein@gmail.com>
Date: Fri, 23 Nov 2018 13:51:26 +0100
Subject: [PATCH 1469/1890] usbnet: ipheth: fix potential recvmsg bug and
 recvmsg bug 2

The bug is not easily reproducable, as it may occur very infrequently
(we had machines with 20minutes heavy downloading before it occurred)
However, on a virual machine (VMWare on Windows 10 host) it occurred
pretty frequently (1-2 seconds after a speedtest was started)

dev->tx_skb mab be freed via dev_kfree_skb_irq on a callback
before it is set.

This causes the following problems:
- double free of the skb or potential memory leak
- in dmesg: 'recvmsg bug' and 'recvmsg bug 2' and eventually
  general protection fault

Example dmesg output:
[  134.841986] ------------[ cut here ]------------
[  134.841987] recvmsg bug: copied 9C24A555 seq 9C24B557 rcvnxt 9C25A6B3 fl 0
[  134.841993] WARNING: CPU: 7 PID: 2629 at /build/linux-hwe-On9fm7/linux-hwe-4.15.0/net/ipv4/tcp.c:1865 tcp_recvmsg+0x44d/0xab0
[  134.841994] Modules linked in: ipheth(OE) kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel aes_x86_64 crypto_simd glue_helper cryptd vmw_balloon intel_rapl_perf joydev input_leds serio_raw vmw_vsock_vmci_transport vsock shpchp i2c_piix4 mac_hid binfmt_misc vmw_vmci parport_pc ppdev lp parport autofs4 vmw_pvscsi vmxnet3 hid_generic usbhid hid vmwgfx ttm drm_kms_helper syscopyarea sysfillrect mptspi mptscsih sysimgblt ahci psmouse fb_sys_fops pata_acpi mptbase libahci e1000 drm scsi_transport_spi
[  134.842046] CPU: 7 PID: 2629 Comm: python Tainted: G        W  OE    4.15.0-34-generic #37~16.04.1-Ubuntu
[  134.842046] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 05/19/2017
[  134.842048] RIP: 0010:tcp_recvmsg+0x44d/0xab0
[  134.842048] RSP: 0018:ffffa6630422bcc8 EFLAGS: 00010286
[  134.842049] RAX: 0000000000000000 RBX: ffff997616f4f200 RCX: 0000000000000006
[  134.842049] RDX: 0000000000000007 RSI: 0000000000000082 RDI: ffff9976257d6490
[  134.842050] RBP: ffffa6630422bd98 R08: 0000000000000001 R09: 000000000004bba4
[  134.842050] R10: 0000000001e00c6f R11: 000000000004bba4 R12: ffff99760dee3000
[  134.842051] R13: 0000000000000000 R14: ffff99760dee3514 R15: 0000000000000000
[  134.842051] FS:  00007fe332347700(0000) GS:ffff9976257c0000(0000) knlGS:0000000000000000
[  134.842052] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  134.842053] CR2: 0000000001e41000 CR3: 000000020e9b4006 CR4: 00000000003606e0
[  134.842055] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  134.842055] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[  134.842057] Call Trace:
[  134.842060]  ? aa_sk_perm+0x53/0x1a0
[  134.842064]  inet_recvmsg+0x51/0xc0
[  134.842066]  sock_recvmsg+0x43/0x50
[  134.842070]  SYSC_recvfrom+0xe4/0x160
[  134.842072]  ? __schedule+0x3de/0x8b0
[  134.842075]  ? ktime_get_ts64+0x4c/0xf0
[  134.842079]  SyS_recvfrom+0xe/0x10
[  134.842082]  do_syscall_64+0x73/0x130
[  134.842086]  entry_SYSCALL_64_after_hwframe+0x3d/0xa2
[  134.842086] RIP: 0033:0x7fe331f5a81d
[  134.842088] RSP: 002b:00007ffe8da98398 EFLAGS: 00000246 ORIG_RAX: 000000000000002d
[  134.842090] RAX: ffffffffffffffda RBX: ffffffffffffffff RCX: 00007fe331f5a81d
[  134.842094] RDX: 00000000000003fb RSI: 0000000001e00874 RDI: 0000000000000003
[  134.842095] RBP: 00007fe32f642c70 R08: 0000000000000000 R09: 0000000000000000
[  134.842097] R10: 0000000000000000 R11: 0000000000000246 R12: 00007fe332347698
[  134.842099] R13: 0000000001b7e0a0 R14: 0000000001e00874 R15: 0000000000000000
[  134.842103] Code: 24 fd ff ff e9 cc fe ff ff 48 89 d8 41 8b 8c 24 10 05 00 00 44 8b 45 80 48 c7 c7 08 bd 59 8b 48 89 85 68 ff ff ff e8 b3 c4 7d ff <0f> 0b 48 8b 85 68 ff ff ff e9 e9 fe ff ff 41 8b 8c 24 10 05 00
[  134.842126] ---[ end trace b7138fc08c83147f ]---
[  134.842144] general protection fault: 0000 [#1] SMP PTI
[  134.842145] Modules linked in: ipheth(OE) kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel aes_x86_64 crypto_simd glue_helper cryptd vmw_balloon intel_rapl_perf joydev input_leds serio_raw vmw_vsock_vmci_transport vsock shpchp i2c_piix4 mac_hid binfmt_misc vmw_vmci parport_pc ppdev lp parport autofs4 vmw_pvscsi vmxnet3 hid_generic usbhid hid vmwgfx ttm drm_kms_helper syscopyarea sysfillrect mptspi mptscsih sysimgblt ahci psmouse fb_sys_fops pata_acpi mptbase libahci e1000 drm scsi_transport_spi
[  134.842161] CPU: 7 PID: 2629 Comm: python Tainted: G        W  OE    4.15.0-34-generic #37~16.04.1-Ubuntu
[  134.842162] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 05/19/2017
[  134.842164] RIP: 0010:tcp_close+0x2c6/0x440
[  134.842165] RSP: 0018:ffffa6630422bde8 EFLAGS: 00010202
[  134.842167] RAX: 0000000000000000 RBX: ffff99760dee3000 RCX: 0000000180400034
[  134.842168] RDX: 5c4afd407207a6c4 RSI: ffffe868495bd300 RDI: ffff997616f4f200
[  134.842169] RBP: ffffa6630422be08 R08: 0000000016f4d401 R09: 0000000180400034
[  134.842169] R10: ffffa6630422bd98 R11: 0000000000000000 R12: 000000000000600c
[  134.842170] R13: 0000000000000000 R14: ffff99760dee30c8 R15: ffff9975bd44fe00
[  134.842171] FS:  00007fe332347700(0000) GS:ffff9976257c0000(0000) knlGS:0000000000000000
[  134.842173] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  134.842174] CR2: 0000000001e41000 CR3: 000000020e9b4006 CR4: 00000000003606e0
[  134.842177] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  134.842178] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[  134.842179] Call Trace:
[  134.842181]  inet_release+0x42/0x70
[  134.842183]  __sock_release+0x42/0xb0
[  134.842184]  sock_close+0x15/0x20
[  134.842187]  __fput+0xea/0x220
[  134.842189]  ____fput+0xe/0x10
[  134.842191]  task_work_run+0x8a/0xb0
[  134.842193]  exit_to_usermode_loop+0xc4/0xd0
[  134.842195]  do_syscall_64+0xf4/0x130
[  134.842197]  entry_SYSCALL_64_after_hwframe+0x3d/0xa2
[  134.842197] RIP: 0033:0x7fe331f5a560
[  134.842198] RSP: 002b:00007ffe8da982e8 EFLAGS: 00000246 ORIG_RAX: 0000000000000003
[  134.842200] RAX: 0000000000000000 RBX: 00007fe32f642c70 RCX: 00007fe331f5a560
[  134.842201] RDX: 00000000008f5320 RSI: 0000000001cd4b50 RDI: 0000000000000003
[  134.842202] RBP: 00007fe32f6500f8 R08: 000000000000003c R09: 00000000009343c0
[  134.842203] R10: 0000000000000000 R11: 0000000000000246 R12: 00007fe32f6500d0
[  134.842204] R13: 00000000008f5320 R14: 00000000008f5320 R15: 0000000001cd4770
[  134.842205] Code: c8 00 00 00 45 31 e4 49 39 fe 75 4d eb 50 83 ab d8 00 00 00 01 48 8b 17 48 8b 47 08 48 c7 07 00 00 00 00 48 c7 47 08 00 00 00 00 <48> 89 42 08 48 89 10 0f b6 57 34 8b 47 2c 2b 47 28 83 e2 01 80
[  134.842226] RIP: tcp_close+0x2c6/0x440 RSP: ffffa6630422bde8
[  134.842227] ---[ end trace b7138fc08c831480 ]---

The proposed patch eliminates a potential racing condition.
Before, usb_submit_urb was called and _after_ that, the skb was attached
(dev->tx_skb). So, on a callback it was possible, however unlikely that the
skb was freed before it was set. That way (because dev->tx_skb was not set
to NULL after it was freed), it could happen that a skb from a earlier
transmission was freed a second time (and the skb we should have freed did
not get freed at all)

Now we free the skb directly in ipheth_tx(). It is not passed to the
callback anymore, eliminating the posibility of a double free of the same
skb. Depending on the retval of usb_submit_urb() we use dev_kfree_skb_any()
respectively dev_consume_skb_any() to free the skb.

Signed-off-by: Oliver Zweigle <Oliver.Zweigle@faro.com>
Signed-off-by: Bernd Eckstein <3ernd.Eckstein@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/ipheth.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c
index 7275761a1177c..3d8a70d3ea9bd 100644
--- a/drivers/net/usb/ipheth.c
+++ b/drivers/net/usb/ipheth.c
@@ -140,7 +140,6 @@ struct ipheth_device {
 	struct usb_device *udev;
 	struct usb_interface *intf;
 	struct net_device *net;
-	struct sk_buff *tx_skb;
 	struct urb *tx_urb;
 	struct urb *rx_urb;
 	unsigned char *tx_buf;
@@ -230,6 +229,7 @@ static void ipheth_rcvbulk_callback(struct urb *urb)
 	case -ENOENT:
 	case -ECONNRESET:
 	case -ESHUTDOWN:
+	case -EPROTO:
 		return;
 	case 0:
 		break;
@@ -281,7 +281,6 @@ static void ipheth_sndbulk_callback(struct urb *urb)
 		dev_err(&dev->intf->dev, "%s: urb status: %d\n",
 		__func__, status);
 
-	dev_kfree_skb_irq(dev->tx_skb);
 	if (status == 0)
 		netif_wake_queue(dev->net);
 	else
@@ -423,7 +422,7 @@ static int ipheth_tx(struct sk_buff *skb, struct net_device *net)
 	if (skb->len > IPHETH_BUF_SIZE) {
 		WARN(1, "%s: skb too large: %d bytes\n", __func__, skb->len);
 		dev->net->stats.tx_dropped++;
-		dev_kfree_skb_irq(skb);
+		dev_kfree_skb_any(skb);
 		return NETDEV_TX_OK;
 	}
 
@@ -443,12 +442,11 @@ static int ipheth_tx(struct sk_buff *skb, struct net_device *net)
 		dev_err(&dev->intf->dev, "%s: usb_submit_urb: %d\n",
 			__func__, retval);
 		dev->net->stats.tx_errors++;
-		dev_kfree_skb_irq(skb);
+		dev_kfree_skb_any(skb);
 	} else {
-		dev->tx_skb = skb;
-
 		dev->net->stats.tx_packets++;
 		dev->net->stats.tx_bytes += skb->len;
+		dev_consume_skb_any(skb);
 		netif_stop_queue(net);
 	}
 
-- 
GitLab


From c85ddecae6e5e82ca3ae6f20c63f1d865e2ff5ea Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Fri, 23 Nov 2018 19:41:29 +0100
Subject: [PATCH 1470/1890] net: phy: add workaround for issue where PHY driver
 doesn't bind to the device

After switching the r8169 driver to use phylib some user reported that
their network is broken. This was caused by the genphy PHY driver being
used instead of the dedicated PHY driver for the RTL8211B. Users
reported that loading the Realtek PHY driver module upfront fixes the
issue. See also this mail thread:
https://marc.info/?t=154279781800003&r=1&w=2
The issue is quite weird and the root cause seems to be somewhere in
the base driver core. The patch works around the issue and may be
removed once the actual issue is fixed.

The Fixes tag refers to the first reported occurrence of the issue.
The issue itself may have been existing much longer and it may affect
users of other network chips as well. Users typically will recognize
this issue only if their PHY stops working when being used with the
genphy driver.

Fixes: f1e911d5d0df ("r8169: add basic phylib support")
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy_device.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index ab33d1777132e..23ee3967c1667 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -2197,6 +2197,14 @@ int phy_driver_register(struct phy_driver *new_driver, struct module *owner)
 	new_driver->mdiodrv.driver.remove = phy_remove;
 	new_driver->mdiodrv.driver.owner = owner;
 
+	/* The following works around an issue where the PHY driver doesn't bind
+	 * to the device, resulting in the genphy driver being used instead of
+	 * the dedicated driver. The root cause of the issue isn't known yet
+	 * and seems to be in the base driver core. Once this is fixed we may
+	 * remove this workaround.
+	 */
+	new_driver->mdiodrv.driver.probe_type = PROBE_FORCE_SYNCHRONOUS;
+
 	retval = driver_register(&new_driver->mdiodrv.driver);
 	if (retval) {
 		pr_err("%s: Error %d in registering driver\n",
-- 
GitLab


From a8842e975503191e4982efb886299fc19972c97f Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Sun, 25 Nov 2018 23:21:08 +0000
Subject: [PATCH 1471/1890] firestream: fix spelling mistake: "Inititing" ->
 "Initializing"

There are spelling mistakes in debug messages, fix them.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/atm/firestream.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/atm/firestream.c b/drivers/atm/firestream.c
index 4e46dc9e41ad0..11e1663bdc4de 100644
--- a/drivers/atm/firestream.c
+++ b/drivers/atm/firestream.c
@@ -1410,7 +1410,7 @@ static int init_q(struct fs_dev *dev, struct queue *txq, int queue,
 
 	func_enter ();
 
-	fs_dprintk (FS_DEBUG_INIT, "Inititing queue at %x: %d entries:\n", 
+	fs_dprintk (FS_DEBUG_INIT, "Initializing queue at %x: %d entries:\n",
 		    queue, nentries);
 
 	p = aligned_kmalloc (sz, GFP_KERNEL, 0x10);
@@ -1443,7 +1443,7 @@ static int init_fp(struct fs_dev *dev, struct freepool *fp, int queue,
 {
 	func_enter ();
 
-	fs_dprintk (FS_DEBUG_INIT, "Inititing free pool at %x:\n", queue);
+	fs_dprintk (FS_DEBUG_INIT, "Initializing free pool at %x:\n", queue);
 
 	write_fs (dev, FP_CNF(queue), (bufsize * RBFP_RBS) | RBFP_RBSVAL | RBFP_CME);
 	write_fs (dev, FP_SA(queue),  0);
-- 
GitLab


From 0d32f17717e65e76cbdb248374dd162acdfe2fff Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 26 Nov 2018 14:52:44 +0800
Subject: [PATCH 1472/1890] sctp: increase sk_wmem_alloc when head->truesize is
 increased

I changed to count sk_wmem_alloc by skb truesize instead of 1 to
fix the sk_wmem_alloc leak caused by later truesize's change in
xfrm in Commit 02968ccf0125 ("sctp: count sk_wmem_alloc by skb
truesize in sctp_packet_transmit").

But I should have also increased sk_wmem_alloc when head->truesize
is increased in sctp_packet_gso_append() as xfrm does. Otherwise,
sctp gso packet will cause sk_wmem_alloc underflow.

Fixes: 02968ccf0125 ("sctp: count sk_wmem_alloc by skb truesize in sctp_packet_transmit")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/output.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/sctp/output.c b/net/sctp/output.c
index b0e74a3e77ec5..025f48e14a91f 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -410,6 +410,7 @@ static void sctp_packet_gso_append(struct sk_buff *head, struct sk_buff *skb)
 	head->truesize += skb->truesize;
 	head->data_len += skb->len;
 	head->len += skb->len;
+	refcount_add(skb->truesize, &head->sk->sk_wmem_alloc);
 
 	__skb_header_release(skb);
 }
-- 
GitLab


From f6f8c1c09c224076a6c9ca3d97c24b81698f98f8 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Mon, 26 Nov 2018 17:59:40 +1100
Subject: [PATCH 1473/1890] sparc: suppress the implicit-fallthrough warning

sparc builds with -Werror

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/kernel/signal_64.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c
index e9de1803a22e0..ca70787efd8e0 100644
--- a/arch/sparc/kernel/signal_64.c
+++ b/arch/sparc/kernel/signal_64.c
@@ -533,6 +533,7 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0)
 				regs->tpc -= 4;
 				regs->tnpc -= 4;
 				pt_regs_clear_syscall(regs);
+				/* fall through */
 			case ERESTART_RESTARTBLOCK:
 				regs->u_regs[UREG_G1] = __NR_restart_syscall;
 				regs->tpc -= 4;
-- 
GitLab


From e945067d95cc99a6e4601b6bda665f30c7917c3b Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Mon, 26 Nov 2018 18:11:46 +1100
Subject: [PATCH 1474/1890] sparc32: suppress an implicit-fallthrough warning

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/kernel/signal32.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c
index 4c5b3fcbed94c..e800ce13cc6e5 100644
--- a/arch/sparc/kernel/signal32.c
+++ b/arch/sparc/kernel/signal32.c
@@ -683,6 +683,7 @@ void do_signal32(struct pt_regs * regs)
 				regs->tpc -= 4;
 				regs->tnpc -= 4;
 				pt_regs_clear_syscall(regs);
+				/* fall through */
 			case ERESTART_RESTARTBLOCK:
 				regs->u_regs[UREG_G1] = __NR_restart_syscall;
 				regs->tpc -= 4;
-- 
GitLab


From e399ef194171717aefa78838d02b1e92267be2b1 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Mon, 26 Nov 2018 18:45:26 +1100
Subject: [PATCH 1475/1890] sparc32: supress another implicit-fallthrough
 warning

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/kernel/signal_32.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c
index 5665261cee37f..83953780ca016 100644
--- a/arch/sparc/kernel/signal_32.c
+++ b/arch/sparc/kernel/signal_32.c
@@ -508,6 +508,7 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0)
 				regs->pc -= 4;
 				regs->npc -= 4;
 				pt_regs_clear_syscall(regs);
+				/* fall through */
 			case ERESTART_RESTARTBLOCK:
 				regs->u_regs[UREG_G1] = __NR_restart_syscall;
 				regs->pc -= 4;
-- 
GitLab


From 24a6d2dd263bc910de018c78d1148b3e33b94512 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Date: Mon, 26 Nov 2018 15:07:16 +0100
Subject: [PATCH 1476/1890] net: thunderx: fix NULL pointer dereference in
 nic_remove

Fix a possible NULL pointer dereference in nic_remove routine
removing the nicpf module if nic_probe fails.
The issue can be triggered with the following reproducer:

$rmmod nicvf
$rmmod nicpf

[  521.412008] Unable to handle kernel access to user memory outside uaccess routines at virtual address 0000000000000014
[  521.422777] Mem abort info:
[  521.425561]   ESR = 0x96000004
[  521.428624]   Exception class = DABT (current EL), IL = 32 bits
[  521.434535]   SET = 0, FnV = 0
[  521.437579]   EA = 0, S1PTW = 0
[  521.440730] Data abort info:
[  521.443603]   ISV = 0, ISS = 0x00000004
[  521.447431]   CM = 0, WnR = 0
[  521.450417] user pgtable: 4k pages, 48-bit VAs, pgdp = 0000000072a3da42
[  521.457022] [0000000000000014] pgd=0000000000000000
[  521.461916] Internal error: Oops: 96000004 [#1] SMP
[  521.511801] Hardware name: GIGABYTE H270-T70/MT70-HD0, BIOS T49 02/02/2018
[  521.518664] pstate: 80400005 (Nzcv daif +PAN -UAO)
[  521.523451] pc : nic_remove+0x24/0x88 [nicpf]
[  521.527808] lr : pci_device_remove+0x48/0xd8
[  521.532066] sp : ffff000013433cc0
[  521.535370] x29: ffff000013433cc0 x28: ffff810f6ac50000
[  521.540672] x27: 0000000000000000 x26: 0000000000000000
[  521.545974] x25: 0000000056000000 x24: 0000000000000015
[  521.551274] x23: ffff8007ff89a110 x22: ffff000001667070
[  521.556576] x21: ffff8007ffb170b0 x20: ffff8007ffb17000
[  521.561877] x19: 0000000000000000 x18: 0000000000000025
[  521.567178] x17: 0000000000000000 x16: 000000000000010ffc33ff98 x8 : 0000000000000000
[  521.593683] x7 : 0000000000000000 x6 : 0000000000000001
[  521.598983] x5 : 0000000000000002 x4 : 0000000000000003
[  521.604284] x3 : ffff8007ffb17184 x2 : ffff8007ffb17184
[  521.609585] x1 : ffff000001662118 x0 : ffff000008557be0
[  521.614887] Process rmmod (pid: 1897, stack limit = 0x00000000859535c3)
[  521.621490] Call trace:
[  521.623928]  nic_remove+0x24/0x88 [nicpf]
[  521.627927]  pci_device_remove+0x48/0xd8
[  521.631847]  device_release_driver_internal+0x1b0/0x248
[  521.637062]  driver_detach+0x50/0xc0
[  521.640628]  bus_remove_driver+0x60/0x100
[  521.644627]  driver_unregister+0x34/0x60
[  521.648538]  pci_unregister_driver+0x24/0xd8
[  521.652798]  nic_cleanup_module+0x14/0x111c [nicpf]
[  521.657672]  __arm64_sys_delete_module+0x150/0x218
[  521.662460]  el0_svc_handler+0x94/0x110
[  521.666287]  el0_svc+0x8/0xc
[  521.669160] Code: aa1e03e0 9102c295 d503201f f9404eb3 (b9401660)

Fixes: 4863dea3fab0 ("net: Adding support for Cavium ThunderX network controller")
Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/thunder/nic_main.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c
index 55af04fa03a77..6c8dcb65ff031 100644
--- a/drivers/net/ethernet/cavium/thunder/nic_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nic_main.c
@@ -1441,6 +1441,9 @@ static void nic_remove(struct pci_dev *pdev)
 {
 	struct nicpf *nic = pci_get_drvdata(pdev);
 
+	if (!nic)
+		return;
+
 	if (nic->flags & NIC_SRIOV_ENABLED)
 		pci_disable_sriov(pdev);
 
-- 
GitLab


From 1d510657ac7e486dd443cabbbd0677253aeb705f Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Mon, 26 Nov 2018 15:53:54 +0000
Subject: [PATCH 1477/1890] qed: fix spelling mistake "attnetion" ->
 "attention"

The text in array s_igu_fifo_error_strs contains a spelling mistake,
fix it.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_debug.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c
index 78a638ec7c0ae..979f1e4bc18bf 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_debug.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c
@@ -6071,7 +6071,7 @@ static const char * const s_igu_fifo_error_strs[] = {
 	"no error",
 	"length error",
 	"function disabled",
-	"VF sent command to attnetion address",
+	"VF sent command to attention address",
 	"host sent prod update command",
 	"read of during interrupt register while in MIMD mode",
 	"access to PXP BAR reserved address",
-- 
GitLab


From 4b5adba07c945e191ac2ecd83627dd520dca152f Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Mon, 26 Nov 2018 15:34:01 +0000
Subject: [PATCH 1478/1890] net: via: via-velocity: fix spelling mistake
 "alignement" -> "alignment"

The text in array velocity_gstrings contains a spelling mistake,
rename rx_frame_alignement_errors to rx_frame_alignment_errors.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/via/via-velocity.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c
index ef9538ee53d0d..82412691ee66b 100644
--- a/drivers/net/ethernet/via/via-velocity.c
+++ b/drivers/net/ethernet/via/via-velocity.c
@@ -3605,7 +3605,7 @@ static const char velocity_gstrings[][ETH_GSTRING_LEN] = {
 	"tx_jumbo",
 	"rx_mac_control_frames",
 	"tx_mac_control_frames",
-	"rx_frame_alignement_errors",
+	"rx_frame_alignment_errors",
 	"rx_long_ok",
 	"rx_long_err",
 	"tx_sqe_errors",
-- 
GitLab


From cc5922054131f9abefdc0622ae64fc55e6b2671d Mon Sep 17 00:00:00 2001
From: Bryan Whitehead <Bryan.Whitehead@microchip.com>
Date: Mon, 26 Nov 2018 12:04:57 -0500
Subject: [PATCH 1479/1890] lan743x: fix return value for lan743x_tx_napi_poll

The lan743x driver, when under heavy traffic load, has been noticed
to sometimes hang, or cause a kernel panic.

Debugging reveals that the TX napi poll routine was returning
the wrong value, 'weight'. Most other drivers return 0.
And call napi_complete, instead of napi_complete_done.

Additionally when creating the tx napi poll routine.
Changed netif_napi_add, to netif_tx_napi_add.

Updates for v3:
    changed 'fixes' tag to match defined format

Updates for v2:
use napi_complete, instead of napi_complete_done in
    lan743x_tx_napi_poll
use netif_tx_napi_add, instead of netif_napi_add for
    registration of tx napi poll routine

fixes: 23f0703c125b ("lan743x: Add main source files for new lan743x driver")
Signed-off-by: Bryan Whitehead <Bryan.Whitehead@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/microchip/lan743x_main.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
index 867cddba840fe..d627129c0adc5 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.c
+++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@ -1672,7 +1672,7 @@ static int lan743x_tx_napi_poll(struct napi_struct *napi, int weight)
 		netif_wake_queue(adapter->netdev);
 	}
 
-	if (!napi_complete_done(napi, weight))
+	if (!napi_complete(napi))
 		goto done;
 
 	/* enable isr */
@@ -1681,7 +1681,7 @@ static int lan743x_tx_napi_poll(struct napi_struct *napi, int weight)
 	lan743x_csr_read(adapter, INT_STS);
 
 done:
-	return weight;
+	return 0;
 }
 
 static void lan743x_tx_ring_cleanup(struct lan743x_tx *tx)
@@ -1870,9 +1870,9 @@ static int lan743x_tx_open(struct lan743x_tx *tx)
 	tx->vector_flags = lan743x_intr_get_vector_flags(adapter,
 							 INT_BIT_DMA_TX_
 							 (tx->channel_number));
-	netif_napi_add(adapter->netdev,
-		       &tx->napi, lan743x_tx_napi_poll,
-		       tx->ring_size - 1);
+	netif_tx_napi_add(adapter->netdev,
+			  &tx->napi, lan743x_tx_napi_poll,
+			  tx->ring_size - 1);
 	napi_enable(&tx->napi);
 
 	data = 0;
-- 
GitLab


From ec835f891232d7763dea9da0358f31e24ca6dfb7 Mon Sep 17 00:00:00 2001
From: Jon Maloy <donmalo99@gmail.com>
Date: Mon, 26 Nov 2018 12:26:14 -0500
Subject: [PATCH 1480/1890] tipc: fix lockdep warning during node delete

We see the following lockdep warning:

[ 2284.078521] ======================================================
[ 2284.078604] WARNING: possible circular locking dependency detected
[ 2284.078604] 4.19.0+ #42 Tainted: G            E
[ 2284.078604] ------------------------------------------------------
[ 2284.078604] rmmod/254 is trying to acquire lock:
[ 2284.078604] 00000000acd94e28 ((&n->timer)#2){+.-.}, at: del_timer_sync+0x5/0xa0
[ 2284.078604]
[ 2284.078604] but task is already holding lock:
[ 2284.078604] 00000000f997afc0 (&(&tn->node_list_lock)->rlock){+.-.}, at: tipc_node_stop+0xac/0x190 [tipc]
[ 2284.078604]
[ 2284.078604] which lock already depends on the new lock.
[ 2284.078604]
[ 2284.078604]
[ 2284.078604] the existing dependency chain (in reverse order) is:
[ 2284.078604]
[ 2284.078604] -> #1 (&(&tn->node_list_lock)->rlock){+.-.}:
[ 2284.078604]        tipc_node_timeout+0x20a/0x330 [tipc]
[ 2284.078604]        call_timer_fn+0xa1/0x280
[ 2284.078604]        run_timer_softirq+0x1f2/0x4d0
[ 2284.078604]        __do_softirq+0xfc/0x413
[ 2284.078604]        irq_exit+0xb5/0xc0
[ 2284.078604]        smp_apic_timer_interrupt+0xac/0x210
[ 2284.078604]        apic_timer_interrupt+0xf/0x20
[ 2284.078604]        default_idle+0x1c/0x140
[ 2284.078604]        do_idle+0x1bc/0x280
[ 2284.078604]        cpu_startup_entry+0x19/0x20
[ 2284.078604]        start_secondary+0x187/0x1c0
[ 2284.078604]        secondary_startup_64+0xa4/0xb0
[ 2284.078604]
[ 2284.078604] -> #0 ((&n->timer)#2){+.-.}:
[ 2284.078604]        del_timer_sync+0x34/0xa0
[ 2284.078604]        tipc_node_delete+0x1a/0x40 [tipc]
[ 2284.078604]        tipc_node_stop+0xcb/0x190 [tipc]
[ 2284.078604]        tipc_net_stop+0x154/0x170 [tipc]
[ 2284.078604]        tipc_exit_net+0x16/0x30 [tipc]
[ 2284.078604]        ops_exit_list.isra.8+0x36/0x70
[ 2284.078604]        unregister_pernet_operations+0x87/0xd0
[ 2284.078604]        unregister_pernet_subsys+0x1d/0x30
[ 2284.078604]        tipc_exit+0x11/0x6f2 [tipc]
[ 2284.078604]        __x64_sys_delete_module+0x1df/0x240
[ 2284.078604]        do_syscall_64+0x66/0x460
[ 2284.078604]        entry_SYSCALL_64_after_hwframe+0x49/0xbe
[ 2284.078604]
[ 2284.078604] other info that might help us debug this:
[ 2284.078604]
[ 2284.078604]  Possible unsafe locking scenario:
[ 2284.078604]
[ 2284.078604]        CPU0                    CPU1
[ 2284.078604]        ----                    ----
[ 2284.078604]   lock(&(&tn->node_list_lock)->rlock);
[ 2284.078604]                                lock((&n->timer)#2);
[ 2284.078604]                                lock(&(&tn->node_list_lock)->rlock);
[ 2284.078604]   lock((&n->timer)#2);
[ 2284.078604]
[ 2284.078604]  *** DEADLOCK ***
[ 2284.078604]
[ 2284.078604] 3 locks held by rmmod/254:
[ 2284.078604]  #0: 000000003368be9b (pernet_ops_rwsem){+.+.}, at: unregister_pernet_subsys+0x15/0x30
[ 2284.078604]  #1: 0000000046ed9c86 (rtnl_mutex){+.+.}, at: tipc_net_stop+0x144/0x170 [tipc]
[ 2284.078604]  #2: 00000000f997afc0 (&(&tn->node_list_lock)->rlock){+.-.}, at: tipc_node_stop+0xac/0x19
[...}

The reason is that the node timer handler sometimes needs to delete a
node which has been disconnected for too long. To do this, it grabs
the lock 'node_list_lock', which may at the same time be held by the
generic node cleanup function, tipc_node_stop(), during module removal.
Since the latter is calling del_timer_sync() inside the same lock, we
have a potential deadlock.

We fix this letting the timer cleanup function use spin_trylock()
instead of just spin_lock(), and when it fails to grab the lock it
just returns so that the timer handler can terminate its execution.
This is safe to do, since tipc_node_stop() anyway is about to
delete both the timer and the node instance.

Fixes: 6a939f365bdb ("tipc: Auto removal of peer down node instance")
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/node.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/net/tipc/node.c b/net/tipc/node.c
index 2afc4f8c37a74..4880197664335 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -584,12 +584,15 @@ static void  tipc_node_clear_links(struct tipc_node *node)
 /* tipc_node_cleanup - delete nodes that does not
  * have active links for NODE_CLEANUP_AFTER time
  */
-static int tipc_node_cleanup(struct tipc_node *peer)
+static bool tipc_node_cleanup(struct tipc_node *peer)
 {
 	struct tipc_net *tn = tipc_net(peer->net);
 	bool deleted = false;
 
-	spin_lock_bh(&tn->node_list_lock);
+	/* If lock held by tipc_node_stop() the node will be deleted anyway */
+	if (!spin_trylock_bh(&tn->node_list_lock))
+		return false;
+
 	tipc_node_write_lock(peer);
 
 	if (!node_is_up(peer) && time_after(jiffies, peer->delete_at)) {
-- 
GitLab


From 4df5ce9bc03e47d05f400e64aa32a82ec4cef419 Mon Sep 17 00:00:00 2001
From: Bryan Whitehead <Bryan.Whitehead@microchip.com>
Date: Mon, 26 Nov 2018 12:27:10 -0500
Subject: [PATCH 1481/1890] lan743x: Enable driver to work with LAN7431

This driver was designed to work with both LAN7430 and LAN7431.
The only difference between the two is the LAN7431 has support
for external phy.

This change adds LAN7431 to the list of recognized devices
supported by this driver.

Updates for v2:
    changed 'fixes' tag to match defined format

fixes: 23f0703c125b ("lan743x: Add main source files for new lan743x driver")
Signed-off-by: Bryan Whitehead <Bryan.Whitehead@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/microchip/lan743x_main.c | 1 +
 drivers/net/ethernet/microchip/lan743x_main.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
index d627129c0adc5..e8ca98c070f68 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.c
+++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@ -3017,6 +3017,7 @@ static const struct dev_pm_ops lan743x_pm_ops = {
 
 static const struct pci_device_id lan743x_pcidev_tbl[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_SMSC, PCI_DEVICE_ID_SMSC_LAN7430) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_SMSC, PCI_DEVICE_ID_SMSC_LAN7431) },
 	{ 0, }
 };
 
diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h
index 0e82b6368798a..2d6eea18973e8 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.h
+++ b/drivers/net/ethernet/microchip/lan743x_main.h
@@ -548,6 +548,7 @@ struct lan743x_adapter;
 /* SMSC acquired EFAR late 1990's, MCHP acquired SMSC 2012 */
 #define PCI_VENDOR_ID_SMSC		PCI_VENDOR_ID_EFAR
 #define PCI_DEVICE_ID_SMSC_LAN7430	(0x7430)
+#define PCI_DEVICE_ID_SMSC_LAN7431	(0x7431)
 
 #define PCI_CONFIG_LENGTH		(0x1000)
 
-- 
GitLab


From 07f7175b43827640d1e69c9eded89aa089a234b4 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Sun, 18 Nov 2018 17:14:10 -0500
Subject: [PATCH 1482/1890] x86/function_graph: Simplify with
 function_graph_enter()

The function_graph_enter() function does the work of calling the function
graph hook function and the management of the shadow stack, simplifying the
work done in the architecture dependent prepare_ftrace_return().

Have x86 use the new code, and remove the shadow stack management as well as
having to set up the trace structure.

This is needed to prepare for a fix of a design bug on how the curr_ret_stack
is used.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: x86@kernel.org
Cc: stable@kernel.org
Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback")
Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 arch/x86/kernel/ftrace.c | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 01ebcb6f263e3..7ee8067cbf45c 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -994,7 +994,6 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
 {
 	unsigned long old;
 	int faulted;
-	struct ftrace_graph_ent trace;
 	unsigned long return_hooker = (unsigned long)
 				&return_to_handler;
 
@@ -1046,19 +1045,7 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
 		return;
 	}
 
-	trace.func = self_addr;
-	trace.depth = current->curr_ret_stack + 1;
-
-	/* Only trace if the calling function expects to */
-	if (!ftrace_graph_entry(&trace)) {
+	if (function_graph_enter(old, self_addr, frame_pointer, parent))
 		*parent = old;
-		return;
-	}
-
-	if (ftrace_push_return_trace(old, self_addr, &trace.depth,
-				     frame_pointer, parent) == -EBUSY) {
-		*parent = old;
-		return;
-	}
 }
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
-- 
GitLab


From f1f5b14afd7cce39e6a9b25c685e1ea34c231096 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Sun, 18 Nov 2018 17:19:26 -0500
Subject: [PATCH 1483/1890] ARM: function_graph: Simplify with
 function_graph_enter()

The function_graph_enter() function does the work of calling the function
graph hook function and the management of the shadow stack, simplifying the
work done in the architecture dependent prepare_ftrace_return().

Have ARM use the new code, and remove the shadow stack management as well as
having to set up the trace structure.

This is needed to prepare for a fix of a design bug on how the curr_ret_stack
is used.

Cc: Russell King <linux@armlinux.org.uk>
Cc: linux-arm-kernel@lists.infradead.org
Cc: stable@kernel.org
Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback")
Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 arch/arm/kernel/ftrace.c | 17 +----------------
 1 file changed, 1 insertion(+), 16 deletions(-)

diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
index 0142fcfcc3d37..bda949fd84e8b 100644
--- a/arch/arm/kernel/ftrace.c
+++ b/arch/arm/kernel/ftrace.c
@@ -183,9 +183,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
 			   unsigned long frame_pointer)
 {
 	unsigned long return_hooker = (unsigned long) &return_to_handler;
-	struct ftrace_graph_ent trace;
 	unsigned long old;
-	int err;
 
 	if (unlikely(atomic_read(&current->tracing_graph_pause)))
 		return;
@@ -193,21 +191,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
 	old = *parent;
 	*parent = return_hooker;
 
-	trace.func = self_addr;
-	trace.depth = current->curr_ret_stack + 1;
-
-	/* Only trace if the calling function expects to */
-	if (!ftrace_graph_entry(&trace)) {
+	if (function_graph_enter(old, self_addr, frame_pointer, NULL))
 		*parent = old;
-		return;
-	}
-
-	err = ftrace_push_return_trace(old, self_addr, &trace.depth,
-				       frame_pointer, NULL);
-	if (err == -EBUSY) {
-		*parent = old;
-		return;
-	}
 }
 
 #ifdef CONFIG_DYNAMIC_FTRACE
-- 
GitLab


From 01e0ab2c4ff12358f15a856fd1a7bbea0670972b Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Sun, 18 Nov 2018 17:21:51 -0500
Subject: [PATCH 1484/1890] arm64: function_graph: Simplify with
 function_graph_enter()

The function_graph_enter() function does the work of calling the function
graph hook function and the management of the shadow stack, simplifying the
work done in the architecture dependent prepare_ftrace_return().

Have arm64 use the new code, and remove the shadow stack management as well as
having to set up the trace structure.

This is needed to prepare for a fix of a design bug on how the curr_ret_stack
is used.

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: stable@kernel.org
Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback")
Acked-by: Will Deacon <will.deacon@arm.com>
Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 arch/arm64/kernel/ftrace.c | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index 50986e388d2b2..57e962290df3a 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -216,8 +216,6 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
 {
 	unsigned long return_hooker = (unsigned long)&return_to_handler;
 	unsigned long old;
-	struct ftrace_graph_ent trace;
-	int err;
 
 	if (unlikely(atomic_read(&current->tracing_graph_pause)))
 		return;
@@ -229,18 +227,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
 	 */
 	old = *parent;
 
-	trace.func = self_addr;
-	trace.depth = current->curr_ret_stack + 1;
-
-	/* Only trace if the calling function expects to */
-	if (!ftrace_graph_entry(&trace))
-		return;
-
-	err = ftrace_push_return_trace(old, self_addr, &trace.depth,
-				       frame_pointer, NULL);
-	if (err == -EBUSY)
-		return;
-	else
+	if (!function_graph_enter(old, self_addr, frame_pointer, NULL))
 		*parent = return_hooker;
 }
 
-- 
GitLab


From 556763e5a500d71879d632867b75826551acd49c Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Sun, 18 Nov 2018 17:23:30 -0500
Subject: [PATCH 1485/1890] microblaze: function_graph: Simplify with
 function_graph_enter()

The function_graph_enter() function does the work of calling the function
graph hook function and the management of the shadow stack, simplifying the
work done in the architecture dependent prepare_ftrace_return().

Have microblaze use the new code, and remove the shadow stack management as well as
having to set up the trace structure.

This is needed to prepare for a fix of a design bug on how the curr_ret_stack
is used.

Cc: Michal Simek <monstr@monstr.eu>
Cc: stable@kernel.org
Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback")
Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 arch/microblaze/kernel/ftrace.c | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

diff --git a/arch/microblaze/kernel/ftrace.c b/arch/microblaze/kernel/ftrace.c
index d57563c58a26b..224eea40e1ee8 100644
--- a/arch/microblaze/kernel/ftrace.c
+++ b/arch/microblaze/kernel/ftrace.c
@@ -22,8 +22,7 @@
 void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 {
 	unsigned long old;
-	int faulted, err;
-	struct ftrace_graph_ent trace;
+	int faulted;
 	unsigned long return_hooker = (unsigned long)
 				&return_to_handler;
 
@@ -63,18 +62,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 		return;
 	}
 
-	err = ftrace_push_return_trace(old, self_addr, &trace.depth, 0, NULL);
-	if (err == -EBUSY) {
+	if (function_graph_enter(old, self_addr, 0, NULL))
 		*parent = old;
-		return;
-	}
-
-	trace.func = self_addr;
-	/* Only trace if the calling function expects to */
-	if (!ftrace_graph_entry(&trace)) {
-		current->curr_ret_stack--;
-		*parent = old;
-	}
 }
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 
-- 
GitLab


From 8712b27c5723c26400a2b350faf1d6d9fd7ffaad Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Sun, 18 Nov 2018 17:25:18 -0500
Subject: [PATCH 1486/1890] MIPS: function_graph: Simplify with
 function_graph_enter()

The function_graph_enter() function does the work of calling the function
graph hook function and the management of the shadow stack, simplifying the
work done in the architecture dependent prepare_ftrace_return().

Have MIPS use the new code, and remove the shadow stack management as well as
having to set up the trace structure.

This is needed to prepare for a fix of a design bug on how the curr_ret_stack
is used.

Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Paul Burton <paul.burton@mips.com>
Cc: James Hogan <jhogan@kernel.org>
Cc: linux-mips@linux-mips.org
Cc: stable@kernel.org
Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback")
Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 arch/mips/kernel/ftrace.c | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c
index 7f3dfdbc3657e..b122cbb4aad18 100644
--- a/arch/mips/kernel/ftrace.c
+++ b/arch/mips/kernel/ftrace.c
@@ -322,7 +322,6 @@ void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra,
 			   unsigned long fp)
 {
 	unsigned long old_parent_ra;
-	struct ftrace_graph_ent trace;
 	unsigned long return_hooker = (unsigned long)
 	    &return_to_handler;
 	int faulted, insns;
@@ -369,12 +368,6 @@ void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra,
 	if (unlikely(faulted))
 		goto out;
 
-	if (ftrace_push_return_trace(old_parent_ra, self_ra, &trace.depth, fp,
-				     NULL) == -EBUSY) {
-		*parent_ra_addr = old_parent_ra;
-		return;
-	}
-
 	/*
 	 * Get the recorded ip of the current mcount calling site in the
 	 * __mcount_loc section, which will be used to filter the function
@@ -382,13 +375,10 @@ void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra,
 	 */
 
 	insns = core_kernel_text(self_ra) ? 2 : MCOUNT_OFFSET_INSNS + 1;
-	trace.func = self_ra - (MCOUNT_INSN_SIZE * insns);
+	self_ra -= (MCOUNT_INSN_SIZE * insns);
 
-	/* Only trace if the calling function expects to */
-	if (!ftrace_graph_entry(&trace)) {
-		current->curr_ret_stack--;
+	if (function_graph_enter(old_parent_ra, self_ra, fp, NULL))
 		*parent_ra_addr = old_parent_ra;
-	}
 	return;
 out:
 	ftrace_graph_stop();
-- 
GitLab


From d48ebb24866edea2c35be02a878f25bc65529370 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Sun, 18 Nov 2018 17:26:35 -0500
Subject: [PATCH 1487/1890] nds32: function_graph: Simplify with
 function_graph_enter()

The function_graph_enter() function does the work of calling the function
graph hook function and the management of the shadow stack, simplifying the
work done in the architecture dependent prepare_ftrace_return().

Have nds32 use the new code, and remove the shadow stack management as well as
having to set up the trace structure.

This is needed to prepare for a fix of a design bug on how the curr_ret_stack
is used.

Cc: Greentime Hu <greentime@andestech.com>
Cc: stable@kernel.org
Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback")
Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 arch/nds32/kernel/ftrace.c | 18 ++----------------
 1 file changed, 2 insertions(+), 16 deletions(-)

diff --git a/arch/nds32/kernel/ftrace.c b/arch/nds32/kernel/ftrace.c
index a0a9679ad5dee..8a41372551ff3 100644
--- a/arch/nds32/kernel/ftrace.c
+++ b/arch/nds32/kernel/ftrace.c
@@ -211,29 +211,15 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
 			   unsigned long frame_pointer)
 {
 	unsigned long return_hooker = (unsigned long)&return_to_handler;
-	struct ftrace_graph_ent trace;
 	unsigned long old;
-	int err;
 
 	if (unlikely(atomic_read(&current->tracing_graph_pause)))
 		return;
 
 	old = *parent;
 
-	trace.func = self_addr;
-	trace.depth = current->curr_ret_stack + 1;
-
-	/* Only trace if the calling function expects to */
-	if (!ftrace_graph_entry(&trace))
-		return;
-
-	err = ftrace_push_return_trace(old, self_addr, &trace.depth,
-				       frame_pointer, NULL);
-
-	if (err == -EBUSY)
-		return;
-
-	*parent = return_hooker;
+	if (!function_graph_enter(old, self_addr, frame_pointer, NULL))
+		*parent = return_hooker;
 }
 
 noinline void ftrace_graph_caller(void)
-- 
GitLab


From a87532c78d291265efadc4b20a8c7a70cd59ea29 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Sun, 18 Nov 2018 17:27:43 -0500
Subject: [PATCH 1488/1890] parisc: function_graph: Simplify with
 function_graph_enter()

The function_graph_enter() function does the work of calling the function
graph hook function and the management of the shadow stack, simplifying the
work done in the architecture dependent prepare_ftrace_return().

Have parisc use the new code, and remove the shadow stack management as well as
having to set up the trace structure.

This is needed to prepare for a fix of a design bug on how the curr_ret_stack
is used.

Cc: "James E.J. Bottomley" <jejb@parisc-linux.org>
Cc: Helge Deller <deller@gmx.de>
Cc: linux-parisc@vger.kernel.org
Cc: stable@kernel.org
Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback")
Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 arch/parisc/kernel/ftrace.c | 17 +++--------------
 1 file changed, 3 insertions(+), 14 deletions(-)

diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c
index 6fa8535d3cceb..e46a4157a8948 100644
--- a/arch/parisc/kernel/ftrace.c
+++ b/arch/parisc/kernel/ftrace.c
@@ -30,7 +30,6 @@ static void __hot prepare_ftrace_return(unsigned long *parent,
 					unsigned long self_addr)
 {
 	unsigned long old;
-	struct ftrace_graph_ent trace;
 	extern int parisc_return_to_handler;
 
 	if (unlikely(ftrace_graph_is_dead()))
@@ -41,19 +40,9 @@ static void __hot prepare_ftrace_return(unsigned long *parent,
 
 	old = *parent;
 
-	trace.func = self_addr;
-	trace.depth = current->curr_ret_stack + 1;
-
-	/* Only trace if the calling function expects to */
-	if (!ftrace_graph_entry(&trace))
-		return;
-
-        if (ftrace_push_return_trace(old, self_addr, &trace.depth,
-				     0, NULL) == -EBUSY)
-                return;
-
-	/* activate parisc_return_to_handler() as return point */
-	*parent = (unsigned long) &parisc_return_to_handler;
+	if (!function_graph_enter(old, self_addr, 0, NULL))
+		/* activate parisc_return_to_handler() as return point */
+		*parent = (unsigned long) &parisc_return_to_handler;
 }
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 
-- 
GitLab


From fe60522ec60082a1dd735691b82c64f65d4ad15e Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Sun, 18 Nov 2018 17:28:53 -0500
Subject: [PATCH 1489/1890] powerpc/function_graph: Simplify with
 function_graph_enter()

The function_graph_enter() function does the work of calling the function
graph hook function and the management of the shadow stack, simplifying the
work done in the architecture dependent prepare_ftrace_return().

Have powerpc use the new code, and remove the shadow stack management as well as
having to set up the trace structure.

This is needed to prepare for a fix of a design bug on how the curr_ret_stack
is used.

Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: linuxppc-dev@lists.ozlabs.org
Cc: stable@kernel.org
Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback")
Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 arch/powerpc/kernel/trace/ftrace.c | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
index 4bf051d3e21e7..b65c8a34ad6ef 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -950,7 +950,6 @@ int ftrace_disable_ftrace_graph_caller(void)
  */
 unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
 {
-	struct ftrace_graph_ent trace;
 	unsigned long return_hooker;
 
 	if (unlikely(ftrace_graph_is_dead()))
@@ -961,18 +960,8 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
 
 	return_hooker = ppc_function_entry(return_to_handler);
 
-	trace.func = ip;
-	trace.depth = current->curr_ret_stack + 1;
-
-	/* Only trace if the calling function expects to */
-	if (!ftrace_graph_entry(&trace))
-		goto out;
-
-	if (ftrace_push_return_trace(parent, ip, &trace.depth, 0,
-				     NULL) == -EBUSY)
-		goto out;
-
-	parent = return_hooker;
+	if (!function_graph_enter(parent, ip, 0, NULL))
+		parent = return_hooker;
 out:
 	return parent;
 }
-- 
GitLab


From e949b6db51dc172a35c962bc4414ca148315fe21 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Sun, 18 Nov 2018 17:31:44 -0500
Subject: [PATCH 1490/1890] riscv/function_graph: Simplify with
 function_graph_enter()

The function_graph_enter() function does the work of calling the function
graph hook function and the management of the shadow stack, simplifying the
work done in the architecture dependent prepare_ftrace_return().

Have riscv use the new code, and remove the shadow stack management as well as
having to set up the trace structure.

This is needed to prepare for a fix of a design bug on how the curr_ret_stack
is used.

Cc: Greentime Hu <greentime@andestech.com>
Cc: Alan Kao <alankao@andestech.com>
Cc: stable@kernel.org
Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback")
Reviewed-by: Palmer Dabbelt <palmer@sifive.com>
Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 arch/riscv/kernel/ftrace.c | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c
index 1157b6b52d259..c433f6d3dd64f 100644
--- a/arch/riscv/kernel/ftrace.c
+++ b/arch/riscv/kernel/ftrace.c
@@ -132,7 +132,6 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
 {
 	unsigned long return_hooker = (unsigned long)&return_to_handler;
 	unsigned long old;
-	struct ftrace_graph_ent trace;
 	int err;
 
 	if (unlikely(atomic_read(&current->tracing_graph_pause)))
@@ -144,17 +143,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
 	 */
 	old = *parent;
 
-	trace.func = self_addr;
-	trace.depth = current->curr_ret_stack + 1;
-
-	if (!ftrace_graph_entry(&trace))
-		return;
-
-	err = ftrace_push_return_trace(old, self_addr, &trace.depth,
-				       frame_pointer, parent);
-	if (err == -EBUSY)
-		return;
-	*parent = return_hooker;
+	if (function_graph_enter(old, self_addr, frame_pointer, parent))
+		*parent = return_hooker;
 }
 
 #ifdef CONFIG_DYNAMIC_FTRACE
-- 
GitLab


From 18588e1487b19e45bd90bd55ec8d3a1d44f3257f Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Sun, 18 Nov 2018 17:33:17 -0500
Subject: [PATCH 1491/1890] s390/function_graph: Simplify with
 function_graph_enter()

The function_graph_enter() function does the work of calling the function
graph hook function and the management of the shadow stack, simplifying the
work done in the architecture dependent prepare_ftrace_return().

Have s390 use the new code, and remove the shadow stack management as well as
having to set up the trace structure.

This is needed to prepare for a fix of a design bug on how the curr_ret_stack
is used.

Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Julian Wiedmann <jwi@linux.ibm.com>
Cc: linux-s390@vger.kernel.org
Cc: stable@kernel.org
Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback")
Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 arch/s390/kernel/ftrace.c | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 84be7f02d0c21..39b13d71a8fe6 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -203,22 +203,13 @@ device_initcall(ftrace_plt_init);
  */
 unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
 {
-	struct ftrace_graph_ent trace;
-
 	if (unlikely(ftrace_graph_is_dead()))
 		goto out;
 	if (unlikely(atomic_read(&current->tracing_graph_pause)))
 		goto out;
 	ip -= MCOUNT_INSN_SIZE;
-	trace.func = ip;
-	trace.depth = current->curr_ret_stack + 1;
-	/* Only trace if the calling function expects to. */
-	if (!ftrace_graph_entry(&trace))
-		goto out;
-	if (ftrace_push_return_trace(parent, ip, &trace.depth, 0,
-				     NULL) == -EBUSY)
-		goto out;
-	parent = (unsigned long) return_to_handler;
+	if (!function_graph_enter(parent, ip, 0, NULL))
+		parent = (unsigned long) return_to_handler;
 out:
 	return parent;
 }
-- 
GitLab


From bc715ee4dbc5db462c59b9cfba92d31b3274fe3a Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Sun, 18 Nov 2018 17:35:37 -0500
Subject: [PATCH 1492/1890] sh/function_graph: Simplify with
 function_graph_enter()

The function_graph_enter() function does the work of calling the function
graph hook function and the management of the shadow stack, simplifying the
work done in the architecture dependent prepare_ftrace_return().

Have superh use the new code, and remove the shadow stack management as well as
having to set up the trace structure.

This is needed to prepare for a fix of a design bug on how the curr_ret_stack
is used.

Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Cc: Rich Felker <dalias@libc.org>
Cc: linux-sh@vger.kernel.org
Cc: stable@kernel.org
Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback")
Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 arch/sh/kernel/ftrace.c | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

diff --git a/arch/sh/kernel/ftrace.c b/arch/sh/kernel/ftrace.c
index 96dd9f7da2506..1b04270e5460e 100644
--- a/arch/sh/kernel/ftrace.c
+++ b/arch/sh/kernel/ftrace.c
@@ -321,8 +321,7 @@ int ftrace_disable_ftrace_graph_caller(void)
 void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 {
 	unsigned long old;
-	int faulted, err;
-	struct ftrace_graph_ent trace;
+	int faulted;
 	unsigned long return_hooker = (unsigned long)&return_to_handler;
 
 	if (unlikely(ftrace_graph_is_dead()))
@@ -365,18 +364,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 		return;
 	}
 
-	err = ftrace_push_return_trace(old, self_addr, &trace.depth, 0, NULL);
-	if (err == -EBUSY) {
+	if (function_graph_enter(old, self_addr, 0, NULL))
 		__raw_writel(old, parent);
-		return;
-	}
-
-	trace.func = self_addr;
-
-	/* Only trace if the calling function expects to */
-	if (!ftrace_graph_entry(&trace)) {
-		current->curr_ret_stack--;
-		__raw_writel(old, parent);
-	}
 }
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
-- 
GitLab


From 9c4bf5e0db164f330a2d3e128e9832661f69f0e9 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Sun, 18 Nov 2018 17:37:40 -0500
Subject: [PATCH 1493/1890] sparc/function_graph: Simplify with
 function_graph_enter()

The function_graph_enter() function does the work of calling the function
graph hook function and the management of the shadow stack, simplifying the
work done in the architecture dependent prepare_ftrace_return().

Have sparc use the new code, and remove the shadow stack management as well as
having to set up the trace structure.

This is needed to prepare for a fix of a design bug on how the curr_ret_stack
is used.

Cc: "David S. Miller" <davem@davemloft.net>
Cc: sparclinux@vger.kernel.org
Cc: stable@kernel.org
Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback")
Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 arch/sparc/kernel/ftrace.c | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/arch/sparc/kernel/ftrace.c b/arch/sparc/kernel/ftrace.c
index 915dda4ae4120..684b84ce397f7 100644
--- a/arch/sparc/kernel/ftrace.c
+++ b/arch/sparc/kernel/ftrace.c
@@ -126,20 +126,11 @@ unsigned long prepare_ftrace_return(unsigned long parent,
 				    unsigned long frame_pointer)
 {
 	unsigned long return_hooker = (unsigned long) &return_to_handler;
-	struct ftrace_graph_ent trace;
 
 	if (unlikely(atomic_read(&current->tracing_graph_pause)))
 		return parent + 8UL;
 
-	trace.func = self_addr;
-	trace.depth = current->curr_ret_stack + 1;
-
-	/* Only trace if the calling function expects to */
-	if (!ftrace_graph_entry(&trace))
-		return parent + 8UL;
-
-	if (ftrace_push_return_trace(parent, self_addr, &trace.depth,
-				     frame_pointer, NULL) == -EBUSY)
+	if (function_graph_enter(parent, self_addr, frame_pointer, NULL))
 		return parent + 8UL;
 
 	return return_hooker;
-- 
GitLab


From d125f3f866df88da5a85df00291f88f0baa89f7c Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Mon, 19 Nov 2018 07:40:39 -0500
Subject: [PATCH 1494/1890] function_graph: Make ftrace_push_return_trace()
 static

As all architectures now call function_graph_enter() to do the entry work,
no architecture should ever call ftrace_push_return_trace(). Make it static.

This is needed to prepare for a fix of a design bug on how the curr_ret_stack
is used.

Cc: stable@kernel.org
Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback")
Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/ftrace.h               | 3 ---
 kernel/trace/trace_functions_graph.c | 2 +-
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 5717e8f81c595..dd16e8218db3a 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -776,9 +776,6 @@ struct ftrace_ret_stack {
  */
 extern void return_to_handler(void);
 
-extern int
-ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
-			 unsigned long frame_pointer, unsigned long *retp);
 extern int
 function_graph_enter(unsigned long ret, unsigned long func,
 		     unsigned long frame_pointer, unsigned long *retp);
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 28f2602435d05..88ca787a1cdc1 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -118,7 +118,7 @@ print_graph_duration(struct trace_array *tr, unsigned long long duration,
 		     struct trace_seq *s, u32 flags);
 
 /* Add a function return address to the trace stack on thread info.*/
-int
+static int
 ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
 			 unsigned long frame_pointer, unsigned long *retp)
 {
-- 
GitLab


From 39eb456dacb543de90d3bc6a8e0ac5cf51ac475e Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Mon, 19 Nov 2018 08:07:12 -0500
Subject: [PATCH 1495/1890] function_graph: Use new curr_ret_depth to manage
 depth instead of curr_ret_stack

Currently, the depth of the ret_stack is determined by curr_ret_stack index.
The issue is that there's a race between setting of the curr_ret_stack and
calling of the callback attached to the return of the function.

Commit 03274a3ffb44 ("tracing/fgraph: Adjust fgraph depth before calling
trace return callback") moved the calling of the callback to after the
setting of the curr_ret_stack, even stating that it was safe to do so, when
in fact, it was the reason there was a barrier() there (yes, I should have
commented that barrier()).

Not only does the curr_ret_stack keep track of the current call graph depth,
it also keeps the ret_stack content from being overwritten by new data.

The function profiler, uses the "subtime" variable of ret_stack structure
and by moving the curr_ret_stack, it allows for interrupts to use the same
structure it was using, corrupting the data, and breaking the profiler.

To fix this, there needs to be two variables to handle the call stack depth
and the pointer to where the ret_stack is being used, as they need to change
at two different locations.

Cc: stable@kernel.org
Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback")
Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/sched.h                |  1 +
 kernel/trace/ftrace.c                |  3 +++
 kernel/trace/trace_functions_graph.c | 21 +++++++++++++--------
 3 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index a51c13c2b1a03..d6183a55e8eb1 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1116,6 +1116,7 @@ struct task_struct {
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 	/* Index of current stored address in ret_stack: */
 	int				curr_ret_stack;
+	int				curr_ret_depth;
 
 	/* Stack of return addresses for return function tracing: */
 	struct ftrace_ret_stack		*ret_stack;
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index f536f601bd46e..48513954713c4 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -6814,6 +6814,7 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
 			atomic_set(&t->tracing_graph_pause, 0);
 			atomic_set(&t->trace_overrun, 0);
 			t->curr_ret_stack = -1;
+			t->curr_ret_depth = -1;
 			/* Make sure the tasks see the -1 first: */
 			smp_wmb();
 			t->ret_stack = ret_stack_list[start++];
@@ -7038,6 +7039,7 @@ graph_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack)
 void ftrace_graph_init_idle_task(struct task_struct *t, int cpu)
 {
 	t->curr_ret_stack = -1;
+	t->curr_ret_depth = -1;
 	/*
 	 * The idle task has no parent, it either has its own
 	 * stack or no stack at all.
@@ -7068,6 +7070,7 @@ void ftrace_graph_init_task(struct task_struct *t)
 	/* Make sure we do not use the parent ret_stack */
 	t->ret_stack = NULL;
 	t->curr_ret_stack = -1;
+	t->curr_ret_depth = -1;
 
 	if (ftrace_graph_active) {
 		struct ftrace_ret_stack *ret_stack;
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 88ca787a1cdc1..02d4081a7f5af 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -119,7 +119,7 @@ print_graph_duration(struct trace_array *tr, unsigned long long duration,
 
 /* Add a function return address to the trace stack on thread info.*/
 static int
-ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
+ftrace_push_return_trace(unsigned long ret, unsigned long func,
 			 unsigned long frame_pointer, unsigned long *retp)
 {
 	unsigned long long calltime;
@@ -177,8 +177,6 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
 #ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
 	current->ret_stack[index].retp = retp;
 #endif
-	*depth = current->curr_ret_stack;
-
 	return 0;
 }
 
@@ -188,14 +186,20 @@ int function_graph_enter(unsigned long ret, unsigned long func,
 	struct ftrace_graph_ent trace;
 
 	trace.func = func;
-	trace.depth = current->curr_ret_stack + 1;
+	trace.depth = ++current->curr_ret_depth;
 
 	/* Only trace if the calling function expects to */
 	if (!ftrace_graph_entry(&trace))
-		return -EBUSY;
+		goto out;
 
-	return ftrace_push_return_trace(ret, func, &trace.depth,
-					frame_pointer, retp);
+	if (ftrace_push_return_trace(ret, func,
+				     frame_pointer, retp))
+		goto out;
+
+	return 0;
+ out:
+	current->curr_ret_depth--;
+	return -EBUSY;
 }
 
 /* Retrieve a function return address to the trace stack on thread info.*/
@@ -257,7 +261,7 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
 	trace->func = current->ret_stack[index].func;
 	trace->calltime = current->ret_stack[index].calltime;
 	trace->overrun = atomic_read(&current->trace_overrun);
-	trace->depth = index;
+	trace->depth = current->curr_ret_depth;
 }
 
 /*
@@ -273,6 +277,7 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
 	trace.rettime = trace_clock_local();
 	barrier();
 	current->curr_ret_stack--;
+	current->curr_ret_depth--;
 	/*
 	 * The curr_ret_stack can be less than -1 only if it was
 	 * filtered out and it's about to return from the function.
-- 
GitLab


From 552701dd0fa7c3d448142e87210590ba424694a0 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Mon, 19 Nov 2018 15:18:40 -0500
Subject: [PATCH 1496/1890] function_graph: Move return callback before update
 of curr_ret_stack

In the past, curr_ret_stack had two functions. One was to denote the depth
of the call graph, the other is to keep track of where on the ret_stack the
data is used. Although they may be slightly related, there are two cases
where they need to be used differently.

The one case is that it keeps the ret_stack data from being corrupted by an
interrupt coming in and overwriting the data still in use. The other is just
to know where the depth of the stack currently is.

The function profiler uses the ret_stack to save a "subtime" variable that
is part of the data on the ret_stack. If curr_ret_stack is modified too
early, then this variable can be corrupted.

The "max_depth" option, when set to 1, will record the first functions going
into the kernel. To see all top functions (when dealing with timings), the
depth variable needs to be lowered before calling the return hook. But by
lowering the curr_ret_stack, it makes the data on the ret_stack still being
used by the return hook susceptible to being overwritten.

Now that there's two variables to handle both cases (curr_ret_depth), we can
move them to the locations where they can handle both cases.

Cc: stable@kernel.org
Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback")
Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace_functions_graph.c | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 02d4081a7f5af..4f0d72ae63622 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -261,7 +261,13 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
 	trace->func = current->ret_stack[index].func;
 	trace->calltime = current->ret_stack[index].calltime;
 	trace->overrun = atomic_read(&current->trace_overrun);
-	trace->depth = current->curr_ret_depth;
+	trace->depth = current->curr_ret_depth--;
+	/*
+	 * We still want to trace interrupts coming in if
+	 * max_depth is set to 1. Make sure the decrement is
+	 * seen before ftrace_graph_return.
+	 */
+	barrier();
 }
 
 /*
@@ -275,9 +281,14 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
 
 	ftrace_pop_return_trace(&trace, &ret, frame_pointer);
 	trace.rettime = trace_clock_local();
+	ftrace_graph_return(&trace);
+	/*
+	 * The ftrace_graph_return() may still access the current
+	 * ret_stack structure, we need to make sure the update of
+	 * curr_ret_stack is after that.
+	 */
 	barrier();
 	current->curr_ret_stack--;
-	current->curr_ret_depth--;
 	/*
 	 * The curr_ret_stack can be less than -1 only if it was
 	 * filtered out and it's about to return from the function.
@@ -288,13 +299,6 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
 		return ret;
 	}
 
-	/*
-	 * The trace should run after decrementing the ret counter
-	 * in case an interrupt were to come in. We don't want to
-	 * lose the interrupt if max_depth is set.
-	 */
-	ftrace_graph_return(&trace);
-
 	if (unlikely(!ret)) {
 		ftrace_graph_stop();
 		WARN_ON(1);
-- 
GitLab


From 7c6ea35ef50810aa12ab26f21cb858d980881576 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Tue, 20 Nov 2018 12:40:25 -0500
Subject: [PATCH 1497/1890] function_graph: Reverse the order of pushing the
 ret_stack and the callback

The function graph profiler uses the ret_stack to store the "subtime" and
reuse it by nested functions and also on the return. But the current logic
has the profiler callback called before the ret_stack is updated, and it is
just modifying the ret_stack that will later be allocated (it's just lucky
that the "subtime" is not touched when it is allocated).

This could also cause a crash if we are at the end of the ret_stack when
this happens.

By reversing the order of the allocating the ret_stack and then calling the
callbacks attached to a function being traced, the ret_stack entry is no
longer used before it is allocated.

Cc: stable@kernel.org
Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback")
Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace_functions_graph.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 4f0d72ae63622..2561460d7baf8 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -188,15 +188,17 @@ int function_graph_enter(unsigned long ret, unsigned long func,
 	trace.func = func;
 	trace.depth = ++current->curr_ret_depth;
 
-	/* Only trace if the calling function expects to */
-	if (!ftrace_graph_entry(&trace))
-		goto out;
-
 	if (ftrace_push_return_trace(ret, func,
 				     frame_pointer, retp))
 		goto out;
 
+	/* Only trace if the calling function expects to */
+	if (!ftrace_graph_entry(&trace))
+		goto out_ret;
+
 	return 0;
+ out_ret:
+	current->curr_ret_stack--;
  out:
 	current->curr_ret_depth--;
 	return -EBUSY;
-- 
GitLab


From b1b35f2e218a5b57d03bbc3b0667d5064570dc60 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Tue, 20 Nov 2018 12:51:07 -0500
Subject: [PATCH 1498/1890] function_graph: Have profiler use curr_ret_stack
 and not depth

The profiler uses trace->depth to find its entry on the ret_stack, but the
depth may not match the actual location of where its entry is (if an
interrupt were to preempt the processing of the profiler for another
function, the depth and the curr_ret_stack will be different).

Have it use the curr_ret_stack as the index to find its ret_stack entry
instead of using the depth variable, as that is no longer guaranteed to be
the same.

Cc: stable@kernel.org
Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback")
Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/ftrace.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 48513954713c4..77734451cb05d 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -817,7 +817,7 @@ function_profile_call(unsigned long ip, unsigned long parent_ip,
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 static int profile_graph_entry(struct ftrace_graph_ent *trace)
 {
-	int index = trace->depth;
+	int index = current->curr_ret_stack;
 
 	function_profile_call(trace->func, 0, NULL, NULL);
 
@@ -852,7 +852,7 @@ static void profile_graph_return(struct ftrace_graph_ret *trace)
 	if (!fgraph_graph_time) {
 		int index;
 
-		index = trace->depth;
+		index = current->curr_ret_stack;
 
 		/* Append this call time to the parent time to subtract */
 		if (index)
-- 
GitLab


From 3054426dc68e5d63aa6a6e9b91ac4ec78e3f3805 Mon Sep 17 00:00:00 2001
From: Pavankumar Kondeti <pkondeti@codeaurora.org>
Date: Tue, 30 Oct 2018 12:24:33 +0530
Subject: [PATCH 1499/1890] sched, trace: Fix prev_state output in sched_switch
 tracepoint

commit 3f5fe9fef5b2 ("sched/debug: Fix task state recording/printout")
tried to fix the problem introduced by a previous commit efb40f588b43
("sched/tracing: Fix trace_sched_switch task-state printing"). However
the prev_state output in sched_switch is still broken.

task_state_index() uses fls() which considers the LSB as 1. Left
shifting 1 by this value gives an incorrect mapping to the task state.
Fix this by decrementing the value returned by __get_task_state()
before shifting.

Link: http://lkml.kernel.org/r/1540882473-1103-1-git-send-email-pkondeti@codeaurora.org

Cc: stable@vger.kernel.org
Fixes: 3f5fe9fef5b2 ("sched/debug: Fix task state recording/printout")
Signed-off-by: Pavankumar Kondeti <pkondeti@codeaurora.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/trace/events/sched.h | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index f07b270d4fc4f..9a4bdfadab077 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -107,6 +107,8 @@ DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new,
 #ifdef CREATE_TRACE_POINTS
 static inline long __trace_sched_switch_state(bool preempt, struct task_struct *p)
 {
+	unsigned int state;
+
 #ifdef CONFIG_SCHED_DEBUG
 	BUG_ON(p != current);
 #endif /* CONFIG_SCHED_DEBUG */
@@ -118,7 +120,15 @@ static inline long __trace_sched_switch_state(bool preempt, struct task_struct *
 	if (preempt)
 		return TASK_REPORT_MAX;
 
-	return 1 << task_state_index(p);
+	/*
+	 * task_state_index() uses fls() and returns a value from 0-8 range.
+	 * Decrement it by 1 (except TASK_RUNNING state i.e 0) before using
+	 * it for left shift operation to get the correct task->state
+	 * mapping.
+	 */
+	state = task_state_index(p);
+
+	return state ? (1 << (state - 1)) : state;
 }
 #endif /* CREATE_TRACE_POINTS */
 
-- 
GitLab


From d206e6b7ea3fdc8ec8f6be9a2ecfe58142b49e37 Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Mon, 5 Nov 2018 21:50:13 -0800
Subject: [PATCH 1500/1890] arm64: dts: qcom: sdm845-mtp: Mark protected gcc
 clocks

As of v4.20-rc1 probing the GCC driver on a SDM845 device with the
standard security implementation causes an access violation and an
immediate system restart. Use the protected-clocks property to mark the
offending clocks protected for the MTP, in order to allow it to boot.

Cc: Stephen Boyd <sboyd@kernel.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Acked-by: Andy Gross <andy.gross@linaro.org>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 arch/arm64/boot/dts/qcom/sdm845-mtp.dts | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/sdm845-mtp.dts b/arch/arm64/boot/dts/qcom/sdm845-mtp.dts
index eedfaf8922e2a..ef2d059c2db13 100644
--- a/arch/arm64/boot/dts/qcom/sdm845-mtp.dts
+++ b/arch/arm64/boot/dts/qcom/sdm845-mtp.dts
@@ -343,6 +343,12 @@ vreg_s3c_0p6: smps3 {
 	};
 };
 
+&gcc {
+	protected-clocks = <GCC_QSPI_CORE_CLK>,
+			   <GCC_QSPI_CORE_CLK_SRC>,
+			   <GCC_QSPI_CNOC_PERIPH_AHB_CLK>;
+};
+
 &i2c10 {
 	status = "okay";
 	clock-frequency = <400000>;
-- 
GitLab


From c9287fa657b3328b4549c0ab39ea7f197a3d6a50 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Mon, 19 Nov 2018 16:49:05 +0100
Subject: [PATCH 1501/1890] usb: gadget: u_ether: fix unsafe list iteration

list_for_each_entry_safe() is not safe for deleting entries from the
list if the spin lock, which protects it, is released and reacquired during
the list iteration. Fix this issue by replacing this construction with
a simple check if list is empty and removing the first entry in each
iteration. This is almost equivalent to a revert of the commit mentioned in
the Fixes: tag.

This patch fixes following issue:
--->8---
Unable to handle kernel NULL pointer dereference at virtual address 00000104
pgd = (ptrval)
[00000104] *pgd=00000000
Internal error: Oops: 817 [#1] PREEMPT SMP ARM
Modules linked in:
CPU: 1 PID: 84 Comm: kworker/1:1 Not tainted 4.20.0-rc2-next-20181114-00009-g8266b35ec404 #1061
Hardware name: SAMSUNG EXYNOS (Flattened Device Tree)
Workqueue: events eth_work
PC is at rx_fill+0x60/0xac
LR is at _raw_spin_lock_irqsave+0x50/0x5c
pc : [<c065fee0>]    lr : [<c0a056b8>]    psr: 80000093
sp : ee7fbee8  ip : 00000100  fp : 00000000
r10: 006000c0  r9 : c10b0ab0  r8 : ee7eb5c0
r7 : ee7eb614  r6 : ee7eb5ec  r5 : 000000dc  r4 : ee12ac00
r3 : ee12ac24  r2 : 00000200  r1 : 60000013  r0 : ee7eb5ec
Flags: Nzcv  IRQs off  FIQs on  Mode SVC_32  ISA ARM  Segment none
Control: 10c5387d  Table: 6d5dc04a  DAC: 00000051
Process kworker/1:1 (pid: 84, stack limit = 0x(ptrval))
Stack: (0xee7fbee8 to 0xee7fc000)
...
[<c065fee0>] (rx_fill) from [<c0143b7c>] (process_one_work+0x200/0x738)
[<c0143b7c>] (process_one_work) from [<c0144118>] (worker_thread+0x2c/0x4c8)
[<c0144118>] (worker_thread) from [<c014a8a4>] (kthread+0x128/0x164)
[<c014a8a4>] (kthread) from [<c01010b4>] (ret_from_fork+0x14/0x20)
Exception stack(0xee7fbfb0 to 0xee7fbff8)
...
---[ end trace 64480bc835eba7d6 ]---

Fixes: fea14e68ff5e ("usb: gadget: u_ether: use better list accessors")
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/gadget/function/u_ether.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c
index 1000d864929c3..0f026d445e316 100644
--- a/drivers/usb/gadget/function/u_ether.c
+++ b/drivers/usb/gadget/function/u_ether.c
@@ -401,12 +401,12 @@ static int alloc_requests(struct eth_dev *dev, struct gether *link, unsigned n)
 static void rx_fill(struct eth_dev *dev, gfp_t gfp_flags)
 {
 	struct usb_request	*req;
-	struct usb_request	*tmp;
 	unsigned long		flags;
 
 	/* fill unused rxq slots with some skb */
 	spin_lock_irqsave(&dev->req_lock, flags);
-	list_for_each_entry_safe(req, tmp, &dev->rx_reqs, list) {
+	while (!list_empty(&dev->rx_reqs)) {
+		req = list_first_entry(&dev->rx_reqs, struct usb_request, list);
 		list_del_init(&req->list);
 		spin_unlock_irqrestore(&dev->req_lock, flags);
 
@@ -1125,7 +1125,6 @@ void gether_disconnect(struct gether *link)
 {
 	struct eth_dev		*dev = link->ioport;
 	struct usb_request	*req;
-	struct usb_request	*tmp;
 
 	WARN_ON(!dev);
 	if (!dev)
@@ -1142,7 +1141,8 @@ void gether_disconnect(struct gether *link)
 	 */
 	usb_ep_disable(link->in_ep);
 	spin_lock(&dev->req_lock);
-	list_for_each_entry_safe(req, tmp, &dev->tx_reqs, list) {
+	while (!list_empty(&dev->tx_reqs)) {
+		req = list_first_entry(&dev->tx_reqs, struct usb_request, list);
 		list_del(&req->list);
 
 		spin_unlock(&dev->req_lock);
@@ -1154,7 +1154,8 @@ void gether_disconnect(struct gether *link)
 
 	usb_ep_disable(link->out_ep);
 	spin_lock(&dev->req_lock);
-	list_for_each_entry_safe(req, tmp, &dev->rx_reqs, list) {
+	while (!list_empty(&dev->rx_reqs)) {
+		req = list_first_entry(&dev->rx_reqs, struct usb_request, list);
 		list_del(&req->list);
 
 		spin_unlock(&dev->req_lock);
-- 
GitLab


From dc92843159a7d01e37cf68750745333e944a9e55 Mon Sep 17 00:00:00 2001
From: Tudor Ambarus <tudor.ambarus@microchip.com>
Date: Mon, 26 Nov 2018 12:45:44 +0000
Subject: [PATCH 1502/1890] mtd: spi-nor: fix erase_type array to indicate
 current map conf

BFPT advertises all the erase types supported by all the possible
map configurations. Mask out the erase types that are not supported
by the current map configuration.

Backward compatibility test done on sst26vf064b.

Fixes: b038e8e3be72 ("mtd: spi-nor: parse SFDP Sector Map Parameter Table")
Reported-by: Alexander Sverdlin <alexander.sverdlin@nokia.com>
Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Tested-by: Alexander Sverdlin <alexander.sverdlin@nokia.com>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
---
 drivers/mtd/spi-nor/spi-nor.c | 31 +++++++++++++++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 93c9bc8931fcd..1fdd2834fbcb1 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -2995,12 +2995,13 @@ static int spi_nor_init_non_uniform_erase_map(struct spi_nor *nor,
 					      const u32 *smpt)
 {
 	struct spi_nor_erase_map *map = &nor->erase_map;
-	const struct spi_nor_erase_type *erase = map->erase_type;
+	struct spi_nor_erase_type *erase = map->erase_type;
 	struct spi_nor_erase_region *region;
 	u64 offset;
 	u32 region_count;
 	int i, j;
-	u8 erase_type, uniform_erase_type;
+	u8 uniform_erase_type, save_uniform_erase_type;
+	u8 erase_type, regions_erase_type;
 
 	region_count = SMPT_MAP_REGION_COUNT(*smpt);
 	/*
@@ -3014,6 +3015,7 @@ static int spi_nor_init_non_uniform_erase_map(struct spi_nor *nor,
 	map->regions = region;
 
 	uniform_erase_type = 0xff;
+	regions_erase_type = 0;
 	offset = 0;
 	/* Populate regions. */
 	for (i = 0; i < region_count; i++) {
@@ -3030,13 +3032,38 @@ static int spi_nor_init_non_uniform_erase_map(struct spi_nor *nor,
 		 */
 		uniform_erase_type &= erase_type;
 
+		/*
+		 * regions_erase_type mask will indicate all the erase types
+		 * supported in this configuration map.
+		 */
+		regions_erase_type |= erase_type;
+
 		offset = (region[i].offset & ~SNOR_ERASE_FLAGS_MASK) +
 			 region[i].size;
 	}
 
+	save_uniform_erase_type = map->uniform_erase_type;
 	map->uniform_erase_type = spi_nor_sort_erase_mask(map,
 							  uniform_erase_type);
 
+	if (!regions_erase_type) {
+		/*
+		 * Roll back to the previous uniform_erase_type mask, SMPT is
+		 * broken.
+		 */
+		map->uniform_erase_type = save_uniform_erase_type;
+		return -EINVAL;
+	}
+
+	/*
+	 * BFPT advertises all the erase types supported by all the possible
+	 * map configurations. Mask out the erase types that are not supported
+	 * by the current map configuration.
+	 */
+	for (i = 0; i < SNOR_ERASE_TYPE_MAX; i++)
+		if (!(regions_erase_type & BIT(erase[i].idx)))
+			spi_nor_set_erase_type(&erase[i], 0, 0xFF);
+
 	spi_nor_region_mark_end(&region[i - 1]);
 
 	return 0;
-- 
GitLab


From 60c8144afc287ef09ce8c1230c6aa972659ba1bb Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Tue, 27 Nov 2018 14:41:37 +0100
Subject: [PATCH 1503/1890] x86/MCE/AMD: Fix the thresholding machinery
 initialization order
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently, the code sets up the thresholding interrupt vector and only
then goes about initializing the thresholding banks. Which is wrong,
because an early thresholding interrupt would cause a NULL pointer
dereference when accessing those banks and prevent the machine from
booting.

Therefore, set the thresholding interrupt vector only *after* having
initialized the banks successfully.

Fixes: 18807ddb7f88 ("x86/mce/AMD: Reset Threshold Limit after logging error")
Reported-by: RafaÅ‚ MiÅ‚ecki <rafal@milecki.pl>
Reported-by: John Clemens <clemej@gmail.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Tested-by: RafaÅ‚ MiÅ‚ecki <rafal@milecki.pl>
Tested-by: John Clemens <john@deater.net>
Cc: Aravind Gopalakrishnan <aravindksg.lkml@gmail.com>
Cc: linux-edac@vger.kernel.org
Cc: stable@vger.kernel.org
Cc: Tony Luck <tony.luck@intel.com>
Cc: x86@kernel.org
Cc: Yazen Ghannam <Yazen.Ghannam@amd.com>
Link: https://lkml.kernel.org/r/20181127101700.2964-1-zajec5@gmail.com
Link: https://bugzilla.kernel.org/show_bug.cgi?id=201291
---
 arch/x86/kernel/cpu/mcheck/mce_amd.c | 19 ++++++-------------
 1 file changed, 6 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index dd33c357548f1..e12454e21b8a5 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -56,7 +56,7 @@
 /* Threshold LVT offset is at MSR0xC0000410[15:12] */
 #define SMCA_THR_LVT_OFF	0xF000
 
-static bool thresholding_en;
+static bool thresholding_irq_en;
 
 static const char * const th_names[] = {
 	"load_store",
@@ -534,9 +534,8 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
 
 set_offset:
 	offset = setup_APIC_mce_threshold(offset, new);
-
-	if ((offset == new) && (mce_threshold_vector != amd_threshold_interrupt))
-		mce_threshold_vector = amd_threshold_interrupt;
+	if (offset == new)
+		thresholding_irq_en = true;
 
 done:
 	mce_threshold_block_init(&b, offset);
@@ -1357,9 +1356,6 @@ int mce_threshold_remove_device(unsigned int cpu)
 {
 	unsigned int bank;
 
-	if (!thresholding_en)
-		return 0;
-
 	for (bank = 0; bank < mca_cfg.banks; ++bank) {
 		if (!(per_cpu(bank_map, cpu) & (1 << bank)))
 			continue;
@@ -1377,9 +1373,6 @@ int mce_threshold_create_device(unsigned int cpu)
 	struct threshold_bank **bp;
 	int err = 0;
 
-	if (!thresholding_en)
-		return 0;
-
 	bp = per_cpu(threshold_banks, cpu);
 	if (bp)
 		return 0;
@@ -1408,9 +1401,6 @@ static __init int threshold_init_device(void)
 {
 	unsigned lcpu = 0;
 
-	if (mce_threshold_vector == amd_threshold_interrupt)
-		thresholding_en = true;
-
 	/* to hit CPUs online before the notifier is up */
 	for_each_online_cpu(lcpu) {
 		int err = mce_threshold_create_device(lcpu);
@@ -1419,6 +1409,9 @@ static __init int threshold_init_device(void)
 			return err;
 	}
 
+	if (thresholding_irq_en)
+		mce_threshold_vector = amd_threshold_interrupt;
+
 	return 0;
 }
 /*
-- 
GitLab


From ca08987885a147643817d02bf260bc4756ce8cd4 Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Wed, 28 Nov 2018 11:27:28 +0900
Subject: [PATCH 1504/1890] netfilter: nf_tables: deactivate expressions in
 rule replecement routine

There is no expression deactivation call from the rule replacement path,
hence, chain counter is not decremented. A few steps to reproduce the
problem:

   %nft add table ip filter
   %nft add chain ip filter c1
   %nft add chain ip filter c1
   %nft add rule ip filter c1 jump c2
   %nft replace rule ip filter c1 handle 3 accept
   %nft flush ruleset

<jump c2> expression means immediate NFT_JUMP to chain c2.
Reference count of chain c2 is increased when the rule is added.

When rule is deleted or replaced, the reference counter of c2 should be
decreased via nft_rule_expr_deactivate() which calls
nft_immediate_deactivate().

Splat looks like:
[  214.396453] WARNING: CPU: 1 PID: 21 at net/netfilter/nf_tables_api.c:1432 nf_tables_chain_destroy.isra.38+0x2f9/0x3a0 [nf_tables]
[  214.398983] Modules linked in: nf_tables nfnetlink
[  214.398983] CPU: 1 PID: 21 Comm: kworker/1:1 Not tainted 4.20.0-rc2+ #44
[  214.398983] Workqueue: events nf_tables_trans_destroy_work [nf_tables]
[  214.398983] RIP: 0010:nf_tables_chain_destroy.isra.38+0x2f9/0x3a0 [nf_tables]
[  214.398983] Code: 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 80 3c 02 00 0f 85 8e 00 00 00 48 8b 7b 58 e8 e1 2c 4e c6 48 89 df e8 d9 2c 4e c6 eb 9a <0f> 0b eb 96 0f 0b e9 7e fe ff ff e8 a7 7e 4e c6 e9 a4 fe ff ff e8
[  214.398983] RSP: 0018:ffff8881152874e8 EFLAGS: 00010202
[  214.398983] RAX: 0000000000000001 RBX: ffff88810ef9fc28 RCX: ffff8881152876f0
[  214.398983] RDX: dffffc0000000000 RSI: 1ffff11022a50ede RDI: ffff88810ef9fc78
[  214.398983] RBP: 1ffff11022a50e9d R08: 0000000080000000 R09: 0000000000000000
[  214.398983] R10: 0000000000000000 R11: 0000000000000000 R12: 1ffff11022a50eba
[  214.398983] R13: ffff888114446e08 R14: ffff8881152876f0 R15: ffffed1022a50ed6
[  214.398983] FS:  0000000000000000(0000) GS:ffff888116400000(0000) knlGS:0000000000000000
[  214.398983] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  214.398983] CR2: 00007fab9bb5f868 CR3: 000000012aa16000 CR4: 00000000001006e0
[  214.398983] Call Trace:
[  214.398983]  ? nf_tables_table_destroy.isra.37+0x100/0x100 [nf_tables]
[  214.398983]  ? __kasan_slab_free+0x145/0x180
[  214.398983]  ? nf_tables_trans_destroy_work+0x439/0x830 [nf_tables]
[  214.398983]  ? kfree+0xdb/0x280
[  214.398983]  nf_tables_trans_destroy_work+0x5f5/0x830 [nf_tables]
[ ... ]

Fixes: bb7b40aecbf7 ("netfilter: nf_tables: bogus EBUSY in chain deletions")
Reported by: Christoph Anton Mitterer <calestyo@scientia.net>
Link: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=914505
Link: https://bugzilla.kernel.org/show_bug.cgi?id=201791
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index ddeaa1990e1e9..2e61aab6ed731 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2667,21 +2667,14 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
 	}
 
 	if (nlh->nlmsg_flags & NLM_F_REPLACE) {
-		if (!nft_is_active_next(net, old_rule)) {
-			err = -ENOENT;
-			goto err2;
-		}
-		trans = nft_trans_rule_add(&ctx, NFT_MSG_DELRULE,
-					   old_rule);
+		trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule);
 		if (trans == NULL) {
 			err = -ENOMEM;
 			goto err2;
 		}
-		nft_deactivate_next(net, old_rule);
-		chain->use--;
-
-		if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == NULL) {
-			err = -ENOMEM;
+		err = nft_delrule(&ctx, old_rule);
+		if (err < 0) {
+			nft_trans_destroy(trans);
 			goto err2;
 		}
 
-- 
GitLab


From 8159a6a4a7d2a092d5375f695ecfca22b4562b5f Mon Sep 17 00:00:00 2001
From: Hui Wang <hui.wang@canonical.com>
Date: Wed, 28 Nov 2018 17:11:26 +0800
Subject: [PATCH 1505/1890] ALSA: usb-audio: Add vendor and product name for
 Dell WD19 Dock

Like the Dell WD15 Dock, the WD19 Dock (0bda:402e) doens't provide
useful string for the vendor and product names too. In order to share
the UCM with WD15, here we keep the profile_name same as the WD15.

Signed-off-by: Hui Wang <hui.wang@canonical.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/usb/quirks-table.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h
index 849953e5775c2..37fc0447c0710 100644
--- a/sound/usb/quirks-table.h
+++ b/sound/usb/quirks-table.h
@@ -3382,5 +3382,15 @@ AU0828_DEVICE(0x2040, 0x7270, "Hauppauge", "HVR-950Q"),
 		.ifnum = QUIRK_NO_INTERFACE
 	}
 },
+/* Dell WD19 Dock */
+{
+	USB_DEVICE(0x0bda, 0x402e),
+	.driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) {
+		.vendor_name = "Dell",
+		.product_name = "WD19 Dock",
+		.profile_name = "Dell-WD15-Dock",
+		.ifnum = QUIRK_NO_INTERFACE
+	}
+},
 
 #undef USB_DEVICE_VENDOR_SPEC
-- 
GitLab


From 4cd24de3a0980bf3100c9dcb08ef65ca7c31af48 Mon Sep 17 00:00:00 2001
From: Zhenzhong Duan <zhenzhong.duan@oracle.com>
Date: Fri, 2 Nov 2018 01:45:41 -0700
Subject: [PATCH 1506/1890] x86/retpoline: Make CONFIG_RETPOLINE depend on
 compiler support

Since retpoline capable compilers are widely available, make
CONFIG_RETPOLINE hard depend on the compiler capability.

Break the build when CONFIG_RETPOLINE is enabled and the compiler does not
support it. Emit an error message in that case:

 "arch/x86/Makefile:226: *** You are building kernel with non-retpoline
  compiler, please update your compiler..  Stop."

[dwmw: Fail the build with non-retpoline compiler]

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@oracle.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Borislav Petkov <bp@suse.de>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Michal Marek <michal.lkml@markovi.net>
Cc: <srinivas.eeda@oracle.com>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/cca0cb20-f9e2-4094-840b-fb0f8810cd34@default
---
 arch/x86/Kconfig                     |  4 ----
 arch/x86/Makefile                    |  5 +++--
 arch/x86/include/asm/nospec-branch.h | 10 ++++++----
 arch/x86/kernel/cpu/bugs.c           |  2 +-
 scripts/Makefile.build               |  2 --
 5 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 9d734f3c8234d..b5286ad2a9822 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -444,10 +444,6 @@ config RETPOLINE
 	  branches. Requires a compiler with -mindirect-branch=thunk-extern
 	  support for full protection. The kernel may run slower.
 
-	  Without compiler support, at least indirect branches in assembler
-	  code are eliminated. Since this includes the syscall entry path,
-	  it is not entirely pointless.
-
 config INTEL_RDT
 	bool "Intel Resource Director Technology support"
 	depends on X86 && CPU_SUP_INTEL
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 88398fdf81291..f5d7f41345249 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -220,9 +220,10 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
 
 # Avoid indirect branches in kernel to deal with Spectre
 ifdef CONFIG_RETPOLINE
-ifneq ($(RETPOLINE_CFLAGS),)
-  KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE
+ifeq ($(RETPOLINE_CFLAGS),)
+  $(error You are building kernel with non-retpoline compiler, please update your compiler.)
 endif
+  KBUILD_CFLAGS += $(RETPOLINE_CFLAGS)
 endif
 
 archscripts: scripts_basic
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 80dc144224955..8b09cbb2d52cd 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -162,11 +162,12 @@
 	_ASM_PTR " 999b\n\t"					\
 	".popsection\n\t"
 
-#if defined(CONFIG_X86_64) && defined(RETPOLINE)
+#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_X86_64
 
 /*
- * Since the inline asm uses the %V modifier which is only in newer GCC,
- * the 64-bit one is dependent on RETPOLINE not CONFIG_RETPOLINE.
+ * Inline asm uses the %V modifier which is only in newer GCC
+ * which is ensured when CONFIG_RETPOLINE is defined.
  */
 # define CALL_NOSPEC						\
 	ANNOTATE_NOSPEC_ALTERNATIVE				\
@@ -181,7 +182,7 @@
 	X86_FEATURE_RETPOLINE_AMD)
 # define THUNK_TARGET(addr) [thunk_target] "r" (addr)
 
-#elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE)
+#else /* CONFIG_X86_32 */
 /*
  * For i386 we use the original ret-equivalent retpoline, because
  * otherwise we'll run out of registers. We don't care about CET
@@ -211,6 +212,7 @@
 	X86_FEATURE_RETPOLINE_AMD)
 
 # define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
+#endif
 #else /* No retpoline for C / inline asm */
 # define CALL_NOSPEC "call *%[thunk_target]\n"
 # define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index c37e66e493bff..d0108fb6e4ddb 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -252,7 +252,7 @@ static void __init spec2_print_if_secure(const char *reason)
 
 static inline bool retp_compiler(void)
 {
-	return __is_defined(RETPOLINE);
+	return __is_defined(CONFIG_RETPOLINE);
 }
 
 static inline bool match_option(const char *arg, int arglen, const char *opt)
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index a8e7ba9f73e87..6a6be9f440cf9 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -236,10 +236,8 @@ ifdef CONFIG_GCOV_KERNEL
 objtool_args += --no-unreachable
 endif
 ifdef CONFIG_RETPOLINE
-ifneq ($(RETPOLINE_CFLAGS),)
   objtool_args += --retpoline
 endif
-endif
 
 
 ifdef CONFIG_MODVERSIONS
-- 
GitLab


From ef014aae8f1cd2793e4e014bbb102bed53f852b7 Mon Sep 17 00:00:00 2001
From: Zhenzhong Duan <zhenzhong.duan@oracle.com>
Date: Fri, 2 Nov 2018 01:45:41 -0700
Subject: [PATCH 1507/1890] x86/retpoline: Remove minimal retpoline support

Now that CONFIG_RETPOLINE hard depends on compiler support, there is no
reason to keep the minimal retpoline support around which only provided
basic protection in the assembly files.

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@oracle.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Borislav Petkov <bp@suse.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: <srinivas.eeda@oracle.com>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/f06f0a89-5587-45db-8ed2-0a9d6638d5c0@default
---
 arch/x86/include/asm/nospec-branch.h |  2 --
 arch/x86/kernel/cpu/bugs.c           | 13 ++-----------
 2 files changed, 2 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 8b09cbb2d52cd..c202a64edd957 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -221,8 +221,6 @@
 /* The Spectre V2 mitigation variants */
 enum spectre_v2_mitigation {
 	SPECTRE_V2_NONE,
-	SPECTRE_V2_RETPOLINE_MINIMAL,
-	SPECTRE_V2_RETPOLINE_MINIMAL_AMD,
 	SPECTRE_V2_RETPOLINE_GENERIC,
 	SPECTRE_V2_RETPOLINE_AMD,
 	SPECTRE_V2_IBRS_ENHANCED,
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index d0108fb6e4ddb..7f6d8159398e0 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -135,8 +135,6 @@ enum spectre_v2_mitigation_cmd {
 
 static const char *spectre_v2_strings[] = {
 	[SPECTRE_V2_NONE]			= "Vulnerable",
-	[SPECTRE_V2_RETPOLINE_MINIMAL]		= "Vulnerable: Minimal generic ASM retpoline",
-	[SPECTRE_V2_RETPOLINE_MINIMAL_AMD]	= "Vulnerable: Minimal AMD ASM retpoline",
 	[SPECTRE_V2_RETPOLINE_GENERIC]		= "Mitigation: Full generic retpoline",
 	[SPECTRE_V2_RETPOLINE_AMD]		= "Mitigation: Full AMD retpoline",
 	[SPECTRE_V2_IBRS_ENHANCED]		= "Mitigation: Enhanced IBRS",
@@ -250,11 +248,6 @@ static void __init spec2_print_if_secure(const char *reason)
 		pr_info("%s selected on command line.\n", reason);
 }
 
-static inline bool retp_compiler(void)
-{
-	return __is_defined(CONFIG_RETPOLINE);
-}
-
 static inline bool match_option(const char *arg, int arglen, const char *opt)
 {
 	int len = strlen(opt);
@@ -417,14 +410,12 @@ static void __init spectre_v2_select_mitigation(void)
 			pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n");
 			goto retpoline_generic;
 		}
-		mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD :
-					 SPECTRE_V2_RETPOLINE_MINIMAL_AMD;
+		mode = SPECTRE_V2_RETPOLINE_AMD;
 		setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD);
 		setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
 	} else {
 	retpoline_generic:
-		mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_GENERIC :
-					 SPECTRE_V2_RETPOLINE_MINIMAL;
+		mode = SPECTRE_V2_RETPOLINE_GENERIC;
 		setup_force_cpu_cap(X86_FEATURE_RETPOLINE);
 	}
 
-- 
GitLab


From 8eb729b77faf83ac4c1f363a9ad68d042415f24c Mon Sep 17 00:00:00 2001
From: Tim Chen <tim.c.chen@linux.intel.com>
Date: Sun, 25 Nov 2018 19:33:29 +0100
Subject: [PATCH 1508/1890] x86/speculation: Update the TIF_SSBD comment

"Reduced Data Speculation" is an obsolete term. The correct new name is
"Speculative store bypass disable" - which is abbreviated into SSBD.

Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Casey Schaufler <casey.schaufler@intel.com>
Cc: Asit Mallick <asit.k.mallick@intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Jon Masters <jcm@redhat.com>
Cc: Waiman Long <longman9394@gmail.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Dave Stewart <david.c.stewart@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20181125185003.593893901@linutronix.de
---
 arch/x86/include/asm/thread_info.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 2ff2a30a264f4..523c69efc38a8 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -79,7 +79,7 @@ struct thread_info {
 #define TIF_SIGPENDING		2	/* signal pending */
 #define TIF_NEED_RESCHED	3	/* rescheduling necessary */
 #define TIF_SINGLESTEP		4	/* reenable singlestep on user return*/
-#define TIF_SSBD			5	/* Reduced data speculation */
+#define TIF_SSBD		5	/* Speculative store bypass disable */
 #define TIF_SYSCALL_EMU		6	/* syscall emulation active */
 #define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
 #define TIF_SECCOMP		8	/* secure computing */
-- 
GitLab


From 24848509aa55eac39d524b587b051f4e86df3c12 Mon Sep 17 00:00:00 2001
From: Tim Chen <tim.c.chen@linux.intel.com>
Date: Sun, 25 Nov 2018 19:33:30 +0100
Subject: [PATCH 1509/1890] x86/speculation: Clean up
 spectre_v2_parse_cmdline()

Remove the unnecessary 'else' statement in spectre_v2_parse_cmdline()
to save an indentation level.

Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Casey Schaufler <casey.schaufler@intel.com>
Cc: Asit Mallick <asit.k.mallick@intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Jon Masters <jcm@redhat.com>
Cc: Waiman Long <longman9394@gmail.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Dave Stewart <david.c.stewart@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20181125185003.688010903@linutronix.de
---
 arch/x86/kernel/cpu/bugs.c | 27 +++++++++++++--------------
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 7f6d8159398e0..839ab4103e895 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -276,22 +276,21 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
 
 	if (cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
 		return SPECTRE_V2_CMD_NONE;
-	else {
-		ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg));
-		if (ret < 0)
-			return SPECTRE_V2_CMD_AUTO;
 
-		for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) {
-			if (!match_option(arg, ret, mitigation_options[i].option))
-				continue;
-			cmd = mitigation_options[i].cmd;
-			break;
-		}
+	ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg));
+	if (ret < 0)
+		return SPECTRE_V2_CMD_AUTO;
 
-		if (i >= ARRAY_SIZE(mitigation_options)) {
-			pr_err("unknown option (%s). Switching to AUTO select\n", arg);
-			return SPECTRE_V2_CMD_AUTO;
-		}
+	for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) {
+		if (!match_option(arg, ret, mitigation_options[i].option))
+			continue;
+		cmd = mitigation_options[i].cmd;
+		break;
+	}
+
+	if (i >= ARRAY_SIZE(mitigation_options)) {
+		pr_err("unknown option (%s). Switching to AUTO select\n", arg);
+		return SPECTRE_V2_CMD_AUTO;
 	}
 
 	if ((cmd == SPECTRE_V2_CMD_RETPOLINE ||
-- 
GitLab


From b86bda0426853bfe8a3506c7d2a5b332760ae46b Mon Sep 17 00:00:00 2001
From: Tim Chen <tim.c.chen@linux.intel.com>
Date: Sun, 25 Nov 2018 19:33:31 +0100
Subject: [PATCH 1510/1890] x86/speculation: Remove unnecessary ret variable in
 cpu_show_common()

Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Casey Schaufler <casey.schaufler@intel.com>
Cc: Asit Mallick <asit.k.mallick@intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Jon Masters <jcm@redhat.com>
Cc: Waiman Long <longman9394@gmail.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Dave Stewart <david.c.stewart@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20181125185003.783903657@linutronix.de
---
 arch/x86/kernel/cpu/bugs.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 839ab4103e895..b52a48966e016 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -847,8 +847,6 @@ static ssize_t l1tf_show_state(char *buf)
 static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
 			       char *buf, unsigned int bug)
 {
-	int ret;
-
 	if (!boot_cpu_has_bug(bug))
 		return sprintf(buf, "Not affected\n");
 
@@ -866,13 +864,12 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
 		return sprintf(buf, "Mitigation: __user pointer sanitization\n");
 
 	case X86_BUG_SPECTRE_V2:
-		ret = sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
+		return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
 			       boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
 			       boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
 			       (x86_spec_ctrl_base & SPEC_CTRL_STIBP) ? ", STIBP" : "",
 			       boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
 			       spectre_v2_module_string());
-		return ret;
 
 	case X86_BUG_SPEC_STORE_BYPASS:
 		return sprintf(buf, "%s\n", ssb_strings[ssb_mode]);
-- 
GitLab


From a8f76ae41cd633ac00be1b3019b1eb4741be3828 Mon Sep 17 00:00:00 2001
From: Tim Chen <tim.c.chen@linux.intel.com>
Date: Sun, 25 Nov 2018 19:33:32 +0100
Subject: [PATCH 1511/1890] x86/speculation: Move STIPB/IBPB string
 conditionals out of cpu_show_common()

The Spectre V2 printout in cpu_show_common() handles conditionals for the
various mitigation methods directly in the sprintf() argument list. That's
hard to read and will become unreadable if more complex decisions need to
be made for a particular method.

Move the conditionals for STIBP and IBPB string selection into helper
functions, so they can be extended later on.

Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Casey Schaufler <casey.schaufler@intel.com>
Cc: Asit Mallick <asit.k.mallick@intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Jon Masters <jcm@redhat.com>
Cc: Waiman Long <longman9394@gmail.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Dave Stewart <david.c.stewart@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20181125185003.874479208@linutronix.de
---
 arch/x86/kernel/cpu/bugs.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index b52a48966e016..a1502bce9eb82 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -844,6 +844,22 @@ static ssize_t l1tf_show_state(char *buf)
 }
 #endif
 
+static char *stibp_state(void)
+{
+	if (x86_spec_ctrl_base & SPEC_CTRL_STIBP)
+		return ", STIBP";
+	else
+		return "";
+}
+
+static char *ibpb_state(void)
+{
+	if (boot_cpu_has(X86_FEATURE_USE_IBPB))
+		return ", IBPB";
+	else
+		return "";
+}
+
 static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
 			       char *buf, unsigned int bug)
 {
@@ -865,9 +881,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
 
 	case X86_BUG_SPECTRE_V2:
 		return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
-			       boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
+			       ibpb_state(),
 			       boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
-			       (x86_spec_ctrl_base & SPEC_CTRL_STIBP) ? ", STIBP" : "",
+			       stibp_state(),
 			       boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
 			       spectre_v2_module_string());
 
-- 
GitLab


From 34bce7c9690b1d897686aac89604ba7adc365556 Mon Sep 17 00:00:00 2001
From: Tim Chen <tim.c.chen@linux.intel.com>
Date: Sun, 25 Nov 2018 19:33:33 +0100
Subject: [PATCH 1512/1890] x86/speculation: Disable STIBP when enhanced IBRS
 is in use

If enhanced IBRS is active, STIBP is redundant for mitigating Spectre v2
user space exploits from hyperthread sibling.

Disable STIBP when enhanced IBRS is used.

Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Casey Schaufler <casey.schaufler@intel.com>
Cc: Asit Mallick <asit.k.mallick@intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Jon Masters <jcm@redhat.com>
Cc: Waiman Long <longman9394@gmail.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Dave Stewart <david.c.stewart@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20181125185003.966801480@linutronix.de
---
 arch/x86/kernel/cpu/bugs.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index a1502bce9eb82..924cd06dd43b3 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -321,6 +321,10 @@ static bool stibp_needed(void)
 	if (spectre_v2_enabled == SPECTRE_V2_NONE)
 		return false;
 
+	/* Enhanced IBRS makes using STIBP unnecessary. */
+	if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
+		return false;
+
 	if (!boot_cpu_has(X86_FEATURE_STIBP))
 		return false;
 
@@ -846,6 +850,9 @@ static ssize_t l1tf_show_state(char *buf)
 
 static char *stibp_state(void)
 {
+	if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
+		return "";
+
 	if (x86_spec_ctrl_base & SPEC_CTRL_STIBP)
 		return ", STIBP";
 	else
-- 
GitLab


From 26c4d75b234040c11728a8acb796b3a85ba7507c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 25 Nov 2018 19:33:34 +0100
Subject: [PATCH 1513/1890] x86/speculation: Rename SSBD update functions

During context switch, the SSBD bit in SPEC_CTRL MSR is updated according
to changes of the TIF_SSBD flag in the current and next running task.

Currently, only the bit controlling speculative store bypass disable in
SPEC_CTRL MSR is updated and the related update functions all have
"speculative_store" or "ssb" in their names.

For enhanced mitigation control other bits in SPEC_CTRL MSR need to be
updated as well, which makes the SSB names inadequate.

Rename the "speculative_store*" functions to a more generic name. No
functional change.

Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Casey Schaufler <casey.schaufler@intel.com>
Cc: Asit Mallick <asit.k.mallick@intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Jon Masters <jcm@redhat.com>
Cc: Waiman Long <longman9394@gmail.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Dave Stewart <david.c.stewart@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20181125185004.058866968@linutronix.de
---
 arch/x86/include/asm/spec-ctrl.h |  6 +++---
 arch/x86/kernel/cpu/bugs.c       |  4 ++--
 arch/x86/kernel/process.c        | 12 ++++++------
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h
index ae7c2c5cd7f0e..8e2f8411c7a76 100644
--- a/arch/x86/include/asm/spec-ctrl.h
+++ b/arch/x86/include/asm/spec-ctrl.h
@@ -70,11 +70,11 @@ extern void speculative_store_bypass_ht_init(void);
 static inline void speculative_store_bypass_ht_init(void) { }
 #endif
 
-extern void speculative_store_bypass_update(unsigned long tif);
+extern void speculation_ctrl_update(unsigned long tif);
 
-static inline void speculative_store_bypass_update_current(void)
+static inline void speculation_ctrl_update_current(void)
 {
-	speculative_store_bypass_update(current_thread_info()->flags);
+	speculation_ctrl_update(current_thread_info()->flags);
 }
 
 #endif
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 924cd06dd43b3..a723af0c44008 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -200,7 +200,7 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
 		tif = setguest ? ssbd_spec_ctrl_to_tif(guestval) :
 				 ssbd_spec_ctrl_to_tif(hostval);
 
-		speculative_store_bypass_update(tif);
+		speculation_ctrl_update(tif);
 	}
 }
 EXPORT_SYMBOL_GPL(x86_virt_spec_ctrl);
@@ -632,7 +632,7 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
 	 * mitigation until it is next scheduled.
 	 */
 	if (task == current && update)
-		speculative_store_bypass_update_current();
+		speculation_ctrl_update_current();
 
 	return 0;
 }
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index c93fcfdf16734..8aa49604f9ae2 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -395,27 +395,27 @@ static __always_inline void amd_set_ssb_virt_state(unsigned long tifn)
 	wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn));
 }
 
-static __always_inline void intel_set_ssb_state(unsigned long tifn)
+static __always_inline void spec_ctrl_update_msr(unsigned long tifn)
 {
 	u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn);
 
 	wrmsrl(MSR_IA32_SPEC_CTRL, msr);
 }
 
-static __always_inline void __speculative_store_bypass_update(unsigned long tifn)
+static __always_inline void __speculation_ctrl_update(unsigned long tifn)
 {
 	if (static_cpu_has(X86_FEATURE_VIRT_SSBD))
 		amd_set_ssb_virt_state(tifn);
 	else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD))
 		amd_set_core_ssb_state(tifn);
 	else
-		intel_set_ssb_state(tifn);
+		spec_ctrl_update_msr(tifn);
 }
 
-void speculative_store_bypass_update(unsigned long tif)
+void speculation_ctrl_update(unsigned long tif)
 {
 	preempt_disable();
-	__speculative_store_bypass_update(tif);
+	__speculation_ctrl_update(tif);
 	preempt_enable();
 }
 
@@ -452,7 +452,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 		set_cpuid_faulting(!!(tifn & _TIF_NOCPUID));
 
 	if ((tifp ^ tifn) & _TIF_SSBD)
-		__speculative_store_bypass_update(tifn);
+		__speculation_ctrl_update(tifn);
 }
 
 /*
-- 
GitLab


From 01daf56875ee0cd50ed496a09b20eb369b45dfa5 Mon Sep 17 00:00:00 2001
From: Tim Chen <tim.c.chen@linux.intel.com>
Date: Sun, 25 Nov 2018 19:33:35 +0100
Subject: [PATCH 1514/1890] x86/speculation: Reorganize speculation control
 MSRs update

The logic to detect whether there's a change in the previous and next
task's flag relevant to update speculation control MSRs is spread out
across multiple functions.

Consolidate all checks needed for updating speculation control MSRs into
the new __speculation_ctrl_update() helper function.

This makes it easy to pick the right speculation control MSR and the bits
in MSR_IA32_SPEC_CTRL that need updating based on TIF flags changes.

Originally-by: Thomas Lendacky <Thomas.Lendacky@amd.com>
Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Casey Schaufler <casey.schaufler@intel.com>
Cc: Asit Mallick <asit.k.mallick@intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Jon Masters <jcm@redhat.com>
Cc: Waiman Long <longman9394@gmail.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Dave Stewart <david.c.stewart@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20181125185004.151077005@linutronix.de
---
 arch/x86/kernel/process.c | 46 ++++++++++++++++++++++++---------------
 1 file changed, 29 insertions(+), 17 deletions(-)

diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 8aa49604f9ae2..70e9832379e13 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -395,27 +395,40 @@ static __always_inline void amd_set_ssb_virt_state(unsigned long tifn)
 	wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn));
 }
 
-static __always_inline void spec_ctrl_update_msr(unsigned long tifn)
-{
-	u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn);
-
-	wrmsrl(MSR_IA32_SPEC_CTRL, msr);
-}
+/*
+ * Update the MSRs managing speculation control, during context switch.
+ *
+ * tifp: Previous task's thread flags
+ * tifn: Next task's thread flags
+ */
+static __always_inline void __speculation_ctrl_update(unsigned long tifp,
+						      unsigned long tifn)
+{
+	u64 msr = x86_spec_ctrl_base;
+	bool updmsr = false;
+
+	/* If TIF_SSBD is different, select the proper mitigation method */
+	if ((tifp ^ tifn) & _TIF_SSBD) {
+		if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) {
+			amd_set_ssb_virt_state(tifn);
+		} else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) {
+			amd_set_core_ssb_state(tifn);
+		} else if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
+			   static_cpu_has(X86_FEATURE_AMD_SSBD)) {
+			msr |= ssbd_tif_to_spec_ctrl(tifn);
+			updmsr  = true;
+		}
+	}
 
-static __always_inline void __speculation_ctrl_update(unsigned long tifn)
-{
-	if (static_cpu_has(X86_FEATURE_VIRT_SSBD))
-		amd_set_ssb_virt_state(tifn);
-	else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD))
-		amd_set_core_ssb_state(tifn);
-	else
-		spec_ctrl_update_msr(tifn);
+	if (updmsr)
+		wrmsrl(MSR_IA32_SPEC_CTRL, msr);
 }
 
 void speculation_ctrl_update(unsigned long tif)
 {
+	/* Forced update. Make sure all relevant TIF flags are different */
 	preempt_disable();
-	__speculation_ctrl_update(tif);
+	__speculation_ctrl_update(~tif, tif);
 	preempt_enable();
 }
 
@@ -451,8 +464,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 	if ((tifp ^ tifn) & _TIF_NOCPUID)
 		set_cpuid_faulting(!!(tifn & _TIF_NOCPUID));
 
-	if ((tifp ^ tifn) & _TIF_SSBD)
-		__speculation_ctrl_update(tifn);
+	__speculation_ctrl_update(tifp, tifn);
 }
 
 /*
-- 
GitLab


From c5511d03ec090980732e929c318a7a6374b5550e Mon Sep 17 00:00:00 2001
From: "Peter Zijlstra (Intel)" <peterz@infradead.org>
Date: Sun, 25 Nov 2018 19:33:36 +0100
Subject: [PATCH 1515/1890] sched/smt: Make sched_smt_present track topology

Currently the 'sched_smt_present' static key is enabled when at CPU bringup
SMT topology is observed, but it is never disabled. However there is demand
to also disable the key when the topology changes such that there is no SMT
present anymore.

Implement this by making the key count the number of cores that have SMT
enabled.

In particular, the SMT topology bits are set before interrrupts are enabled
and similarly, are cleared after interrupts are disabled for the last time
and the CPU dies.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Casey Schaufler <casey.schaufler@intel.com>
Cc: Asit Mallick <asit.k.mallick@intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Jon Masters <jcm@redhat.com>
Cc: Waiman Long <longman9394@gmail.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Dave Stewart <david.c.stewart@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20181125185004.246110444@linutronix.de
---
 kernel/sched/core.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 091e089063be1..6fedf3a98581b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5738,15 +5738,10 @@ int sched_cpu_activate(unsigned int cpu)
 
 #ifdef CONFIG_SCHED_SMT
 	/*
-	 * The sched_smt_present static key needs to be evaluated on every
-	 * hotplug event because at boot time SMT might be disabled when
-	 * the number of booted CPUs is limited.
-	 *
-	 * If then later a sibling gets hotplugged, then the key would stay
-	 * off and SMT scheduling would never be functional.
+	 * When going up, increment the number of cores with SMT present.
 	 */
-	if (cpumask_weight(cpu_smt_mask(cpu)) > 1)
-		static_branch_enable_cpuslocked(&sched_smt_present);
+	if (cpumask_weight(cpu_smt_mask(cpu)) == 2)
+		static_branch_inc_cpuslocked(&sched_smt_present);
 #endif
 	set_cpu_active(cpu, true);
 
@@ -5790,6 +5785,14 @@ int sched_cpu_deactivate(unsigned int cpu)
 	 */
 	synchronize_rcu_mult(call_rcu, call_rcu_sched);
 
+#ifdef CONFIG_SCHED_SMT
+	/*
+	 * When going down, decrement the number of cores with SMT present.
+	 */
+	if (cpumask_weight(cpu_smt_mask(cpu)) == 2)
+		static_branch_dec_cpuslocked(&sched_smt_present);
+#endif
+
 	if (!sched_smp_initialized)
 		return 0;
 
-- 
GitLab


From dbe733642e01dd108f71436aaea7b328cb28fd87 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 25 Nov 2018 19:33:37 +0100
Subject: [PATCH 1516/1890] x86/Kconfig: Select SCHED_SMT if SMP enabled

CONFIG_SCHED_SMT is enabled by all distros, so there is not a real point to
have it configurable. The runtime overhead in the core scheduler code is
minimal because the actual SMT scheduling parts are conditional on a static
key.

This allows to expose the scheduler's SMT state static key to the
speculation control code. Alternatively the scheduler's static key could be
made always available when CONFIG_SMP is enabled, but that's just adding an
unused static key to every other architecture for nothing.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Casey Schaufler <casey.schaufler@intel.com>
Cc: Asit Mallick <asit.k.mallick@intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Jon Masters <jcm@redhat.com>
Cc: Waiman Long <longman9394@gmail.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Dave Stewart <david.c.stewart@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20181125185004.337452245@linutronix.de
---
 arch/x86/Kconfig | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b5286ad2a9822..8689e794a43c8 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1000,13 +1000,7 @@ config NR_CPUS
 	  to the kernel image.
 
 config SCHED_SMT
-	bool "SMT (Hyperthreading) scheduler support"
-	depends on SMP
-	---help---
-	  SMT scheduler support improves the CPU scheduler's decision making
-	  when dealing with Intel Pentium 4 chips with HyperThreading at a
-	  cost of slightly increased overhead in some places. If unsure say
-	  N here.
+	def_bool y if SMP
 
 config SCHED_MC
 	def_bool y
-- 
GitLab


From 321a874a7ef85655e93b3206d0f36b4a6097f948 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 25 Nov 2018 19:33:38 +0100
Subject: [PATCH 1517/1890] sched/smt: Expose sched_smt_present static key

Make the scheduler's 'sched_smt_present' static key globaly available, so
it can be used in the x86 speculation control code.

Provide a query function and a stub for the CONFIG_SMP=n case.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Casey Schaufler <casey.schaufler@intel.com>
Cc: Asit Mallick <asit.k.mallick@intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Jon Masters <jcm@redhat.com>
Cc: Waiman Long <longman9394@gmail.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Dave Stewart <david.c.stewart@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20181125185004.430168326@linutronix.de
---
 include/linux/sched/smt.h | 18 ++++++++++++++++++
 kernel/sched/sched.h      |  4 +---
 2 files changed, 19 insertions(+), 3 deletions(-)
 create mode 100644 include/linux/sched/smt.h

diff --git a/include/linux/sched/smt.h b/include/linux/sched/smt.h
new file mode 100644
index 0000000000000..c9e0be5141104
--- /dev/null
+++ b/include/linux/sched/smt.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_SCHED_SMT_H
+#define _LINUX_SCHED_SMT_H
+
+#include <linux/static_key.h>
+
+#ifdef CONFIG_SCHED_SMT
+extern struct static_key_false sched_smt_present;
+
+static __always_inline bool sched_smt_active(void)
+{
+	return static_branch_likely(&sched_smt_present);
+}
+#else
+static inline bool sched_smt_active(void) { return false; }
+#endif
+
+#endif
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 618577fc9aa87..4e524ab589c9b 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -23,6 +23,7 @@
 #include <linux/sched/prio.h>
 #include <linux/sched/rt.h>
 #include <linux/sched/signal.h>
+#include <linux/sched/smt.h>
 #include <linux/sched/stat.h>
 #include <linux/sched/sysctl.h>
 #include <linux/sched/task.h>
@@ -936,9 +937,6 @@ static inline int cpu_of(struct rq *rq)
 
 
 #ifdef CONFIG_SCHED_SMT
-
-extern struct static_key_false sched_smt_present;
-
 extern void __update_idle_core(struct rq *rq);
 
 static inline void update_idle_core(struct rq *rq)
-- 
GitLab


From a74cfffb03b73d41e08f84c2e5c87dec0ce3db9f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 25 Nov 2018 19:33:39 +0100
Subject: [PATCH 1518/1890] x86/speculation: Rework SMT state change

arch_smt_update() is only called when the sysfs SMT control knob is
changed. This means that when SMT is enabled in the sysfs control knob the
system is considered to have SMT active even if all siblings are offline.

To allow finegrained control of the speculation mitigations, the actual SMT
state is more interesting than the fact that siblings could be enabled.

Rework the code, so arch_smt_update() is invoked from each individual CPU
hotplug function, and simplify the update function while at it.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Casey Schaufler <casey.schaufler@intel.com>
Cc: Asit Mallick <asit.k.mallick@intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Jon Masters <jcm@redhat.com>
Cc: Waiman Long <longman9394@gmail.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Dave Stewart <david.c.stewart@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20181125185004.521974984@linutronix.de
---
 arch/x86/kernel/cpu/bugs.c | 11 +++++------
 include/linux/sched/smt.h  |  2 ++
 kernel/cpu.c               | 15 +++++++++------
 3 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index a723af0c44008..5625b323ff329 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -14,6 +14,7 @@
 #include <linux/module.h>
 #include <linux/nospec.h>
 #include <linux/prctl.h>
+#include <linux/sched/smt.h>
 
 #include <asm/spec-ctrl.h>
 #include <asm/cmdline.h>
@@ -344,16 +345,14 @@ void arch_smt_update(void)
 		return;
 
 	mutex_lock(&spec_ctrl_mutex);
-	mask = x86_spec_ctrl_base;
-	if (cpu_smt_control == CPU_SMT_ENABLED)
+
+	mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP;
+	if (sched_smt_active())
 		mask |= SPEC_CTRL_STIBP;
-	else
-		mask &= ~SPEC_CTRL_STIBP;
 
 	if (mask != x86_spec_ctrl_base) {
 		pr_info("Spectre v2 cross-process SMT mitigation: %s STIBP\n",
-				cpu_smt_control == CPU_SMT_ENABLED ?
-				"Enabling" : "Disabling");
+			mask & SPEC_CTRL_STIBP ? "Enabling" : "Disabling");
 		x86_spec_ctrl_base = mask;
 		on_each_cpu(update_stibp_msr, NULL, 1);
 	}
diff --git a/include/linux/sched/smt.h b/include/linux/sched/smt.h
index c9e0be5141104..59d3736c454cf 100644
--- a/include/linux/sched/smt.h
+++ b/include/linux/sched/smt.h
@@ -15,4 +15,6 @@ static __always_inline bool sched_smt_active(void)
 static inline bool sched_smt_active(void) { return false; }
 #endif
 
+void arch_smt_update(void);
+
 #endif
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 3c7f3b4c453cf..91d5c38eb7e5b 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -10,6 +10,7 @@
 #include <linux/sched/signal.h>
 #include <linux/sched/hotplug.h>
 #include <linux/sched/task.h>
+#include <linux/sched/smt.h>
 #include <linux/unistd.h>
 #include <linux/cpu.h>
 #include <linux/oom.h>
@@ -367,6 +368,12 @@ static void lockdep_release_cpus_lock(void)
 
 #endif	/* CONFIG_HOTPLUG_CPU */
 
+/*
+ * Architectures that need SMT-specific errata handling during SMT hotplug
+ * should override this.
+ */
+void __weak arch_smt_update(void) { }
+
 #ifdef CONFIG_HOTPLUG_SMT
 enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
 EXPORT_SYMBOL_GPL(cpu_smt_control);
@@ -1011,6 +1018,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
 	 * concurrent CPU hotplug via cpu_add_remove_lock.
 	 */
 	lockup_detector_cleanup();
+	arch_smt_update();
 	return ret;
 }
 
@@ -1139,6 +1147,7 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
 	ret = cpuhp_up_callbacks(cpu, st, target);
 out:
 	cpus_write_unlock();
+	arch_smt_update();
 	return ret;
 }
 
@@ -2055,12 +2064,6 @@ static void cpuhp_online_cpu_device(unsigned int cpu)
 	kobject_uevent(&dev->kobj, KOBJ_ONLINE);
 }
 
-/*
- * Architectures that need SMT-specific errata handling during SMT hotplug
- * should override this.
- */
-void __weak arch_smt_update(void) { };
-
 static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
 {
 	int cpu, ret = 0;
-- 
GitLab


From 130d6f946f6f2a972ee3ec8540b7243ab99abe97 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 25 Nov 2018 19:33:40 +0100
Subject: [PATCH 1519/1890] x86/l1tf: Show actual SMT state

Use the now exposed real SMT state, not the SMT sysfs control knob
state. This reflects the state of the system when the mitigation status is
queried.

This does not change the warning in the VMX launch code. There the
dependency on the control knob makes sense because siblings could be
brought online anytime after launching the VM.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Casey Schaufler <casey.schaufler@intel.com>
Cc: Asit Mallick <asit.k.mallick@intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Jon Masters <jcm@redhat.com>
Cc: Waiman Long <longman9394@gmail.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Dave Stewart <david.c.stewart@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20181125185004.613357354@linutronix.de
---
 arch/x86/kernel/cpu/bugs.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 5625b323ff329..2dc4ee2bedcb2 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -832,13 +832,14 @@ static ssize_t l1tf_show_state(char *buf)
 
 	if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_EPT_DISABLED ||
 	    (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER &&
-	     cpu_smt_control == CPU_SMT_ENABLED))
+	     sched_smt_active())) {
 		return sprintf(buf, "%s; VMX: %s\n", L1TF_DEFAULT_MSG,
 			       l1tf_vmx_states[l1tf_vmx_mitigation]);
+	}
 
 	return sprintf(buf, "%s; VMX: %s, SMT %s\n", L1TF_DEFAULT_MSG,
 		       l1tf_vmx_states[l1tf_vmx_mitigation],
-		       cpu_smt_control == CPU_SMT_ENABLED ? "vulnerable" : "disabled");
+		       sched_smt_active() ? "vulnerable" : "disabled");
 }
 #else
 static ssize_t l1tf_show_state(char *buf)
-- 
GitLab


From 15d6b7aab0793b2de8a05d8a828777dd24db424e Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 25 Nov 2018 19:33:41 +0100
Subject: [PATCH 1520/1890] x86/speculation: Reorder the spec_v2 code

Reorder the code so it is better grouped. No functional change.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Casey Schaufler <casey.schaufler@intel.com>
Cc: Asit Mallick <asit.k.mallick@intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Jon Masters <jcm@redhat.com>
Cc: Waiman Long <longman9394@gmail.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Dave Stewart <david.c.stewart@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20181125185004.707122879@linutronix.de
---
 arch/x86/kernel/cpu/bugs.c | 168 ++++++++++++++++++-------------------
 1 file changed, 84 insertions(+), 84 deletions(-)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 2dc4ee2bedcb2..c9542b9fb3298 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -124,29 +124,6 @@ void __init check_bugs(void)
 #endif
 }
 
-/* The kernel command line selection */
-enum spectre_v2_mitigation_cmd {
-	SPECTRE_V2_CMD_NONE,
-	SPECTRE_V2_CMD_AUTO,
-	SPECTRE_V2_CMD_FORCE,
-	SPECTRE_V2_CMD_RETPOLINE,
-	SPECTRE_V2_CMD_RETPOLINE_GENERIC,
-	SPECTRE_V2_CMD_RETPOLINE_AMD,
-};
-
-static const char *spectre_v2_strings[] = {
-	[SPECTRE_V2_NONE]			= "Vulnerable",
-	[SPECTRE_V2_RETPOLINE_GENERIC]		= "Mitigation: Full generic retpoline",
-	[SPECTRE_V2_RETPOLINE_AMD]		= "Mitigation: Full AMD retpoline",
-	[SPECTRE_V2_IBRS_ENHANCED]		= "Mitigation: Enhanced IBRS",
-};
-
-#undef pr_fmt
-#define pr_fmt(fmt)     "Spectre V2 : " fmt
-
-static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
-	SPECTRE_V2_NONE;
-
 void
 x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
 {
@@ -216,6 +193,12 @@ static void x86_amd_ssb_disable(void)
 		wrmsrl(MSR_AMD64_LS_CFG, msrval);
 }
 
+#undef pr_fmt
+#define pr_fmt(fmt)     "Spectre V2 : " fmt
+
+static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
+	SPECTRE_V2_NONE;
+
 #ifdef RETPOLINE
 static bool spectre_v2_bad_module;
 
@@ -237,18 +220,6 @@ static inline const char *spectre_v2_module_string(void)
 static inline const char *spectre_v2_module_string(void) { return ""; }
 #endif
 
-static void __init spec2_print_if_insecure(const char *reason)
-{
-	if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
-		pr_info("%s selected on command line.\n", reason);
-}
-
-static void __init spec2_print_if_secure(const char *reason)
-{
-	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
-		pr_info("%s selected on command line.\n", reason);
-}
-
 static inline bool match_option(const char *arg, int arglen, const char *opt)
 {
 	int len = strlen(opt);
@@ -256,24 +227,53 @@ static inline bool match_option(const char *arg, int arglen, const char *opt)
 	return len == arglen && !strncmp(arg, opt, len);
 }
 
+/* The kernel command line selection for spectre v2 */
+enum spectre_v2_mitigation_cmd {
+	SPECTRE_V2_CMD_NONE,
+	SPECTRE_V2_CMD_AUTO,
+	SPECTRE_V2_CMD_FORCE,
+	SPECTRE_V2_CMD_RETPOLINE,
+	SPECTRE_V2_CMD_RETPOLINE_GENERIC,
+	SPECTRE_V2_CMD_RETPOLINE_AMD,
+};
+
+static const char *spectre_v2_strings[] = {
+	[SPECTRE_V2_NONE]			= "Vulnerable",
+	[SPECTRE_V2_RETPOLINE_GENERIC]		= "Mitigation: Full generic retpoline",
+	[SPECTRE_V2_RETPOLINE_AMD]		= "Mitigation: Full AMD retpoline",
+	[SPECTRE_V2_IBRS_ENHANCED]		= "Mitigation: Enhanced IBRS",
+};
+
 static const struct {
 	const char *option;
 	enum spectre_v2_mitigation_cmd cmd;
 	bool secure;
 } mitigation_options[] = {
-	{ "off",               SPECTRE_V2_CMD_NONE,              false },
-	{ "on",                SPECTRE_V2_CMD_FORCE,             true },
-	{ "retpoline",         SPECTRE_V2_CMD_RETPOLINE,         false },
-	{ "retpoline,amd",     SPECTRE_V2_CMD_RETPOLINE_AMD,     false },
-	{ "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false },
-	{ "auto",              SPECTRE_V2_CMD_AUTO,              false },
+	{ "off",		SPECTRE_V2_CMD_NONE,		  false },
+	{ "on",			SPECTRE_V2_CMD_FORCE,		  true  },
+	{ "retpoline",		SPECTRE_V2_CMD_RETPOLINE,	  false },
+	{ "retpoline,amd",	SPECTRE_V2_CMD_RETPOLINE_AMD,	  false },
+	{ "retpoline,generic",	SPECTRE_V2_CMD_RETPOLINE_GENERIC, false },
+	{ "auto",		SPECTRE_V2_CMD_AUTO,		  false },
 };
 
+static void __init spec2_print_if_insecure(const char *reason)
+{
+	if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+		pr_info("%s selected on command line.\n", reason);
+}
+
+static void __init spec2_print_if_secure(const char *reason)
+{
+	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+		pr_info("%s selected on command line.\n", reason);
+}
+
 static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
 {
+	enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO;
 	char arg[20];
 	int ret, i;
-	enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO;
 
 	if (cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
 		return SPECTRE_V2_CMD_NONE;
@@ -317,48 +317,6 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
 	return cmd;
 }
 
-static bool stibp_needed(void)
-{
-	if (spectre_v2_enabled == SPECTRE_V2_NONE)
-		return false;
-
-	/* Enhanced IBRS makes using STIBP unnecessary. */
-	if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
-		return false;
-
-	if (!boot_cpu_has(X86_FEATURE_STIBP))
-		return false;
-
-	return true;
-}
-
-static void update_stibp_msr(void *info)
-{
-	wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
-}
-
-void arch_smt_update(void)
-{
-	u64 mask;
-
-	if (!stibp_needed())
-		return;
-
-	mutex_lock(&spec_ctrl_mutex);
-
-	mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP;
-	if (sched_smt_active())
-		mask |= SPEC_CTRL_STIBP;
-
-	if (mask != x86_spec_ctrl_base) {
-		pr_info("Spectre v2 cross-process SMT mitigation: %s STIBP\n",
-			mask & SPEC_CTRL_STIBP ? "Enabling" : "Disabling");
-		x86_spec_ctrl_base = mask;
-		on_each_cpu(update_stibp_msr, NULL, 1);
-	}
-	mutex_unlock(&spec_ctrl_mutex);
-}
-
 static void __init spectre_v2_select_mitigation(void)
 {
 	enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
@@ -462,6 +420,48 @@ static void __init spectre_v2_select_mitigation(void)
 	arch_smt_update();
 }
 
+static bool stibp_needed(void)
+{
+	if (spectre_v2_enabled == SPECTRE_V2_NONE)
+		return false;
+
+	/* Enhanced IBRS makes using STIBP unnecessary. */
+	if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
+		return false;
+
+	if (!boot_cpu_has(X86_FEATURE_STIBP))
+		return false;
+
+	return true;
+}
+
+static void update_stibp_msr(void *info)
+{
+	wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+}
+
+void arch_smt_update(void)
+{
+	u64 mask;
+
+	if (!stibp_needed())
+		return;
+
+	mutex_lock(&spec_ctrl_mutex);
+
+	mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP;
+	if (sched_smt_active())
+		mask |= SPEC_CTRL_STIBP;
+
+	if (mask != x86_spec_ctrl_base) {
+		pr_info("Spectre v2 cross-process SMT mitigation: %s STIBP\n",
+			mask & SPEC_CTRL_STIBP ? "Enabling" : "Disabling");
+		x86_spec_ctrl_base = mask;
+		on_each_cpu(update_stibp_msr, NULL, 1);
+	}
+	mutex_unlock(&spec_ctrl_mutex);
+}
+
 #undef pr_fmt
 #define pr_fmt(fmt)	"Speculative Store Bypass: " fmt
 
-- 
GitLab


From 8770709f411763884535662744a3786a1806afd3 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 25 Nov 2018 19:33:42 +0100
Subject: [PATCH 1521/1890] x86/speculation: Mark string arrays const correctly

checkpatch.pl muttered when reshuffling the code:
 WARNING: static const char * array should probably be static const char * const

Fix up all the string arrays.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Casey Schaufler <casey.schaufler@intel.com>
Cc: Asit Mallick <asit.k.mallick@intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Jon Masters <jcm@redhat.com>
Cc: Waiman Long <longman9394@gmail.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Dave Stewart <david.c.stewart@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20181125185004.800018931@linutronix.de
---
 arch/x86/kernel/cpu/bugs.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index c9542b9fb3298..4fcbccb162006 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -237,7 +237,7 @@ enum spectre_v2_mitigation_cmd {
 	SPECTRE_V2_CMD_RETPOLINE_AMD,
 };
 
-static const char *spectre_v2_strings[] = {
+static const char * const spectre_v2_strings[] = {
 	[SPECTRE_V2_NONE]			= "Vulnerable",
 	[SPECTRE_V2_RETPOLINE_GENERIC]		= "Mitigation: Full generic retpoline",
 	[SPECTRE_V2_RETPOLINE_AMD]		= "Mitigation: Full AMD retpoline",
@@ -476,7 +476,7 @@ enum ssb_mitigation_cmd {
 	SPEC_STORE_BYPASS_CMD_SECCOMP,
 };
 
-static const char *ssb_strings[] = {
+static const char * const ssb_strings[] = {
 	[SPEC_STORE_BYPASS_NONE]	= "Vulnerable",
 	[SPEC_STORE_BYPASS_DISABLE]	= "Mitigation: Speculative Store Bypass disabled",
 	[SPEC_STORE_BYPASS_PRCTL]	= "Mitigation: Speculative Store Bypass disabled via prctl",
@@ -816,7 +816,7 @@ early_param("l1tf", l1tf_cmdline);
 #define L1TF_DEFAULT_MSG "Mitigation: PTE Inversion"
 
 #if IS_ENABLED(CONFIG_KVM_INTEL)
-static const char *l1tf_vmx_states[] = {
+static const char * const l1tf_vmx_states[] = {
 	[VMENTER_L1D_FLUSH_AUTO]		= "auto",
 	[VMENTER_L1D_FLUSH_NEVER]		= "vulnerable",
 	[VMENTER_L1D_FLUSH_COND]		= "conditional cache flushes",
-- 
GitLab


From 30ba72a990f5096ae08f284de17986461efcc408 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 25 Nov 2018 19:33:43 +0100
Subject: [PATCH 1522/1890] x86/speculataion: Mark command line parser data
 __initdata

No point to keep that around.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Casey Schaufler <casey.schaufler@intel.com>
Cc: Asit Mallick <asit.k.mallick@intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Jon Masters <jcm@redhat.com>
Cc: Waiman Long <longman9394@gmail.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Dave Stewart <david.c.stewart@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20181125185004.893886356@linutronix.de
---
 arch/x86/kernel/cpu/bugs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 4fcbccb162006..9279cbabe16eb 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -248,7 +248,7 @@ static const struct {
 	const char *option;
 	enum spectre_v2_mitigation_cmd cmd;
 	bool secure;
-} mitigation_options[] = {
+} mitigation_options[] __initdata = {
 	{ "off",		SPECTRE_V2_CMD_NONE,		  false },
 	{ "on",			SPECTRE_V2_CMD_FORCE,		  true  },
 	{ "retpoline",		SPECTRE_V2_CMD_RETPOLINE,	  false },
@@ -486,7 +486,7 @@ static const char * const ssb_strings[] = {
 static const struct {
 	const char *option;
 	enum ssb_mitigation_cmd cmd;
-} ssb_mitigation_options[] = {
+} ssb_mitigation_options[]  __initdata = {
 	{ "auto",	SPEC_STORE_BYPASS_CMD_AUTO },    /* Platform decides */
 	{ "on",		SPEC_STORE_BYPASS_CMD_ON },      /* Disable Speculative Store Bypass */
 	{ "off",	SPEC_STORE_BYPASS_CMD_NONE },    /* Don't touch Speculative Store Bypass */
-- 
GitLab


From 495d470e9828500e0155027f230449ac5e29c025 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 25 Nov 2018 19:33:44 +0100
Subject: [PATCH 1523/1890] x86/speculation: Unify conditional spectre v2 print
 functions

There is no point in having two functions and a conditional at the call
site.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Casey Schaufler <casey.schaufler@intel.com>
Cc: Asit Mallick <asit.k.mallick@intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Jon Masters <jcm@redhat.com>
Cc: Waiman Long <longman9394@gmail.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Dave Stewart <david.c.stewart@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20181125185004.986890749@linutronix.de
---
 arch/x86/kernel/cpu/bugs.c | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 9279cbabe16eb..4f5a6319dca6d 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -257,15 +257,9 @@ static const struct {
 	{ "auto",		SPECTRE_V2_CMD_AUTO,		  false },
 };
 
-static void __init spec2_print_if_insecure(const char *reason)
+static void __init spec_v2_print_cond(const char *reason, bool secure)
 {
-	if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
-		pr_info("%s selected on command line.\n", reason);
-}
-
-static void __init spec2_print_if_secure(const char *reason)
-{
-	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+	if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2) != secure)
 		pr_info("%s selected on command line.\n", reason);
 }
 
@@ -309,11 +303,8 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
 		return SPECTRE_V2_CMD_AUTO;
 	}
 
-	if (mitigation_options[i].secure)
-		spec2_print_if_secure(mitigation_options[i].option);
-	else
-		spec2_print_if_insecure(mitigation_options[i].option);
-
+	spec_v2_print_cond(mitigation_options[i].option,
+			   mitigation_options[i].secure);
 	return cmd;
 }
 
-- 
GitLab


From fa1202ef224391b6f5b26cdd44cc50495e8fab54 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 25 Nov 2018 19:33:45 +0100
Subject: [PATCH 1524/1890] x86/speculation: Add command line control for
 indirect branch speculation

Add command line control for user space indirect branch speculation
mitigations. The new option is: spectre_v2_user=

The initial options are:

    -  on:   Unconditionally enabled
    - off:   Unconditionally disabled
    -auto:   Kernel selects mitigation (default off for now)

When the spectre_v2= command line argument is either 'on' or 'off' this
implies that the application to application control follows that state even
if a contradicting spectre_v2_user= argument is supplied.

Originally-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Casey Schaufler <casey.schaufler@intel.com>
Cc: Asit Mallick <asit.k.mallick@intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Jon Masters <jcm@redhat.com>
Cc: Waiman Long <longman9394@gmail.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Dave Stewart <david.c.stewart@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20181125185005.082720373@linutronix.de
---
 .../admin-guide/kernel-parameters.txt         |  32 ++++-
 arch/x86/include/asm/nospec-branch.h          |  10 ++
 arch/x86/kernel/cpu/bugs.c                    | 133 +++++++++++++++---
 3 files changed, 156 insertions(+), 19 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 19f4423e70d91..b6e5b33b9d751 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4194,9 +4194,13 @@
 
 	spectre_v2=	[X86] Control mitigation of Spectre variant 2
 			(indirect branch speculation) vulnerability.
+			The default operation protects the kernel from
+			user space attacks.
 
-			on   - unconditionally enable
-			off  - unconditionally disable
+			on   - unconditionally enable, implies
+			       spectre_v2_user=on
+			off  - unconditionally disable, implies
+			       spectre_v2_user=off
 			auto - kernel detects whether your CPU model is
 			       vulnerable
 
@@ -4206,6 +4210,12 @@
 			CONFIG_RETPOLINE configuration option, and the
 			compiler with which the kernel was built.
 
+			Selecting 'on' will also enable the mitigation
+			against user space to user space task attacks.
+
+			Selecting 'off' will disable both the kernel and
+			the user space protections.
+
 			Specific mitigations can also be selected manually:
 
 			retpoline	  - replace indirect branches
@@ -4215,6 +4225,24 @@
 			Not specifying this option is equivalent to
 			spectre_v2=auto.
 
+	spectre_v2_user=
+			[X86] Control mitigation of Spectre variant 2
+		        (indirect branch speculation) vulnerability between
+		        user space tasks
+
+			on	- Unconditionally enable mitigations. Is
+				  enforced by spectre_v2=on
+
+			off     - Unconditionally disable mitigations. Is
+				  enforced by spectre_v2=off
+
+			auto    - Kernel selects the mitigation depending on
+				  the available CPU features and vulnerability.
+				  Default is off.
+
+			Not specifying this option is equivalent to
+			spectre_v2_user=auto.
+
 	spec_store_bypass_disable=
 			[HW] Control Speculative Store Bypass (SSB) Disable mitigation
 			(Speculative Store Bypass vulnerability)
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index c202a64edd957..be0b0aa780e20 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -3,6 +3,8 @@
 #ifndef _ASM_X86_NOSPEC_BRANCH_H_
 #define _ASM_X86_NOSPEC_BRANCH_H_
 
+#include <linux/static_key.h>
+
 #include <asm/alternative.h>
 #include <asm/alternative-asm.h>
 #include <asm/cpufeatures.h>
@@ -226,6 +228,12 @@ enum spectre_v2_mitigation {
 	SPECTRE_V2_IBRS_ENHANCED,
 };
 
+/* The indirect branch speculation control variants */
+enum spectre_v2_user_mitigation {
+	SPECTRE_V2_USER_NONE,
+	SPECTRE_V2_USER_STRICT,
+};
+
 /* The Speculative Store Bypass disable variants */
 enum ssb_mitigation {
 	SPEC_STORE_BYPASS_NONE,
@@ -303,6 +311,8 @@ do {									\
 	preempt_enable();						\
 } while (0)
 
+DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp);
+
 #endif /* __ASSEMBLY__ */
 
 /*
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 4f5a6319dca6d..3a223cce1fac1 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -54,6 +54,9 @@ static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS;
 u64 __ro_after_init x86_amd_ls_cfg_base;
 u64 __ro_after_init x86_amd_ls_cfg_ssbd_mask;
 
+/* Control conditional STIPB in switch_to() */
+DEFINE_STATIC_KEY_FALSE(switch_to_cond_stibp);
+
 void __init check_bugs(void)
 {
 	identify_boot_cpu();
@@ -199,6 +202,9 @@ static void x86_amd_ssb_disable(void)
 static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
 	SPECTRE_V2_NONE;
 
+static enum spectre_v2_user_mitigation spectre_v2_user __ro_after_init =
+	SPECTRE_V2_USER_NONE;
+
 #ifdef RETPOLINE
 static bool spectre_v2_bad_module;
 
@@ -237,6 +243,104 @@ enum spectre_v2_mitigation_cmd {
 	SPECTRE_V2_CMD_RETPOLINE_AMD,
 };
 
+enum spectre_v2_user_cmd {
+	SPECTRE_V2_USER_CMD_NONE,
+	SPECTRE_V2_USER_CMD_AUTO,
+	SPECTRE_V2_USER_CMD_FORCE,
+};
+
+static const char * const spectre_v2_user_strings[] = {
+	[SPECTRE_V2_USER_NONE]		= "User space: Vulnerable",
+	[SPECTRE_V2_USER_STRICT]	= "User space: Mitigation: STIBP protection",
+};
+
+static const struct {
+	const char			*option;
+	enum spectre_v2_user_cmd	cmd;
+	bool				secure;
+} v2_user_options[] __initdata = {
+	{ "auto",	SPECTRE_V2_USER_CMD_AUTO,	false },
+	{ "off",	SPECTRE_V2_USER_CMD_NONE,	false },
+	{ "on",		SPECTRE_V2_USER_CMD_FORCE,	true  },
+};
+
+static void __init spec_v2_user_print_cond(const char *reason, bool secure)
+{
+	if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2) != secure)
+		pr_info("spectre_v2_user=%s forced on command line.\n", reason);
+}
+
+static enum spectre_v2_user_cmd __init
+spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd)
+{
+	char arg[20];
+	int ret, i;
+
+	switch (v2_cmd) {
+	case SPECTRE_V2_CMD_NONE:
+		return SPECTRE_V2_USER_CMD_NONE;
+	case SPECTRE_V2_CMD_FORCE:
+		return SPECTRE_V2_USER_CMD_FORCE;
+	default:
+		break;
+	}
+
+	ret = cmdline_find_option(boot_command_line, "spectre_v2_user",
+				  arg, sizeof(arg));
+	if (ret < 0)
+		return SPECTRE_V2_USER_CMD_AUTO;
+
+	for (i = 0; i < ARRAY_SIZE(v2_user_options); i++) {
+		if (match_option(arg, ret, v2_user_options[i].option)) {
+			spec_v2_user_print_cond(v2_user_options[i].option,
+						v2_user_options[i].secure);
+			return v2_user_options[i].cmd;
+		}
+	}
+
+	pr_err("Unknown user space protection option (%s). Switching to AUTO select\n", arg);
+	return SPECTRE_V2_USER_CMD_AUTO;
+}
+
+static void __init
+spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
+{
+	enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE;
+	bool smt_possible = IS_ENABLED(CONFIG_SMP);
+
+	if (!boot_cpu_has(X86_FEATURE_IBPB) && !boot_cpu_has(X86_FEATURE_STIBP))
+		return;
+
+	if (cpu_smt_control == CPU_SMT_FORCE_DISABLED ||
+	    cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
+		smt_possible = false;
+
+	switch (spectre_v2_parse_user_cmdline(v2_cmd)) {
+	case SPECTRE_V2_USER_CMD_AUTO:
+	case SPECTRE_V2_USER_CMD_NONE:
+		goto set_mode;
+	case SPECTRE_V2_USER_CMD_FORCE:
+		mode = SPECTRE_V2_USER_STRICT;
+		break;
+	}
+
+	/* Initialize Indirect Branch Prediction Barrier */
+	if (boot_cpu_has(X86_FEATURE_IBPB)) {
+		setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
+		pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n");
+	}
+
+	/* If enhanced IBRS is enabled no STIPB required */
+	if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
+		return;
+
+set_mode:
+	spectre_v2_user = mode;
+	/* Only print the STIBP mode when SMT possible */
+	if (smt_possible)
+		pr_info("%s\n", spectre_v2_user_strings[mode]);
+}
+
 static const char * const spectre_v2_strings[] = {
 	[SPECTRE_V2_NONE]			= "Vulnerable",
 	[SPECTRE_V2_RETPOLINE_GENERIC]		= "Mitigation: Full generic retpoline",
@@ -385,12 +489,6 @@ static void __init spectre_v2_select_mitigation(void)
 	setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
 	pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n");
 
-	/* Initialize Indirect Branch Prediction Barrier if supported */
-	if (boot_cpu_has(X86_FEATURE_IBPB)) {
-		setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
-		pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n");
-	}
-
 	/*
 	 * Retpoline means the kernel is safe because it has no indirect
 	 * branches. Enhanced IBRS protects firmware too, so, enable restricted
@@ -407,23 +505,21 @@ static void __init spectre_v2_select_mitigation(void)
 		pr_info("Enabling Restricted Speculation for firmware calls\n");
 	}
 
+	/* Set up IBPB and STIBP depending on the general spectre V2 command */
+	spectre_v2_user_select_mitigation(cmd);
+
 	/* Enable STIBP if appropriate */
 	arch_smt_update();
 }
 
 static bool stibp_needed(void)
 {
-	if (spectre_v2_enabled == SPECTRE_V2_NONE)
-		return false;
-
 	/* Enhanced IBRS makes using STIBP unnecessary. */
 	if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
 		return false;
 
-	if (!boot_cpu_has(X86_FEATURE_STIBP))
-		return false;
-
-	return true;
+	/* Check for strict user mitigation mode */
+	return spectre_v2_user == SPECTRE_V2_USER_STRICT;
 }
 
 static void update_stibp_msr(void *info)
@@ -844,10 +940,13 @@ static char *stibp_state(void)
 	if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
 		return "";
 
-	if (x86_spec_ctrl_base & SPEC_CTRL_STIBP)
-		return ", STIBP";
-	else
-		return "";
+	switch (spectre_v2_user) {
+	case SPECTRE_V2_USER_NONE:
+		return ", STIBP: disabled";
+	case SPECTRE_V2_USER_STRICT:
+		return ", STIBP: forced";
+	}
+	return "";
 }
 
 static char *ibpb_state(void)
-- 
GitLab


From 5bfbe3ad5840d941b89bcac54b821ba14f50a0ba Mon Sep 17 00:00:00 2001
From: Tim Chen <tim.c.chen@linux.intel.com>
Date: Sun, 25 Nov 2018 19:33:46 +0100
Subject: [PATCH 1525/1890] x86/speculation: Prepare for per task indirect
 branch speculation control

To avoid the overhead of STIBP always on, it's necessary to allow per task
control of STIBP.

Add a new task flag TIF_SPEC_IB and evaluate it during context switch if
SMT is active and flag evaluation is enabled by the speculation control
code. Add the conditional evaluation to x86_virt_spec_ctrl() as well so the
guest/host switch works properly.

This has no effect because TIF_SPEC_IB cannot be set yet and the static key
which controls evaluation is off. Preparatory patch for adding the control
code.

[ tglx: Simplify the context switch logic and make the TIF evaluation
  	depend on SMP=y and on the static key controlling the conditional
  	update. Rename it to TIF_SPEC_IB because it controls both STIBP and
  	IBPB ]

Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Casey Schaufler <casey.schaufler@intel.com>
Cc: Asit Mallick <asit.k.mallick@intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Jon Masters <jcm@redhat.com>
Cc: Waiman Long <longman9394@gmail.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Dave Stewart <david.c.stewart@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20181125185005.176917199@linutronix.de
---
 arch/x86/include/asm/msr-index.h   |  5 +++--
 arch/x86/include/asm/spec-ctrl.h   | 12 ++++++++++++
 arch/x86/include/asm/thread_info.h |  5 ++++-
 arch/x86/kernel/cpu/bugs.c         |  4 ++++
 arch/x86/kernel/process.c          | 20 ++++++++++++++++++--
 5 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 80f4a4f38c79c..c8f73efb4eceb 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -41,9 +41,10 @@
 
 #define MSR_IA32_SPEC_CTRL		0x00000048 /* Speculation Control */
 #define SPEC_CTRL_IBRS			(1 << 0)   /* Indirect Branch Restricted Speculation */
-#define SPEC_CTRL_STIBP			(1 << 1)   /* Single Thread Indirect Branch Predictors */
+#define SPEC_CTRL_STIBP_SHIFT		1	   /* Single Thread Indirect Branch Predictor (STIBP) bit */
+#define SPEC_CTRL_STIBP			(1 << SPEC_CTRL_STIBP_SHIFT)	/* STIBP mask */
 #define SPEC_CTRL_SSBD_SHIFT		2	   /* Speculative Store Bypass Disable bit */
-#define SPEC_CTRL_SSBD			(1 << SPEC_CTRL_SSBD_SHIFT)   /* Speculative Store Bypass Disable */
+#define SPEC_CTRL_SSBD			(1 << SPEC_CTRL_SSBD_SHIFT)	/* Speculative Store Bypass Disable */
 
 #define MSR_IA32_PRED_CMD		0x00000049 /* Prediction Command */
 #define PRED_CMD_IBPB			(1 << 0)   /* Indirect Branch Prediction Barrier */
diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h
index 8e2f8411c7a76..27b0bce3933ba 100644
--- a/arch/x86/include/asm/spec-ctrl.h
+++ b/arch/x86/include/asm/spec-ctrl.h
@@ -53,12 +53,24 @@ static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn)
 	return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT);
 }
 
+static inline u64 stibp_tif_to_spec_ctrl(u64 tifn)
+{
+	BUILD_BUG_ON(TIF_SPEC_IB < SPEC_CTRL_STIBP_SHIFT);
+	return (tifn & _TIF_SPEC_IB) >> (TIF_SPEC_IB - SPEC_CTRL_STIBP_SHIFT);
+}
+
 static inline unsigned long ssbd_spec_ctrl_to_tif(u64 spec_ctrl)
 {
 	BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT);
 	return (spec_ctrl & SPEC_CTRL_SSBD) << (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT);
 }
 
+static inline unsigned long stibp_spec_ctrl_to_tif(u64 spec_ctrl)
+{
+	BUILD_BUG_ON(TIF_SPEC_IB < SPEC_CTRL_STIBP_SHIFT);
+	return (spec_ctrl & SPEC_CTRL_STIBP) << (TIF_SPEC_IB - SPEC_CTRL_STIBP_SHIFT);
+}
+
 static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn)
 {
 	return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL;
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 523c69efc38a8..fa583ec99e3e9 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -83,6 +83,7 @@ struct thread_info {
 #define TIF_SYSCALL_EMU		6	/* syscall emulation active */
 #define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
 #define TIF_SECCOMP		8	/* secure computing */
+#define TIF_SPEC_IB		9	/* Indirect branch speculation mitigation */
 #define TIF_USER_RETURN_NOTIFY	11	/* notify kernel of userspace return */
 #define TIF_UPROBE		12	/* breakpointed or singlestepping */
 #define TIF_PATCH_PENDING	13	/* pending live patching update */
@@ -110,6 +111,7 @@ struct thread_info {
 #define _TIF_SYSCALL_EMU	(1 << TIF_SYSCALL_EMU)
 #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
+#define _TIF_SPEC_IB		(1 << TIF_SPEC_IB)
 #define _TIF_USER_RETURN_NOTIFY	(1 << TIF_USER_RETURN_NOTIFY)
 #define _TIF_UPROBE		(1 << TIF_UPROBE)
 #define _TIF_PATCH_PENDING	(1 << TIF_PATCH_PENDING)
@@ -146,7 +148,8 @@ struct thread_info {
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW							\
-	(_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_SSBD)
+	(_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP|		\
+	 _TIF_SSBD|_TIF_SPEC_IB)
 
 #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 3a223cce1fac1..1e13dbfc09197 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -148,6 +148,10 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
 		    static_cpu_has(X86_FEATURE_AMD_SSBD))
 			hostval |= ssbd_tif_to_spec_ctrl(ti->flags);
 
+		/* Conditional STIBP enabled? */
+		if (static_branch_unlikely(&switch_to_cond_stibp))
+			hostval |= stibp_tif_to_spec_ctrl(ti->flags);
+
 		if (hostval != guestval) {
 			msrval = setguest ? guestval : hostval;
 			wrmsrl(MSR_IA32_SPEC_CTRL, msrval);
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 70e9832379e13..574b144d2b536 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -404,11 +404,17 @@ static __always_inline void amd_set_ssb_virt_state(unsigned long tifn)
 static __always_inline void __speculation_ctrl_update(unsigned long tifp,
 						      unsigned long tifn)
 {
+	unsigned long tif_diff = tifp ^ tifn;
 	u64 msr = x86_spec_ctrl_base;
 	bool updmsr = false;
 
-	/* If TIF_SSBD is different, select the proper mitigation method */
-	if ((tifp ^ tifn) & _TIF_SSBD) {
+	/*
+	 * If TIF_SSBD is different, select the proper mitigation
+	 * method. Note that if SSBD mitigation is disabled or permanentely
+	 * enabled this branch can't be taken because nothing can set
+	 * TIF_SSBD.
+	 */
+	if (tif_diff & _TIF_SSBD) {
 		if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) {
 			amd_set_ssb_virt_state(tifn);
 		} else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) {
@@ -420,6 +426,16 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp,
 		}
 	}
 
+	/*
+	 * Only evaluate TIF_SPEC_IB if conditional STIBP is enabled,
+	 * otherwise avoid the MSR write.
+	 */
+	if (IS_ENABLED(CONFIG_SMP) &&
+	    static_branch_unlikely(&switch_to_cond_stibp)) {
+		updmsr |= !!(tif_diff & _TIF_SPEC_IB);
+		msr |= stibp_tif_to_spec_ctrl(tifn);
+	}
+
 	if (updmsr)
 		wrmsrl(MSR_IA32_SPEC_CTRL, msr);
 }
-- 
GitLab


From ff16701a29cba3aafa0bd1656d766813b2d0a811 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 25 Nov 2018 19:33:47 +0100
Subject: [PATCH 1526/1890] x86/process: Consolidate and simplify
 switch_to_xtra() code

Move the conditional invocation of __switch_to_xtra() into an inline
function so the logic can be shared between 32 and 64 bit.

Remove the handthrough of the TSS pointer and retrieve the pointer directly
in the bitmap handling function. Use this_cpu_ptr() instead of the
per_cpu() indirection.

This is a preparatory change so integration of conditional indirect branch
speculation optimization happens only in one place.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Casey Schaufler <casey.schaufler@intel.com>
Cc: Asit Mallick <asit.k.mallick@intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Jon Masters <jcm@redhat.com>
Cc: Waiman Long <longman9394@gmail.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Dave Stewart <david.c.stewart@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20181125185005.280855518@linutronix.de
---
 arch/x86/include/asm/switch_to.h |  3 ---
 arch/x86/kernel/process.c        | 12 +++++++-----
 arch/x86/kernel/process.h        | 24 ++++++++++++++++++++++++
 arch/x86/kernel/process_32.c     | 10 +++-------
 arch/x86/kernel/process_64.c     | 10 +++-------
 5 files changed, 37 insertions(+), 22 deletions(-)
 create mode 100644 arch/x86/kernel/process.h

diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 36bd243843d6d..7cf1a270d8910 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -11,9 +11,6 @@ struct task_struct *__switch_to_asm(struct task_struct *prev,
 
 __visible struct task_struct *__switch_to(struct task_struct *prev,
 					  struct task_struct *next);
-struct tss_struct;
-void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
-		      struct tss_struct *tss);
 
 /* This runs runs on the previous thread's stack. */
 static inline void prepare_switch_to(struct task_struct *next)
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 574b144d2b536..cdf8e6694f711 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -40,6 +40,8 @@
 #include <asm/prctl.h>
 #include <asm/spec-ctrl.h>
 
+#include "process.h"
+
 /*
  * per-CPU TSS segments. Threads are completely 'soft' on Linux,
  * no more per-task TSS's. The TSS size is kept cacheline-aligned
@@ -252,11 +254,12 @@ void arch_setup_new_exec(void)
 		enable_cpuid();
 }
 
-static inline void switch_to_bitmap(struct tss_struct *tss,
-				    struct thread_struct *prev,
+static inline void switch_to_bitmap(struct thread_struct *prev,
 				    struct thread_struct *next,
 				    unsigned long tifp, unsigned long tifn)
 {
+	struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
+
 	if (tifn & _TIF_IO_BITMAP) {
 		/*
 		 * Copy the relevant range of the IO bitmap.
@@ -448,8 +451,7 @@ void speculation_ctrl_update(unsigned long tif)
 	preempt_enable();
 }
 
-void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
-		      struct tss_struct *tss)
+void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p)
 {
 	struct thread_struct *prev, *next;
 	unsigned long tifp, tifn;
@@ -459,7 +461,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 
 	tifn = READ_ONCE(task_thread_info(next_p)->flags);
 	tifp = READ_ONCE(task_thread_info(prev_p)->flags);
-	switch_to_bitmap(tss, prev, next, tifp, tifn);
+	switch_to_bitmap(prev, next, tifp, tifn);
 
 	propagate_user_return_notify(prev_p, next_p);
 
diff --git a/arch/x86/kernel/process.h b/arch/x86/kernel/process.h
new file mode 100644
index 0000000000000..020fbfac3a27a
--- /dev/null
+++ b/arch/x86/kernel/process.h
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Code shared between 32 and 64 bit
+
+void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p);
+
+/*
+ * This needs to be inline to optimize for the common case where no extra
+ * work needs to be done.
+ */
+static inline void switch_to_extra(struct task_struct *prev,
+				   struct task_struct *next)
+{
+	unsigned long next_tif = task_thread_info(next)->flags;
+	unsigned long prev_tif = task_thread_info(prev)->flags;
+
+	/*
+	 * __switch_to_xtra() handles debug registers, i/o bitmaps,
+	 * speculation mitigations etc.
+	 */
+	if (unlikely(next_tif & _TIF_WORK_CTXSW_NEXT ||
+		     prev_tif & _TIF_WORK_CTXSW_PREV))
+		__switch_to_xtra(prev, next);
+}
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 5046a3c9dec2f..d3e593eb189f0 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -59,6 +59,8 @@
 #include <asm/intel_rdt_sched.h>
 #include <asm/proto.h>
 
+#include "process.h"
+
 void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
 {
 	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
@@ -232,7 +234,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	struct fpu *prev_fpu = &prev->fpu;
 	struct fpu *next_fpu = &next->fpu;
 	int cpu = smp_processor_id();
-	struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
 
 	/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
 
@@ -264,12 +265,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl))
 		set_iopl_mask(next->iopl);
 
-	/*
-	 * Now maybe handle debug registers and/or IO bitmaps
-	 */
-	if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV ||
-		     task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
-		__switch_to_xtra(prev_p, next_p, tss);
+	switch_to_extra(prev_p, next_p);
 
 	/*
 	 * Leave lazy mode, flushing any hypercalls made here.
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 0e0b4288a4b2b..bbfbf017065c3 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -60,6 +60,8 @@
 #include <asm/unistd_32_ia32.h>
 #endif
 
+#include "process.h"
+
 /* Prints also some state that isn't saved in the pt_regs */
 void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
 {
@@ -553,7 +555,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	struct fpu *prev_fpu = &prev->fpu;
 	struct fpu *next_fpu = &next->fpu;
 	int cpu = smp_processor_id();
-	struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
 
 	WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
 		     this_cpu_read(irq_count) != -1);
@@ -617,12 +618,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	/* Reload sp0. */
 	update_task_stack(next_p);
 
-	/*
-	 * Now maybe reload the debug registers and handle I/O bitmaps
-	 */
-	if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT ||
-		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
-		__switch_to_xtra(prev_p, next_p, tss);
+	switch_to_extra(prev_p, next_p);
 
 #ifdef CONFIG_XEN_PV
 	/*
-- 
GitLab


From 5635d99953f04b550738f6f4c1c532667c3fd872 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 25 Nov 2018 19:33:48 +0100
Subject: [PATCH 1527/1890] x86/speculation: Avoid __switch_to_xtra() calls

The TIF_SPEC_IB bit does not need to be evaluated in the decision to invoke
__switch_to_xtra() when:

 - CONFIG_SMP is disabled

 - The conditional STIPB mode is disabled

The TIF_SPEC_IB bit still controls IBPB in both cases so the TIF work mask
checks might invoke __switch_to_xtra() for nothing if TIF_SPEC_IB is the
only set bit in the work masks.

Optimize it out by masking the bit at compile time for CONFIG_SMP=n and at
run time when the static key controlling the conditional STIBP mode is
disabled.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Casey Schaufler <casey.schaufler@intel.com>
Cc: Asit Mallick <asit.k.mallick@intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Jon Masters <jcm@redhat.com>
Cc: Waiman Long <longman9394@gmail.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Dave Stewart <david.c.stewart@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20181125185005.374062201@linutronix.de
---
 arch/x86/include/asm/thread_info.h | 13 +++++++++++--
 arch/x86/kernel/process.h          | 15 +++++++++++++++
 2 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index fa583ec99e3e9..6d201699c6511 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -147,9 +147,18 @@ struct thread_info {
 	 _TIF_FSCHECK)
 
 /* flags to check in __switch_to() */
-#define _TIF_WORK_CTXSW							\
+#define _TIF_WORK_CTXSW_BASE						\
 	(_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP|		\
-	 _TIF_SSBD|_TIF_SPEC_IB)
+	 _TIF_SSBD)
+
+/*
+ * Avoid calls to __switch_to_xtra() on UP as STIBP is not evaluated.
+ */
+#ifdef CONFIG_SMP
+# define _TIF_WORK_CTXSW	(_TIF_WORK_CTXSW_BASE | _TIF_SPEC_IB)
+#else
+# define _TIF_WORK_CTXSW	(_TIF_WORK_CTXSW_BASE)
+#endif
 
 #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
diff --git a/arch/x86/kernel/process.h b/arch/x86/kernel/process.h
index 020fbfac3a27a..898e97cf6629d 100644
--- a/arch/x86/kernel/process.h
+++ b/arch/x86/kernel/process.h
@@ -2,6 +2,8 @@
 //
 // Code shared between 32 and 64 bit
 
+#include <asm/spec-ctrl.h>
+
 void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p);
 
 /*
@@ -14,6 +16,19 @@ static inline void switch_to_extra(struct task_struct *prev,
 	unsigned long next_tif = task_thread_info(next)->flags;
 	unsigned long prev_tif = task_thread_info(prev)->flags;
 
+	if (IS_ENABLED(CONFIG_SMP)) {
+		/*
+		 * Avoid __switch_to_xtra() invocation when conditional
+		 * STIPB is disabled and the only different bit is
+		 * TIF_SPEC_IB. For CONFIG_SMP=n TIF_SPEC_IB is not
+		 * in the TIF_WORK_CTXSW masks.
+		 */
+		if (!static_branch_likely(&switch_to_cond_stibp)) {
+			prev_tif &= ~_TIF_SPEC_IB;
+			next_tif &= ~_TIF_SPEC_IB;
+		}
+	}
+
 	/*
 	 * __switch_to_xtra() handles debug registers, i/o bitmaps,
 	 * speculation mitigations etc.
-- 
GitLab


From 4c71a2b6fd7e42814aa68a6dec88abf3b42ea573 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 25 Nov 2018 19:33:49 +0100
Subject: [PATCH 1528/1890] x86/speculation: Prepare for conditional IBPB in
 switch_mm()

The IBPB speculation barrier is issued from switch_mm() when the kernel
switches to a user space task with a different mm than the user space task
which ran last on the same CPU.

An additional optimization is to avoid IBPB when the incoming task can be
ptraced by the outgoing task. This optimization only works when switching
directly between two user space tasks. When switching from a kernel task to
a user space task the optimization fails because the previous task cannot
be accessed anymore. So for quite some scenarios the optimization is just
adding overhead.

The upcoming conditional IBPB support will issue IBPB only for user space
tasks which have the TIF_SPEC_IB bit set. This requires to handle the
following cases:

  1) Switch from a user space task (potential attacker) which has
     TIF_SPEC_IB set to a user space task (potential victim) which has
     TIF_SPEC_IB not set.

  2) Switch from a user space task (potential attacker) which has
     TIF_SPEC_IB not set to a user space task (potential victim) which has
     TIF_SPEC_IB set.

This needs to be optimized for the case where the IBPB can be avoided when
only kernel threads ran in between user space tasks which belong to the
same process.

The current check whether two tasks belong to the same context is using the
tasks context id. While correct, it's simpler to use the mm pointer because
it allows to mangle the TIF_SPEC_IB bit into it. The context id based
mechanism requires extra storage, which creates worse code.

When a task is scheduled out its TIF_SPEC_IB bit is mangled as bit 0 into
the per CPU storage which is used to track the last user space mm which was
running on a CPU. This bit can be used together with the TIF_SPEC_IB bit of
the incoming task to make the decision whether IBPB needs to be issued or
not to cover the two cases above.

As conditional IBPB is going to be the default, remove the dubious ptrace
check for the IBPB always case and simply issue IBPB always when the
process changes.

Move the storage to a different place in the struct as the original one
created a hole.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Casey Schaufler <casey.schaufler@intel.com>
Cc: Asit Mallick <asit.k.mallick@intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Jon Masters <jcm@redhat.com>
Cc: Waiman Long <longman9394@gmail.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Dave Stewart <david.c.stewart@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20181125185005.466447057@linutronix.de
---
 arch/x86/include/asm/nospec-branch.h |   2 +
 arch/x86/include/asm/tlbflush.h      |   8 +-
 arch/x86/kernel/cpu/bugs.c           |  29 +++++--
 arch/x86/mm/tlb.c                    | 115 ++++++++++++++++++++-------
 4 files changed, 118 insertions(+), 36 deletions(-)

diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index be0b0aa780e20..d4d35baf04308 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -312,6 +312,8 @@ do {									\
 } while (0)
 
 DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp);
+DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
+DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
 
 #endif /* __ASSEMBLY__ */
 
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index d760611cfc351..f4204bf377fcf 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -169,10 +169,14 @@ struct tlb_state {
 
 #define LOADED_MM_SWITCHING ((struct mm_struct *)1)
 
+	/* Last user mm for optimizing IBPB */
+	union {
+		struct mm_struct	*last_user_mm;
+		unsigned long		last_user_mm_ibpb;
+	};
+
 	u16 loaded_mm_asid;
 	u16 next_asid;
-	/* last user mm's ctx id */
-	u64 last_ctx_id;
 
 	/*
 	 * We can be in one of several states:
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 1e13dbfc09197..7c946a9af947e 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -56,6 +56,10 @@ u64 __ro_after_init x86_amd_ls_cfg_ssbd_mask;
 
 /* Control conditional STIPB in switch_to() */
 DEFINE_STATIC_KEY_FALSE(switch_to_cond_stibp);
+/* Control conditional IBPB in switch_mm() */
+DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
+/* Control unconditional IBPB in switch_mm() */
+DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
 
 void __init check_bugs(void)
 {
@@ -331,7 +335,17 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
 	/* Initialize Indirect Branch Prediction Barrier */
 	if (boot_cpu_has(X86_FEATURE_IBPB)) {
 		setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
-		pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n");
+
+		switch (mode) {
+		case SPECTRE_V2_USER_STRICT:
+			static_branch_enable(&switch_mm_always_ibpb);
+			break;
+		default:
+			break;
+		}
+
+		pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n",
+			mode == SPECTRE_V2_USER_STRICT ? "always-on" : "conditional");
 	}
 
 	/* If enhanced IBRS is enabled no STIPB required */
@@ -955,10 +969,15 @@ static char *stibp_state(void)
 
 static char *ibpb_state(void)
 {
-	if (boot_cpu_has(X86_FEATURE_USE_IBPB))
-		return ", IBPB";
-	else
-		return "";
+	if (boot_cpu_has(X86_FEATURE_IBPB)) {
+		switch (spectre_v2_user) {
+		case SPECTRE_V2_USER_NONE:
+			return ", IBPB: disabled";
+		case SPECTRE_V2_USER_STRICT:
+			return ", IBPB: always-on";
+		}
+	}
+	return "";
 }
 
 static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index bddd6b3cee1de..03b6b4c2238da 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -7,7 +7,6 @@
 #include <linux/export.h>
 #include <linux/cpu.h>
 #include <linux/debugfs.h>
-#include <linux/ptrace.h>
 
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
@@ -30,6 +29,12 @@
  *	Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
  */
 
+/*
+ * Use bit 0 to mangle the TIF_SPEC_IB state into the mm pointer which is
+ * stored in cpu_tlb_state.last_user_mm_ibpb.
+ */
+#define LAST_USER_MM_IBPB	0x1UL
+
 /*
  * We get here when we do something requiring a TLB invalidation
  * but could not go invalidate all of the contexts.  We do the
@@ -181,17 +186,87 @@ static void sync_current_stack_to_mm(struct mm_struct *mm)
 	}
 }
 
-static bool ibpb_needed(struct task_struct *tsk, u64 last_ctx_id)
+static inline unsigned long mm_mangle_tif_spec_ib(struct task_struct *next)
+{
+	unsigned long next_tif = task_thread_info(next)->flags;
+	unsigned long ibpb = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_IBPB;
+
+	return (unsigned long)next->mm | ibpb;
+}
+
+static void cond_ibpb(struct task_struct *next)
 {
+	if (!next || !next->mm)
+		return;
+
 	/*
-	 * Check if the current (previous) task has access to the memory
-	 * of the @tsk (next) task. If access is denied, make sure to
-	 * issue a IBPB to stop user->user Spectre-v2 attacks.
-	 *
-	 * Note: __ptrace_may_access() returns 0 or -ERRNO.
+	 * Both, the conditional and the always IBPB mode use the mm
+	 * pointer to avoid the IBPB when switching between tasks of the
+	 * same process. Using the mm pointer instead of mm->context.ctx_id
+	 * opens a hypothetical hole vs. mm_struct reuse, which is more or
+	 * less impossible to control by an attacker. Aside of that it
+	 * would only affect the first schedule so the theoretically
+	 * exposed data is not really interesting.
 	 */
-	return (tsk && tsk->mm && tsk->mm->context.ctx_id != last_ctx_id &&
-		ptrace_may_access_sched(tsk, PTRACE_MODE_SPEC_IBPB));
+	if (static_branch_likely(&switch_mm_cond_ibpb)) {
+		unsigned long prev_mm, next_mm;
+
+		/*
+		 * This is a bit more complex than the always mode because
+		 * it has to handle two cases:
+		 *
+		 * 1) Switch from a user space task (potential attacker)
+		 *    which has TIF_SPEC_IB set to a user space task
+		 *    (potential victim) which has TIF_SPEC_IB not set.
+		 *
+		 * 2) Switch from a user space task (potential attacker)
+		 *    which has TIF_SPEC_IB not set to a user space task
+		 *    (potential victim) which has TIF_SPEC_IB set.
+		 *
+		 * This could be done by unconditionally issuing IBPB when
+		 * a task which has TIF_SPEC_IB set is either scheduled in
+		 * or out. Though that results in two flushes when:
+		 *
+		 * - the same user space task is scheduled out and later
+		 *   scheduled in again and only a kernel thread ran in
+		 *   between.
+		 *
+		 * - a user space task belonging to the same process is
+		 *   scheduled in after a kernel thread ran in between
+		 *
+		 * - a user space task belonging to the same process is
+		 *   scheduled in immediately.
+		 *
+		 * Optimize this with reasonably small overhead for the
+		 * above cases. Mangle the TIF_SPEC_IB bit into the mm
+		 * pointer of the incoming task which is stored in
+		 * cpu_tlbstate.last_user_mm_ibpb for comparison.
+		 */
+		next_mm = mm_mangle_tif_spec_ib(next);
+		prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_ibpb);
+
+		/*
+		 * Issue IBPB only if the mm's are different and one or
+		 * both have the IBPB bit set.
+		 */
+		if (next_mm != prev_mm &&
+		    (next_mm | prev_mm) & LAST_USER_MM_IBPB)
+			indirect_branch_prediction_barrier();
+
+		this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, next_mm);
+	}
+
+	if (static_branch_unlikely(&switch_mm_always_ibpb)) {
+		/*
+		 * Only flush when switching to a user space task with a
+		 * different context than the user space task which ran
+		 * last on this CPU.
+		 */
+		if (this_cpu_read(cpu_tlbstate.last_user_mm) != next->mm) {
+			indirect_branch_prediction_barrier();
+			this_cpu_write(cpu_tlbstate.last_user_mm, next->mm);
+		}
+	}
 }
 
 void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
@@ -292,22 +367,12 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 		new_asid = prev_asid;
 		need_flush = true;
 	} else {
-		u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id);
-
 		/*
 		 * Avoid user/user BTB poisoning by flushing the branch
 		 * predictor when switching between processes. This stops
 		 * one process from doing Spectre-v2 attacks on another.
-		 *
-		 * As an optimization, flush indirect branches only when
-		 * switching into a processes that can't be ptrace by the
-		 * current one (as in such case, attacker has much more
-		 * convenient way how to tamper with the next process than
-		 * branch buffer poisoning).
 		 */
-		if (static_cpu_has(X86_FEATURE_USE_IBPB) &&
-				ibpb_needed(tsk, last_ctx_id))
-			indirect_branch_prediction_barrier();
+		cond_ibpb(tsk);
 
 		if (IS_ENABLED(CONFIG_VMAP_STACK)) {
 			/*
@@ -365,14 +430,6 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 		trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
 	}
 
-	/*
-	 * Record last user mm's context id, so we can avoid
-	 * flushing branch buffer with IBPB if we switch back
-	 * to the same user.
-	 */
-	if (next != &init_mm)
-		this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id);
-
 	/* Make sure we write CR3 before loaded_mm. */
 	barrier();
 
@@ -441,7 +498,7 @@ void initialize_tlbstate_and_flush(void)
 	write_cr3(build_cr3(mm->pgd, 0));
 
 	/* Reinitialize tlbstate. */
-	this_cpu_write(cpu_tlbstate.last_ctx_id, mm->context.ctx_id);
+	this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, LAST_USER_MM_IBPB);
 	this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
 	this_cpu_write(cpu_tlbstate.next_asid, 1);
 	this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
-- 
GitLab


From 46f7ecb1e7359f183f5bbd1e08b90e10e52164f9 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 25 Nov 2018 19:33:50 +0100
Subject: [PATCH 1529/1890] ptrace: Remove unused ptrace_may_access_sched() and
 MODE_IBRS

The IBPB control code in x86 removed the usage. Remove the functionality
which was introduced for this.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Casey Schaufler <casey.schaufler@intel.com>
Cc: Asit Mallick <asit.k.mallick@intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Jon Masters <jcm@redhat.com>
Cc: Waiman Long <longman9394@gmail.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Dave Stewart <david.c.stewart@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20181125185005.559149393@linutronix.de
---
 include/linux/ptrace.h | 17 -----------------
 kernel/ptrace.c        | 10 ----------
 2 files changed, 27 deletions(-)

diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 6c2ffed907f5f..de20ede2c5c81 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -64,15 +64,12 @@ extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead);
 #define PTRACE_MODE_NOAUDIT	0x04
 #define PTRACE_MODE_FSCREDS	0x08
 #define PTRACE_MODE_REALCREDS	0x10
-#define PTRACE_MODE_SCHED	0x20
-#define PTRACE_MODE_IBPB	0x40
 
 /* shorthands for READ/ATTACH and FSCREDS/REALCREDS combinations */
 #define PTRACE_MODE_READ_FSCREDS (PTRACE_MODE_READ | PTRACE_MODE_FSCREDS)
 #define PTRACE_MODE_READ_REALCREDS (PTRACE_MODE_READ | PTRACE_MODE_REALCREDS)
 #define PTRACE_MODE_ATTACH_FSCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_FSCREDS)
 #define PTRACE_MODE_ATTACH_REALCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_REALCREDS)
-#define PTRACE_MODE_SPEC_IBPB (PTRACE_MODE_ATTACH_REALCREDS | PTRACE_MODE_IBPB)
 
 /**
  * ptrace_may_access - check whether the caller is permitted to access
@@ -90,20 +87,6 @@ extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead);
  */
 extern bool ptrace_may_access(struct task_struct *task, unsigned int mode);
 
-/**
- * ptrace_may_access - check whether the caller is permitted to access
- * a target task.
- * @task: target task
- * @mode: selects type of access and caller credentials
- *
- * Returns true on success, false on denial.
- *
- * Similar to ptrace_may_access(). Only to be called from context switch
- * code. Does not call into audit and the regular LSM hooks due to locking
- * constraints.
- */
-extern bool ptrace_may_access_sched(struct task_struct *task, unsigned int mode);
-
 static inline int ptrace_reparented(struct task_struct *child)
 {
 	return !same_thread_group(child->real_parent, child->parent);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 80b34dffdfb9b..c2cee9db52040 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -261,9 +261,6 @@ static int ptrace_check_attach(struct task_struct *child, bool ignore_state)
 
 static int ptrace_has_cap(struct user_namespace *ns, unsigned int mode)
 {
-	if (mode & PTRACE_MODE_SCHED)
-		return false;
-
 	if (mode & PTRACE_MODE_NOAUDIT)
 		return has_ns_capability_noaudit(current, ns, CAP_SYS_PTRACE);
 	else
@@ -331,16 +328,9 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
 	     !ptrace_has_cap(mm->user_ns, mode)))
 	    return -EPERM;
 
-	if (mode & PTRACE_MODE_SCHED)
-		return 0;
 	return security_ptrace_access_check(task, mode);
 }
 
-bool ptrace_may_access_sched(struct task_struct *task, unsigned int mode)
-{
-	return __ptrace_may_access(task, mode | PTRACE_MODE_SCHED);
-}
-
 bool ptrace_may_access(struct task_struct *task, unsigned int mode)
 {
 	int err;
-- 
GitLab


From e6da8bb6f9abb2628381904b24163c770e630bac Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 25 Nov 2018 19:33:51 +0100
Subject: [PATCH 1530/1890] x86/speculation: Split out TIF update

The update of the TIF_SSBD flag and the conditional speculation control MSR
update is done in the ssb_prctl_set() function directly. The upcoming prctl
support for controlling indirect branch speculation via STIBP needs the
same mechanism.

Split the code out and make it reusable. Reword the comment about updates
for other tasks.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Casey Schaufler <casey.schaufler@intel.com>
Cc: Asit Mallick <asit.k.mallick@intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Jon Masters <jcm@redhat.com>
Cc: Waiman Long <longman9394@gmail.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Dave Stewart <david.c.stewart@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20181125185005.652305076@linutronix.de
---
 arch/x86/kernel/cpu/bugs.c | 35 +++++++++++++++++++++++------------
 1 file changed, 23 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 7c946a9af947e..3b65a53d2c33e 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -702,10 +702,29 @@ static void ssb_select_mitigation(void)
 #undef pr_fmt
 #define pr_fmt(fmt)     "Speculation prctl: " fmt
 
-static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
+static void task_update_spec_tif(struct task_struct *tsk, int tifbit, bool on)
 {
 	bool update;
 
+	if (on)
+		update = !test_and_set_tsk_thread_flag(tsk, tifbit);
+	else
+		update = test_and_clear_tsk_thread_flag(tsk, tifbit);
+
+	/*
+	 * Immediately update the speculation control MSRs for the current
+	 * task, but for a non-current task delay setting the CPU
+	 * mitigation until it is scheduled next.
+	 *
+	 * This can only happen for SECCOMP mitigation. For PRCTL it's
+	 * always the current task.
+	 */
+	if (tsk == current && update)
+		speculation_ctrl_update_current();
+}
+
+static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
+{
 	if (ssb_mode != SPEC_STORE_BYPASS_PRCTL &&
 	    ssb_mode != SPEC_STORE_BYPASS_SECCOMP)
 		return -ENXIO;
@@ -716,28 +735,20 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
 		if (task_spec_ssb_force_disable(task))
 			return -EPERM;
 		task_clear_spec_ssb_disable(task);
-		update = test_and_clear_tsk_thread_flag(task, TIF_SSBD);
+		task_update_spec_tif(task, TIF_SSBD, false);
 		break;
 	case PR_SPEC_DISABLE:
 		task_set_spec_ssb_disable(task);
-		update = !test_and_set_tsk_thread_flag(task, TIF_SSBD);
+		task_update_spec_tif(task, TIF_SSBD, true);
 		break;
 	case PR_SPEC_FORCE_DISABLE:
 		task_set_spec_ssb_disable(task);
 		task_set_spec_ssb_force_disable(task);
-		update = !test_and_set_tsk_thread_flag(task, TIF_SSBD);
+		task_update_spec_tif(task, TIF_SSBD, true);
 		break;
 	default:
 		return -ERANGE;
 	}
-
-	/*
-	 * If being set on non-current task, delay setting the CPU
-	 * mitigation until it is next scheduled.
-	 */
-	if (task == current && update)
-		speculation_ctrl_update_current();
-
 	return 0;
 }
 
-- 
GitLab


From 6d991ba509ebcfcc908e009d1db51972a4f7a064 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 28 Nov 2018 10:56:57 +0100
Subject: [PATCH 1531/1890] x86/speculation: Prevent stale SPEC_CTRL msr
 content

The seccomp speculation control operates on all tasks of a process, but
only the current task of a process can update the MSR immediately. For the
other threads the update is deferred to the next context switch.

This creates the following situation with Process A and B:

Process A task 2 and Process B task 1 are pinned on CPU1. Process A task 2
does not have the speculation control TIF bit set. Process B task 1 has the
speculation control TIF bit set.

CPU0					CPU1
					MSR bit is set
					ProcB.T1 schedules out
					ProcA.T2 schedules in
					MSR bit is cleared
ProcA.T1
  seccomp_update()
  set TIF bit on ProcA.T2
					ProcB.T1 schedules in
					MSR is not updated  <-- FAIL

This happens because the context switch code tries to avoid the MSR update
if the speculation control TIF bits of the incoming and the outgoing task
are the same. In the worst case ProcB.T1 and ProcA.T2 are the only tasks
scheduling back and forth on CPU1, which keeps the MSR stale forever.

In theory this could be remedied by IPIs, but chasing the remote task which
could be migrated is complex and full of races.

The straight forward solution is to avoid the asychronous update of the TIF
bit and defer it to the next context switch. The speculation control state
is stored in task_struct::atomic_flags by the prctl and seccomp updates
already.

Add a new TIF_SPEC_FORCE_UPDATE bit and set this after updating the
atomic_flags. Check the bit on context switch and force a synchronous
update of the speculation control if set. Use the same mechanism for
updating the current task.

Reported-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Casey Schaufler <casey.schaufler@intel.com>
Cc: Asit Mallick <asit.k.mallick@intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Jon Masters <jcm@redhat.com>
Cc: Waiman Long <longman9394@gmail.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Dave Stewart <david.c.stewart@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1811272247140.1875@nanos.tec.linutronix.de
---
 arch/x86/include/asm/spec-ctrl.h   |  6 +-----
 arch/x86/include/asm/thread_info.h |  4 +++-
 arch/x86/kernel/cpu/bugs.c         | 18 +++++++-----------
 arch/x86/kernel/process.c          | 30 +++++++++++++++++++++++++++++-
 4 files changed, 40 insertions(+), 18 deletions(-)

diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h
index 27b0bce3933ba..5393babc05989 100644
--- a/arch/x86/include/asm/spec-ctrl.h
+++ b/arch/x86/include/asm/spec-ctrl.h
@@ -83,10 +83,6 @@ static inline void speculative_store_bypass_ht_init(void) { }
 #endif
 
 extern void speculation_ctrl_update(unsigned long tif);
-
-static inline void speculation_ctrl_update_current(void)
-{
-	speculation_ctrl_update(current_thread_info()->flags);
-}
+extern void speculation_ctrl_update_current(void);
 
 #endif
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 6d201699c6511..82b73b75d67ca 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -84,6 +84,7 @@ struct thread_info {
 #define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
 #define TIF_SECCOMP		8	/* secure computing */
 #define TIF_SPEC_IB		9	/* Indirect branch speculation mitigation */
+#define TIF_SPEC_FORCE_UPDATE	10	/* Force speculation MSR update in context switch */
 #define TIF_USER_RETURN_NOTIFY	11	/* notify kernel of userspace return */
 #define TIF_UPROBE		12	/* breakpointed or singlestepping */
 #define TIF_PATCH_PENDING	13	/* pending live patching update */
@@ -112,6 +113,7 @@ struct thread_info {
 #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
 #define _TIF_SPEC_IB		(1 << TIF_SPEC_IB)
+#define _TIF_SPEC_FORCE_UPDATE	(1 << TIF_SPEC_FORCE_UPDATE)
 #define _TIF_USER_RETURN_NOTIFY	(1 << TIF_USER_RETURN_NOTIFY)
 #define _TIF_UPROBE		(1 << TIF_UPROBE)
 #define _TIF_PATCH_PENDING	(1 << TIF_PATCH_PENDING)
@@ -149,7 +151,7 @@ struct thread_info {
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW_BASE						\
 	(_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP|		\
-	 _TIF_SSBD)
+	 _TIF_SSBD | _TIF_SPEC_FORCE_UPDATE)
 
 /*
  * Avoid calls to __switch_to_xtra() on UP as STIBP is not evaluated.
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 3b65a53d2c33e..29f40a92f5a89 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -702,14 +702,10 @@ static void ssb_select_mitigation(void)
 #undef pr_fmt
 #define pr_fmt(fmt)     "Speculation prctl: " fmt
 
-static void task_update_spec_tif(struct task_struct *tsk, int tifbit, bool on)
+static void task_update_spec_tif(struct task_struct *tsk)
 {
-	bool update;
-
-	if (on)
-		update = !test_and_set_tsk_thread_flag(tsk, tifbit);
-	else
-		update = test_and_clear_tsk_thread_flag(tsk, tifbit);
+	/* Force the update of the real TIF bits */
+	set_tsk_thread_flag(tsk, TIF_SPEC_FORCE_UPDATE);
 
 	/*
 	 * Immediately update the speculation control MSRs for the current
@@ -719,7 +715,7 @@ static void task_update_spec_tif(struct task_struct *tsk, int tifbit, bool on)
 	 * This can only happen for SECCOMP mitigation. For PRCTL it's
 	 * always the current task.
 	 */
-	if (tsk == current && update)
+	if (tsk == current)
 		speculation_ctrl_update_current();
 }
 
@@ -735,16 +731,16 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
 		if (task_spec_ssb_force_disable(task))
 			return -EPERM;
 		task_clear_spec_ssb_disable(task);
-		task_update_spec_tif(task, TIF_SSBD, false);
+		task_update_spec_tif(task);
 		break;
 	case PR_SPEC_DISABLE:
 		task_set_spec_ssb_disable(task);
-		task_update_spec_tif(task, TIF_SSBD, true);
+		task_update_spec_tif(task);
 		break;
 	case PR_SPEC_FORCE_DISABLE:
 		task_set_spec_ssb_disable(task);
 		task_set_spec_ssb_force_disable(task);
-		task_update_spec_tif(task, TIF_SSBD, true);
+		task_update_spec_tif(task);
 		break;
 	default:
 		return -ERANGE;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index cdf8e6694f711..afbe2eb4a1c6d 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -443,6 +443,18 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp,
 		wrmsrl(MSR_IA32_SPEC_CTRL, msr);
 }
 
+static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk)
+{
+	if (test_and_clear_tsk_thread_flag(tsk, TIF_SPEC_FORCE_UPDATE)) {
+		if (task_spec_ssb_disable(tsk))
+			set_tsk_thread_flag(tsk, TIF_SSBD);
+		else
+			clear_tsk_thread_flag(tsk, TIF_SSBD);
+	}
+	/* Return the updated threadinfo flags*/
+	return task_thread_info(tsk)->flags;
+}
+
 void speculation_ctrl_update(unsigned long tif)
 {
 	/* Forced update. Make sure all relevant TIF flags are different */
@@ -451,6 +463,14 @@ void speculation_ctrl_update(unsigned long tif)
 	preempt_enable();
 }
 
+/* Called from seccomp/prctl update */
+void speculation_ctrl_update_current(void)
+{
+	preempt_disable();
+	speculation_ctrl_update(speculation_ctrl_update_tif(current));
+	preempt_enable();
+}
+
 void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p)
 {
 	struct thread_struct *prev, *next;
@@ -482,7 +502,15 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p)
 	if ((tifp ^ tifn) & _TIF_NOCPUID)
 		set_cpuid_faulting(!!(tifn & _TIF_NOCPUID));
 
-	__speculation_ctrl_update(tifp, tifn);
+	if (likely(!((tifp | tifn) & _TIF_SPEC_FORCE_UPDATE))) {
+		__speculation_ctrl_update(tifp, tifn);
+	} else {
+		speculation_ctrl_update_tif(prev_p);
+		tifn = speculation_ctrl_update_tif(next_p);
+
+		/* Enforce MSR update to ensure consistent state */
+		__speculation_ctrl_update(~tifn, tifn);
+	}
 }
 
 /*
-- 
GitLab


From 6893a959d7fdebbab5f5aa112c277d5a44435ba1 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 25 Nov 2018 19:33:52 +0100
Subject: [PATCH 1532/1890] x86/speculation: Prepare arch_smt_update() for
 PRCTL mode

The upcoming fine grained per task STIBP control needs to be updated on CPU
hotplug as well.

Split out the code which controls the strict mode so the prctl control code
can be added later. Mark the SMP function call argument __unused while at it.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Casey Schaufler <casey.schaufler@intel.com>
Cc: Asit Mallick <asit.k.mallick@intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Jon Masters <jcm@redhat.com>
Cc: Waiman Long <longman9394@gmail.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Dave Stewart <david.c.stewart@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20181125185005.759457117@linutronix.de
---
 arch/x86/kernel/cpu/bugs.c | 46 +++++++++++++++++++++-----------------
 1 file changed, 25 insertions(+), 21 deletions(-)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 29f40a92f5a89..9cab538e10f1d 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -530,40 +530,44 @@ static void __init spectre_v2_select_mitigation(void)
 	arch_smt_update();
 }
 
-static bool stibp_needed(void)
+static void update_stibp_msr(void * __unused)
 {
-	/* Enhanced IBRS makes using STIBP unnecessary. */
-	if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
-		return false;
-
-	/* Check for strict user mitigation mode */
-	return spectre_v2_user == SPECTRE_V2_USER_STRICT;
+	wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
 }
 
-static void update_stibp_msr(void *info)
+/* Update x86_spec_ctrl_base in case SMT state changed. */
+static void update_stibp_strict(void)
 {
-	wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+	u64 mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP;
+
+	if (sched_smt_active())
+		mask |= SPEC_CTRL_STIBP;
+
+	if (mask == x86_spec_ctrl_base)
+		return;
+
+	pr_info("Update user space SMT mitigation: STIBP %s\n",
+		mask & SPEC_CTRL_STIBP ? "always-on" : "off");
+	x86_spec_ctrl_base = mask;
+	on_each_cpu(update_stibp_msr, NULL, 1);
 }
 
 void arch_smt_update(void)
 {
-	u64 mask;
-
-	if (!stibp_needed())
+	/* Enhanced IBRS implies STIBP. No update required. */
+	if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
 		return;
 
 	mutex_lock(&spec_ctrl_mutex);
 
-	mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP;
-	if (sched_smt_active())
-		mask |= SPEC_CTRL_STIBP;
-
-	if (mask != x86_spec_ctrl_base) {
-		pr_info("Spectre v2 cross-process SMT mitigation: %s STIBP\n",
-			mask & SPEC_CTRL_STIBP ? "Enabling" : "Disabling");
-		x86_spec_ctrl_base = mask;
-		on_each_cpu(update_stibp_msr, NULL, 1);
+	switch (spectre_v2_user) {
+	case SPECTRE_V2_USER_NONE:
+		break;
+	case SPECTRE_V2_USER_STRICT:
+		update_stibp_strict();
+		break;
 	}
+
 	mutex_unlock(&spec_ctrl_mutex);
 }
 
-- 
GitLab


From 9137bb27e60e554dab694eafa4cca241fa3a694f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 25 Nov 2018 19:33:53 +0100
Subject: [PATCH 1533/1890] x86/speculation: Add prctl() control for indirect
 branch speculation

Add the PR_SPEC_INDIRECT_BRANCH option for the PR_GET_SPECULATION_CTRL and
PR_SET_SPECULATION_CTRL prctls to allow fine grained per task control of
indirect branch speculation via STIBP and IBPB.

Invocations:
 Check indirect branch speculation status with
 - prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, 0, 0, 0);

 Enable indirect branch speculation with
 - prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_ENABLE, 0, 0);

 Disable indirect branch speculation with
 - prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_DISABLE, 0, 0);

 Force disable indirect branch speculation with
 - prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_FORCE_DISABLE, 0, 0);

See Documentation/userspace-api/spec_ctrl.rst.

Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Casey Schaufler <casey.schaufler@intel.com>
Cc: Asit Mallick <asit.k.mallick@intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Jon Masters <jcm@redhat.com>
Cc: Waiman Long <longman9394@gmail.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Dave Stewart <david.c.stewart@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20181125185005.866780996@linutronix.de
---
 Documentation/userspace-api/spec_ctrl.rst |  9 +++
 arch/x86/include/asm/nospec-branch.h      |  1 +
 arch/x86/kernel/cpu/bugs.c                | 67 +++++++++++++++++++++++
 arch/x86/kernel/process.c                 |  5 ++
 include/linux/sched.h                     |  9 +++
 include/uapi/linux/prctl.h                |  1 +
 tools/include/uapi/linux/prctl.h          |  1 +
 7 files changed, 93 insertions(+)

diff --git a/Documentation/userspace-api/spec_ctrl.rst b/Documentation/userspace-api/spec_ctrl.rst
index 32f3d55c54b75..c4dbe6f7cdae8 100644
--- a/Documentation/userspace-api/spec_ctrl.rst
+++ b/Documentation/userspace-api/spec_ctrl.rst
@@ -92,3 +92,12 @@ Speculation misfeature controls
    * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_ENABLE, 0, 0);
    * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE, 0, 0);
    * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_FORCE_DISABLE, 0, 0);
+
+- PR_SPEC_INDIR_BRANCH: Indirect Branch Speculation in User Processes
+                        (Mitigate Spectre V2 style attacks against user processes)
+
+  Invocations:
+   * prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, 0, 0, 0);
+   * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_ENABLE, 0, 0);
+   * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_DISABLE, 0, 0);
+   * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_FORCE_DISABLE, 0, 0);
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index d4d35baf04308..2adbe7b047fa2 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -232,6 +232,7 @@ enum spectre_v2_mitigation {
 enum spectre_v2_user_mitigation {
 	SPECTRE_V2_USER_NONE,
 	SPECTRE_V2_USER_STRICT,
+	SPECTRE_V2_USER_PRCTL,
 };
 
 /* The Speculative Store Bypass disable variants */
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 9cab538e10f1d..74359fff87fdb 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -566,6 +566,8 @@ void arch_smt_update(void)
 	case SPECTRE_V2_USER_STRICT:
 		update_stibp_strict();
 		break;
+	case SPECTRE_V2_USER_PRCTL:
+		break;
 	}
 
 	mutex_unlock(&spec_ctrl_mutex);
@@ -752,12 +754,50 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
 	return 0;
 }
 
+static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
+{
+	switch (ctrl) {
+	case PR_SPEC_ENABLE:
+		if (spectre_v2_user == SPECTRE_V2_USER_NONE)
+			return 0;
+		/*
+		 * Indirect branch speculation is always disabled in strict
+		 * mode.
+		 */
+		if (spectre_v2_user == SPECTRE_V2_USER_STRICT)
+			return -EPERM;
+		task_clear_spec_ib_disable(task);
+		task_update_spec_tif(task);
+		break;
+	case PR_SPEC_DISABLE:
+	case PR_SPEC_FORCE_DISABLE:
+		/*
+		 * Indirect branch speculation is always allowed when
+		 * mitigation is force disabled.
+		 */
+		if (spectre_v2_user == SPECTRE_V2_USER_NONE)
+			return -EPERM;
+		if (spectre_v2_user == SPECTRE_V2_USER_STRICT)
+			return 0;
+		task_set_spec_ib_disable(task);
+		if (ctrl == PR_SPEC_FORCE_DISABLE)
+			task_set_spec_ib_force_disable(task);
+		task_update_spec_tif(task);
+		break;
+	default:
+		return -ERANGE;
+	}
+	return 0;
+}
+
 int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
 			     unsigned long ctrl)
 {
 	switch (which) {
 	case PR_SPEC_STORE_BYPASS:
 		return ssb_prctl_set(task, ctrl);
+	case PR_SPEC_INDIRECT_BRANCH:
+		return ib_prctl_set(task, ctrl);
 	default:
 		return -ENODEV;
 	}
@@ -790,11 +830,34 @@ static int ssb_prctl_get(struct task_struct *task)
 	}
 }
 
+static int ib_prctl_get(struct task_struct *task)
+{
+	if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
+		return PR_SPEC_NOT_AFFECTED;
+
+	switch (spectre_v2_user) {
+	case SPECTRE_V2_USER_NONE:
+		return PR_SPEC_ENABLE;
+	case SPECTRE_V2_USER_PRCTL:
+		if (task_spec_ib_force_disable(task))
+			return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
+		if (task_spec_ib_disable(task))
+			return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
+		return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
+	case SPECTRE_V2_USER_STRICT:
+		return PR_SPEC_DISABLE;
+	default:
+		return PR_SPEC_NOT_AFFECTED;
+	}
+}
+
 int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
 {
 	switch (which) {
 	case PR_SPEC_STORE_BYPASS:
 		return ssb_prctl_get(task);
+	case PR_SPEC_INDIRECT_BRANCH:
+		return ib_prctl_get(task);
 	default:
 		return -ENODEV;
 	}
@@ -974,6 +1037,8 @@ static char *stibp_state(void)
 		return ", STIBP: disabled";
 	case SPECTRE_V2_USER_STRICT:
 		return ", STIBP: forced";
+	case SPECTRE_V2_USER_PRCTL:
+		return "";
 	}
 	return "";
 }
@@ -986,6 +1051,8 @@ static char *ibpb_state(void)
 			return ", IBPB: disabled";
 		case SPECTRE_V2_USER_STRICT:
 			return ", IBPB: always-on";
+		case SPECTRE_V2_USER_PRCTL:
+			return "";
 		}
 	}
 	return "";
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index afbe2eb4a1c6d..7d31192296a87 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -450,6 +450,11 @@ static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk)
 			set_tsk_thread_flag(tsk, TIF_SSBD);
 		else
 			clear_tsk_thread_flag(tsk, TIF_SSBD);
+
+		if (task_spec_ib_disable(tsk))
+			set_tsk_thread_flag(tsk, TIF_SPEC_IB);
+		else
+			clear_tsk_thread_flag(tsk, TIF_SPEC_IB);
 	}
 	/* Return the updated threadinfo flags*/
 	return task_thread_info(tsk)->flags;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a51c13c2b1a03..d607db5fcc6a8 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1453,6 +1453,8 @@ static inline bool is_percpu_thread(void)
 #define PFA_SPREAD_SLAB			2	/* Spread some slab caches over cpuset */
 #define PFA_SPEC_SSB_DISABLE		3	/* Speculative Store Bypass disabled */
 #define PFA_SPEC_SSB_FORCE_DISABLE	4	/* Speculative Store Bypass force disabled*/
+#define PFA_SPEC_IB_DISABLE		5	/* Indirect branch speculation restricted */
+#define PFA_SPEC_IB_FORCE_DISABLE	6	/* Indirect branch speculation permanently restricted */
 
 #define TASK_PFA_TEST(name, func)					\
 	static inline bool task_##func(struct task_struct *p)		\
@@ -1484,6 +1486,13 @@ TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable)
 TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
 TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
 
+TASK_PFA_TEST(SPEC_IB_DISABLE, spec_ib_disable)
+TASK_PFA_SET(SPEC_IB_DISABLE, spec_ib_disable)
+TASK_PFA_CLEAR(SPEC_IB_DISABLE, spec_ib_disable)
+
+TASK_PFA_TEST(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable)
+TASK_PFA_SET(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable)
+
 static inline void
 current_restore_flags(unsigned long orig_flags, unsigned long flags)
 {
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index c0d7ea0bf5b62..b17201edfa09a 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -212,6 +212,7 @@ struct prctl_mm_map {
 #define PR_SET_SPECULATION_CTRL		53
 /* Speculation control variants */
 # define PR_SPEC_STORE_BYPASS		0
+# define PR_SPEC_INDIRECT_BRANCH	1
 /* Return and control values for PR_SET/GET_SPECULATION_CTRL */
 # define PR_SPEC_NOT_AFFECTED		0
 # define PR_SPEC_PRCTL			(1UL << 0)
diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
index c0d7ea0bf5b62..b17201edfa09a 100644
--- a/tools/include/uapi/linux/prctl.h
+++ b/tools/include/uapi/linux/prctl.h
@@ -212,6 +212,7 @@ struct prctl_mm_map {
 #define PR_SET_SPECULATION_CTRL		53
 /* Speculation control variants */
 # define PR_SPEC_STORE_BYPASS		0
+# define PR_SPEC_INDIRECT_BRANCH	1
 /* Return and control values for PR_SET/GET_SPECULATION_CTRL */
 # define PR_SPEC_NOT_AFFECTED		0
 # define PR_SPEC_PRCTL			(1UL << 0)
-- 
GitLab


From 7cc765a67d8e04ef7d772425ca5a2a1e2b894c15 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 25 Nov 2018 19:33:54 +0100
Subject: [PATCH 1534/1890] x86/speculation: Enable prctl mode for
 spectre_v2_user

Now that all prerequisites are in place:

 - Add the prctl command line option

 - Default the 'auto' mode to 'prctl'

 - When SMT state changes, update the static key which controls the
   conditional STIBP evaluation on context switch.

 - At init update the static key which controls the conditional IBPB
   evaluation on context switch.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Casey Schaufler <casey.schaufler@intel.com>
Cc: Asit Mallick <asit.k.mallick@intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Jon Masters <jcm@redhat.com>
Cc: Waiman Long <longman9394@gmail.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Dave Stewart <david.c.stewart@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20181125185005.958421388@linutronix.de
---
 .../admin-guide/kernel-parameters.txt         |  7 +++-
 arch/x86/kernel/cpu/bugs.c                    | 41 +++++++++++++++----
 2 files changed, 38 insertions(+), 10 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index b6e5b33b9d751..a9b98a4e87899 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4236,9 +4236,14 @@
 			off     - Unconditionally disable mitigations. Is
 				  enforced by spectre_v2=off
 
+			prctl   - Indirect branch speculation is enabled,
+				  but mitigation can be enabled via prctl
+				  per thread.  The mitigation control state
+				  is inherited on fork.
+
 			auto    - Kernel selects the mitigation depending on
 				  the available CPU features and vulnerability.
-				  Default is off.
+				  Default is prctl.
 
 			Not specifying this option is equivalent to
 			spectre_v2_user=auto.
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 74359fff87fdb..d0137d10f9a6e 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -255,11 +255,13 @@ enum spectre_v2_user_cmd {
 	SPECTRE_V2_USER_CMD_NONE,
 	SPECTRE_V2_USER_CMD_AUTO,
 	SPECTRE_V2_USER_CMD_FORCE,
+	SPECTRE_V2_USER_CMD_PRCTL,
 };
 
 static const char * const spectre_v2_user_strings[] = {
 	[SPECTRE_V2_USER_NONE]		= "User space: Vulnerable",
 	[SPECTRE_V2_USER_STRICT]	= "User space: Mitigation: STIBP protection",
+	[SPECTRE_V2_USER_PRCTL]		= "User space: Mitigation: STIBP via prctl",
 };
 
 static const struct {
@@ -270,6 +272,7 @@ static const struct {
 	{ "auto",	SPECTRE_V2_USER_CMD_AUTO,	false },
 	{ "off",	SPECTRE_V2_USER_CMD_NONE,	false },
 	{ "on",		SPECTRE_V2_USER_CMD_FORCE,	true  },
+	{ "prctl",	SPECTRE_V2_USER_CMD_PRCTL,	false },
 };
 
 static void __init spec_v2_user_print_cond(const char *reason, bool secure)
@@ -324,12 +327,15 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
 		smt_possible = false;
 
 	switch (spectre_v2_parse_user_cmdline(v2_cmd)) {
-	case SPECTRE_V2_USER_CMD_AUTO:
 	case SPECTRE_V2_USER_CMD_NONE:
 		goto set_mode;
 	case SPECTRE_V2_USER_CMD_FORCE:
 		mode = SPECTRE_V2_USER_STRICT;
 		break;
+	case SPECTRE_V2_USER_CMD_AUTO:
+	case SPECTRE_V2_USER_CMD_PRCTL:
+		mode = SPECTRE_V2_USER_PRCTL;
+		break;
 	}
 
 	/* Initialize Indirect Branch Prediction Barrier */
@@ -340,6 +346,9 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
 		case SPECTRE_V2_USER_STRICT:
 			static_branch_enable(&switch_mm_always_ibpb);
 			break;
+		case SPECTRE_V2_USER_PRCTL:
+			static_branch_enable(&switch_mm_cond_ibpb);
+			break;
 		default:
 			break;
 		}
@@ -352,6 +361,12 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
 	if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
 		return;
 
+	/*
+	 * If SMT is not possible or STIBP is not available clear the STIPB
+	 * mode.
+	 */
+	if (!smt_possible || !boot_cpu_has(X86_FEATURE_STIBP))
+		mode = SPECTRE_V2_USER_NONE;
 set_mode:
 	spectre_v2_user = mode;
 	/* Only print the STIBP mode when SMT possible */
@@ -552,6 +567,15 @@ static void update_stibp_strict(void)
 	on_each_cpu(update_stibp_msr, NULL, 1);
 }
 
+/* Update the static key controlling the evaluation of TIF_SPEC_IB */
+static void update_indir_branch_cond(void)
+{
+	if (sched_smt_active())
+		static_branch_enable(&switch_to_cond_stibp);
+	else
+		static_branch_disable(&switch_to_cond_stibp);
+}
+
 void arch_smt_update(void)
 {
 	/* Enhanced IBRS implies STIBP. No update required. */
@@ -567,6 +591,7 @@ void arch_smt_update(void)
 		update_stibp_strict();
 		break;
 	case SPECTRE_V2_USER_PRCTL:
+		update_indir_branch_cond();
 		break;
 	}
 
@@ -1038,7 +1063,8 @@ static char *stibp_state(void)
 	case SPECTRE_V2_USER_STRICT:
 		return ", STIBP: forced";
 	case SPECTRE_V2_USER_PRCTL:
-		return "";
+		if (static_key_enabled(&switch_to_cond_stibp))
+			return ", STIBP: conditional";
 	}
 	return "";
 }
@@ -1046,14 +1072,11 @@ static char *stibp_state(void)
 static char *ibpb_state(void)
 {
 	if (boot_cpu_has(X86_FEATURE_IBPB)) {
-		switch (spectre_v2_user) {
-		case SPECTRE_V2_USER_NONE:
-			return ", IBPB: disabled";
-		case SPECTRE_V2_USER_STRICT:
+		if (static_key_enabled(&switch_mm_always_ibpb))
 			return ", IBPB: always-on";
-		case SPECTRE_V2_USER_PRCTL:
-			return "";
-		}
+		if (static_key_enabled(&switch_mm_cond_ibpb))
+			return ", IBPB: conditional";
+		return ", IBPB: disabled";
 	}
 	return "";
 }
-- 
GitLab


From 6b3e64c237c072797a9ec918654a60e3a46488e2 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 25 Nov 2018 19:33:55 +0100
Subject: [PATCH 1535/1890] x86/speculation: Add seccomp Spectre v2 user space
 protection mode

If 'prctl' mode of user space protection from spectre v2 is selected
on the kernel command-line, STIBP and IBPB are applied on tasks which
restrict their indirect branch speculation via prctl.

SECCOMP enables the SSBD mitigation for sandboxed tasks already, so it
makes sense to prevent spectre v2 user space to user space attacks as
well.

The Intel mitigation guide documents how STIPB works:

   Setting bit 1 (STIBP) of the IA32_SPEC_CTRL MSR on a logical processor
   prevents the predicted targets of indirect branches on any logical
   processor of that core from being controlled by software that executes
   (or executed previously) on another logical processor of the same core.

Ergo setting STIBP protects the task itself from being attacked from a task
running on a different hyper-thread and protects the tasks running on
different hyper-threads from being attacked.

While the document suggests that the branch predictors are shielded between
the logical processors, the observed performance regressions suggest that
STIBP simply disables the branch predictor more or less completely. Of
course the document wording is vague, but the fact that there is also no
requirement for issuing IBPB when STIBP is used points clearly in that
direction. The kernel still issues IBPB even when STIBP is used until Intel
clarifies the whole mechanism.

IBPB is issued when the task switches out, so malicious sandbox code cannot
mistrain the branch predictor for the next user space task on the same
logical processor.

Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Casey Schaufler <casey.schaufler@intel.com>
Cc: Asit Mallick <asit.k.mallick@intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Jon Masters <jcm@redhat.com>
Cc: Waiman Long <longman9394@gmail.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Dave Stewart <david.c.stewart@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20181125185006.051663132@linutronix.de
---
 Documentation/admin-guide/kernel-parameters.txt |  9 ++++++++-
 arch/x86/include/asm/nospec-branch.h            |  1 +
 arch/x86/kernel/cpu/bugs.c                      | 17 ++++++++++++++++-
 3 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a9b98a4e87899..f405281bb202c 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4241,9 +4241,16 @@
 				  per thread.  The mitigation control state
 				  is inherited on fork.
 
+			seccomp
+				- Same as "prctl" above, but all seccomp
+				  threads will enable the mitigation unless
+				  they explicitly opt out.
+
 			auto    - Kernel selects the mitigation depending on
 				  the available CPU features and vulnerability.
-				  Default is prctl.
+
+			Default mitigation:
+			If CONFIG_SECCOMP=y then "seccomp", otherwise "prctl"
 
 			Not specifying this option is equivalent to
 			spectre_v2_user=auto.
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 2adbe7b047fa2..032b6009baab4 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -233,6 +233,7 @@ enum spectre_v2_user_mitigation {
 	SPECTRE_V2_USER_NONE,
 	SPECTRE_V2_USER_STRICT,
 	SPECTRE_V2_USER_PRCTL,
+	SPECTRE_V2_USER_SECCOMP,
 };
 
 /* The Speculative Store Bypass disable variants */
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index d0137d10f9a6e..c9e3049605346 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -256,12 +256,14 @@ enum spectre_v2_user_cmd {
 	SPECTRE_V2_USER_CMD_AUTO,
 	SPECTRE_V2_USER_CMD_FORCE,
 	SPECTRE_V2_USER_CMD_PRCTL,
+	SPECTRE_V2_USER_CMD_SECCOMP,
 };
 
 static const char * const spectre_v2_user_strings[] = {
 	[SPECTRE_V2_USER_NONE]		= "User space: Vulnerable",
 	[SPECTRE_V2_USER_STRICT]	= "User space: Mitigation: STIBP protection",
 	[SPECTRE_V2_USER_PRCTL]		= "User space: Mitigation: STIBP via prctl",
+	[SPECTRE_V2_USER_SECCOMP]	= "User space: Mitigation: STIBP via seccomp and prctl",
 };
 
 static const struct {
@@ -273,6 +275,7 @@ static const struct {
 	{ "off",	SPECTRE_V2_USER_CMD_NONE,	false },
 	{ "on",		SPECTRE_V2_USER_CMD_FORCE,	true  },
 	{ "prctl",	SPECTRE_V2_USER_CMD_PRCTL,	false },
+	{ "seccomp",	SPECTRE_V2_USER_CMD_SECCOMP,	false },
 };
 
 static void __init spec_v2_user_print_cond(const char *reason, bool secure)
@@ -332,10 +335,16 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
 	case SPECTRE_V2_USER_CMD_FORCE:
 		mode = SPECTRE_V2_USER_STRICT;
 		break;
-	case SPECTRE_V2_USER_CMD_AUTO:
 	case SPECTRE_V2_USER_CMD_PRCTL:
 		mode = SPECTRE_V2_USER_PRCTL;
 		break;
+	case SPECTRE_V2_USER_CMD_AUTO:
+	case SPECTRE_V2_USER_CMD_SECCOMP:
+		if (IS_ENABLED(CONFIG_SECCOMP))
+			mode = SPECTRE_V2_USER_SECCOMP;
+		else
+			mode = SPECTRE_V2_USER_PRCTL;
+		break;
 	}
 
 	/* Initialize Indirect Branch Prediction Barrier */
@@ -347,6 +356,7 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
 			static_branch_enable(&switch_mm_always_ibpb);
 			break;
 		case SPECTRE_V2_USER_PRCTL:
+		case SPECTRE_V2_USER_SECCOMP:
 			static_branch_enable(&switch_mm_cond_ibpb);
 			break;
 		default:
@@ -591,6 +601,7 @@ void arch_smt_update(void)
 		update_stibp_strict();
 		break;
 	case SPECTRE_V2_USER_PRCTL:
+	case SPECTRE_V2_USER_SECCOMP:
 		update_indir_branch_cond();
 		break;
 	}
@@ -833,6 +844,8 @@ void arch_seccomp_spec_mitigate(struct task_struct *task)
 {
 	if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP)
 		ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE);
+	if (spectre_v2_user == SPECTRE_V2_USER_SECCOMP)
+		ib_prctl_set(task, PR_SPEC_FORCE_DISABLE);
 }
 #endif
 
@@ -864,6 +877,7 @@ static int ib_prctl_get(struct task_struct *task)
 	case SPECTRE_V2_USER_NONE:
 		return PR_SPEC_ENABLE;
 	case SPECTRE_V2_USER_PRCTL:
+	case SPECTRE_V2_USER_SECCOMP:
 		if (task_spec_ib_force_disable(task))
 			return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
 		if (task_spec_ib_disable(task))
@@ -1063,6 +1077,7 @@ static char *stibp_state(void)
 	case SPECTRE_V2_USER_STRICT:
 		return ", STIBP: forced";
 	case SPECTRE_V2_USER_PRCTL:
+	case SPECTRE_V2_USER_SECCOMP:
 		if (static_key_enabled(&switch_to_cond_stibp))
 			return ", STIBP: conditional";
 	}
-- 
GitLab


From 55a974021ec952ee460dc31ca08722158639de72 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 25 Nov 2018 19:33:56 +0100
Subject: [PATCH 1536/1890] x86/speculation: Provide IBPB always command line
 options

Provide the possibility to enable IBPB always in combination with 'prctl'
and 'seccomp'.

Add the extra command line options and rework the IBPB selection to
evaluate the command instead of the mode selected by the STIPB switch case.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Casey Schaufler <casey.schaufler@intel.com>
Cc: Asit Mallick <asit.k.mallick@intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Jon Masters <jcm@redhat.com>
Cc: Waiman Long <longman9394@gmail.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Dave Stewart <david.c.stewart@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20181125185006.144047038@linutronix.de
---
 .../admin-guide/kernel-parameters.txt         | 12 +++++++
 arch/x86/kernel/cpu/bugs.c                    | 34 +++++++++++++------
 2 files changed, 35 insertions(+), 11 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index f405281bb202c..05a252e5178d7 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4241,11 +4241,23 @@
 				  per thread.  The mitigation control state
 				  is inherited on fork.
 
+			prctl,ibpb
+				- Like "prctl" above, but only STIBP is
+				  controlled per thread. IBPB is issued
+				  always when switching between different user
+				  space processes.
+
 			seccomp
 				- Same as "prctl" above, but all seccomp
 				  threads will enable the mitigation unless
 				  they explicitly opt out.
 
+			seccomp,ibpb
+				- Like "seccomp" above, but only STIBP is
+				  controlled per thread. IBPB is issued
+				  always when switching between different
+				  user space processes.
+
 			auto    - Kernel selects the mitigation depending on
 				  the available CPU features and vulnerability.
 
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index c9e3049605346..500278f5308ee 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -256,7 +256,9 @@ enum spectre_v2_user_cmd {
 	SPECTRE_V2_USER_CMD_AUTO,
 	SPECTRE_V2_USER_CMD_FORCE,
 	SPECTRE_V2_USER_CMD_PRCTL,
+	SPECTRE_V2_USER_CMD_PRCTL_IBPB,
 	SPECTRE_V2_USER_CMD_SECCOMP,
+	SPECTRE_V2_USER_CMD_SECCOMP_IBPB,
 };
 
 static const char * const spectre_v2_user_strings[] = {
@@ -271,11 +273,13 @@ static const struct {
 	enum spectre_v2_user_cmd	cmd;
 	bool				secure;
 } v2_user_options[] __initdata = {
-	{ "auto",	SPECTRE_V2_USER_CMD_AUTO,	false },
-	{ "off",	SPECTRE_V2_USER_CMD_NONE,	false },
-	{ "on",		SPECTRE_V2_USER_CMD_FORCE,	true  },
-	{ "prctl",	SPECTRE_V2_USER_CMD_PRCTL,	false },
-	{ "seccomp",	SPECTRE_V2_USER_CMD_SECCOMP,	false },
+	{ "auto",		SPECTRE_V2_USER_CMD_AUTO,		false },
+	{ "off",		SPECTRE_V2_USER_CMD_NONE,		false },
+	{ "on",			SPECTRE_V2_USER_CMD_FORCE,		true  },
+	{ "prctl",		SPECTRE_V2_USER_CMD_PRCTL,		false },
+	{ "prctl,ibpb",		SPECTRE_V2_USER_CMD_PRCTL_IBPB,		false },
+	{ "seccomp",		SPECTRE_V2_USER_CMD_SECCOMP,		false },
+	{ "seccomp,ibpb",	SPECTRE_V2_USER_CMD_SECCOMP_IBPB,	false },
 };
 
 static void __init spec_v2_user_print_cond(const char *reason, bool secure)
@@ -321,6 +325,7 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
 {
 	enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE;
 	bool smt_possible = IS_ENABLED(CONFIG_SMP);
+	enum spectre_v2_user_cmd cmd;
 
 	if (!boot_cpu_has(X86_FEATURE_IBPB) && !boot_cpu_has(X86_FEATURE_STIBP))
 		return;
@@ -329,17 +334,20 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
 	    cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
 		smt_possible = false;
 
-	switch (spectre_v2_parse_user_cmdline(v2_cmd)) {
+	cmd = spectre_v2_parse_user_cmdline(v2_cmd);
+	switch (cmd) {
 	case SPECTRE_V2_USER_CMD_NONE:
 		goto set_mode;
 	case SPECTRE_V2_USER_CMD_FORCE:
 		mode = SPECTRE_V2_USER_STRICT;
 		break;
 	case SPECTRE_V2_USER_CMD_PRCTL:
+	case SPECTRE_V2_USER_CMD_PRCTL_IBPB:
 		mode = SPECTRE_V2_USER_PRCTL;
 		break;
 	case SPECTRE_V2_USER_CMD_AUTO:
 	case SPECTRE_V2_USER_CMD_SECCOMP:
+	case SPECTRE_V2_USER_CMD_SECCOMP_IBPB:
 		if (IS_ENABLED(CONFIG_SECCOMP))
 			mode = SPECTRE_V2_USER_SECCOMP;
 		else
@@ -351,12 +359,15 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
 	if (boot_cpu_has(X86_FEATURE_IBPB)) {
 		setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
 
-		switch (mode) {
-		case SPECTRE_V2_USER_STRICT:
+		switch (cmd) {
+		case SPECTRE_V2_USER_CMD_FORCE:
+		case SPECTRE_V2_USER_CMD_PRCTL_IBPB:
+		case SPECTRE_V2_USER_CMD_SECCOMP_IBPB:
 			static_branch_enable(&switch_mm_always_ibpb);
 			break;
-		case SPECTRE_V2_USER_PRCTL:
-		case SPECTRE_V2_USER_SECCOMP:
+		case SPECTRE_V2_USER_CMD_PRCTL:
+		case SPECTRE_V2_USER_CMD_AUTO:
+		case SPECTRE_V2_USER_CMD_SECCOMP:
 			static_branch_enable(&switch_mm_cond_ibpb);
 			break;
 		default:
@@ -364,7 +375,8 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
 		}
 
 		pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n",
-			mode == SPECTRE_V2_USER_STRICT ? "always-on" : "conditional");
+			static_key_enabled(&switch_mm_always_ibpb) ?
+			"always-on" : "conditional");
 	}
 
 	/* If enhanced IBRS is enabled no STIPB required */
-- 
GitLab


From e6bc06faf64a83384cc0abc537df954c9d3ff942 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 27 Nov 2018 16:34:55 +0000
Subject: [PATCH 1537/1890] cachefiles: Fix an assertion failure when trying to
 update a failed object

If cachefiles gets an error other then ENOENT when trying to look up an
object in the cache (in this case, EACCES), the object state machine will
eventually transition to the DROP_OBJECT state.

This state invokes fscache_drop_object() which tries to sync the auxiliary
data with the cache (this is done lazily since commit 402cb8dda949d) on an
incomplete cache object struct.

The problem comes when cachefiles_update_object_xattr() is called to
rewrite the xattr holding the data.  There's an assertion there that the
cache object points to a dentry as we're going to update its xattr.  The
assertion trips, however, as dentry didn't get set.

Fix the problem by skipping the update in cachefiles if the object doesn't
refer to a dentry.  A better way to do it could be to skip the update from
the DROP_OBJECT state handler in fscache, but that might deny the cache the
opportunity to update intermediate state.

If this error occurs, the kernel log includes lines that look like the
following:

 CacheFiles: Lookup failed error -13
 CacheFiles:
 CacheFiles: Assertion failed
 ------------[ cut here ]------------
 kernel BUG at fs/cachefiles/xattr.c:138!
 ...
 Workqueue: fscache_object fscache_object_work_func [fscache]
 RIP: 0010:cachefiles_update_object_xattr.cold.4+0x18/0x1a [cachefiles]
 ...
 Call Trace:
  cachefiles_update_object+0xdd/0x1c0 [cachefiles]
  fscache_update_aux_data+0x23/0x30 [fscache]
  fscache_drop_object+0x18e/0x1c0 [fscache]
  fscache_object_work_func+0x74/0x2b0 [fscache]
  process_one_work+0x18d/0x340
  worker_thread+0x2e/0x390
  ? pwq_unbound_release_workfn+0xd0/0xd0
  kthread+0x112/0x130
  ? kthread_bind+0x30/0x30
  ret_from_fork+0x35/0x40

Note that there are actually two issues here: (1) EACCES happened on a
cache object and (2) an oops occurred.  I think that the second is a
consequence of the first (it certainly looks like it ought to be).  This
patch only deals with the second.

Fixes: 402cb8dda949 ("fscache: Attach the index key and aux data to the cookie")
Reported-by: Zhibin Li <zhibli@redhat.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/cachefiles/xattr.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c
index 0a29a00aed2eb..511e6c68156a7 100644
--- a/fs/cachefiles/xattr.c
+++ b/fs/cachefiles/xattr.c
@@ -135,7 +135,8 @@ int cachefiles_update_object_xattr(struct cachefiles_object *object,
 	struct dentry *dentry = object->dentry;
 	int ret;
 
-	ASSERT(dentry);
+	if (!dentry)
+		return -ESTALE;
 
 	_enter("%p,#%d", object, auxdata->len);
 
-- 
GitLab


From 3f2b7b9035107d6096ea438ea3d97dcf0481b6d2 Mon Sep 17 00:00:00 2001
From: "kiran.modukuri" <kiran.modukuri@gmail.com>
Date: Mon, 26 Nov 2018 15:41:48 +0000
Subject: [PATCH 1538/1890] fscache: Fix race in fscache_op_complete() due to
 split atomic_sub & read

The code in fscache_retrieval_complete is using atomic_sub followed by an
atomic_read:

        atomic_sub(n_pages, &op->n_pages);
        if (atomic_read(&op->n_pages) <= 0)
                fscache_op_complete(&op->op, true);

This causes two threads doing a decrement of n_pages to race with each
other seeing the op->refcount 0 at same time - and they end up calling
fscache_op_complete() in both the threads leading to an assertion failure.

Fix this by using atomic_sub_return_relaxed() instead of two calls.  Note
that I'm using 'relaxed' rather than, say, 'release' as there aren't
multiple variables that appear to need ordering across the release.

The oops looks something like:

FS-Cache: Assertion failed
FS-Cache: 0 > 0 is false
...
kernel BUG at /usr/src/linux-4.4.0/fs/fscache/operation.c:449!
...
Workqueue: fscache_operation fscache_op_work_func [fscache]
...
RIP: 0010:[<ffffffffc037eacd>] fscache_op_complete+0x10d/0x180 [fscache]
...
Call Trace:
 [<ffffffffc1464cf9>] cachefiles_read_copier+0x3a9/0x410 [cachefiles]
 [<ffffffffc037e272>] fscache_op_work_func+0x22/0x50 [fscache]
 [<ffffffff81096da0>] process_one_work+0x150/0x3f0
 [<ffffffff8109751a>] worker_thread+0x11a/0x470
 [<ffffffff81808e59>] ? __schedule+0x359/0x980
 [<ffffffff81097400>] ? rescuer_thread+0x310/0x310
 [<ffffffff8109cdd6>] kthread+0xd6/0xf0
 [<ffffffff8109cd00>] ? kthread_park+0x60/0x60
 [<ffffffff8180d0cf>] ret_from_fork+0x3f/0x70
 [<ffffffff8109cd00>] ? kthread_park+0x60/0x60

This seen this in 4.4.x kernels and the same bug affects fscache in latest
upstreams kernels.

Fixes: 1bb4b7f98f36 ("FS-Cache: The retrieval remaining-pages counter needs to be atomic_t")
Signed-off-by: Kiran Kumar Modukuri <kiran.modukuri@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/linux/fscache-cache.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index 34cf0fdd7dc76..610815e3f1aae 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -196,8 +196,7 @@ static inline void fscache_enqueue_retrieval(struct fscache_retrieval *op)
 static inline void fscache_retrieval_complete(struct fscache_retrieval *op,
 					      int n_pages)
 {
-	atomic_sub(n_pages, &op->n_pages);
-	if (atomic_read(&op->n_pages) <= 0)
+	if (atomic_sub_return_relaxed(n_pages, &op->n_pages) <= 0)
 		fscache_op_complete(&op->op, false);
 }
 
-- 
GitLab


From 40b412897ccb4b98b2cfb2a0aaabed58dd9e2086 Mon Sep 17 00:00:00 2001
From: Frieder Schrempf <frieder.schrempf@kontron.de>
Date: Tue, 27 Nov 2018 07:44:52 +0000
Subject: [PATCH 1539/1890] mtd: nand: Fix memory allocation in
 nanddev_bbt_init()

Fix the size of the buffer allocated to store the in-memory BBT.
This bug was previously hidden by a different bug, that was fixed in
commit d098093ba06e ("mtd: nand: Fix nanddev_neraseblocks()").

Fixes: 9c3736a3de21 ("mtd: nand: Add core infrastructure to deal with NAND devices")
Cc: <stable@vger.kernel.org>
Signed-off-by: Frieder Schrempf <frieder.schrempf@kontron.de>
Acked-by: Miquel Raynal <miquel.raynal@bootlin.com>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
---
 drivers/mtd/nand/bbt.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/bbt.c b/drivers/mtd/nand/bbt.c
index 56cde38b92c03..044adf9138546 100644
--- a/drivers/mtd/nand/bbt.c
+++ b/drivers/mtd/nand/bbt.c
@@ -27,7 +27,8 @@ int nanddev_bbt_init(struct nand_device *nand)
 	unsigned int nwords = DIV_ROUND_UP(nblocks * bits_per_block,
 					   BITS_PER_LONG);
 
-	nand->bbt.cache = kzalloc(nwords, GFP_KERNEL);
+	nand->bbt.cache = kcalloc(nwords, sizeof(*nand->bbt.cache),
+				  GFP_KERNEL);
 	if (!nand->bbt.cache)
 		return -ENOMEM;
 
-- 
GitLab


From 9a24ce5b66f9c8190d63b15f4473600db4935f1f Mon Sep 17 00:00:00 2001
From: Kiran Kumar Modukuri <kiran.modukuri@gmail.com>
Date: Mon, 24 Sep 2018 12:02:39 +1000
Subject: [PATCH 1540/1890] cachefiles: Fix page leak in
 cachefiles_read_backing_file while vmscan is active

[Description]

In a heavily loaded system where the system pagecache is nearing memory
limits and fscache is enabled, pages can be leaked by fscache while trying
read pages from cachefiles backend.  This can happen because two
applications can be reading same page from a single mount, two threads can
be trying to read the backing page at same time.  This results in one of
the threads finding that a page for the backing file or netfs file is
already in the radix tree.  During the error handling cachefiles does not
clean up the reference on backing page, leading to page leak.

[Fix]
The fix is straightforward, to decrement the reference when error is
encountered.

  [dhowells: Note that I've removed the clearance and put of newpage as
   they aren't attested in the commit message and don't appear to actually
   achieve anything since a new page is only allocated is newpage!=NULL and
   any residual new page is cleared before returning.]

[Testing]
I have tested the fix using following method for 12+ hrs.

1) mkdir -p /mnt/nfs ; mount -o vers=3,fsc <server_ip>:/export /mnt/nfs
2) create 10000 files of 2.8MB in a NFS mount.
3) start a thread to simulate heavy VM presssure
   (while true ; do echo 3 > /proc/sys/vm/drop_caches ; sleep 1 ; done)&
4) start multiple parallel reader for data set at same time
   find /mnt/nfs -type f | xargs -P 80 cat > /dev/null &
   find /mnt/nfs -type f | xargs -P 80 cat > /dev/null &
   find /mnt/nfs -type f | xargs -P 80 cat > /dev/null &
   ..
   ..
   find /mnt/nfs -type f | xargs -P 80 cat > /dev/null &
   find /mnt/nfs -type f | xargs -P 80 cat > /dev/null &
5) finally check using cat /proc/fs/fscache/stats | grep -i pages ;
   free -h , cat /proc/meminfo and page-types -r -b lru
   to ensure all pages are freed.

Reviewed-by: Daniel Axtens <dja@axtens.net>
Signed-off-by: Shantanu Goel <sgoel01@yahoo.com>
Signed-off-by: Kiran Kumar Modukuri <kiran.modukuri@gmail.com>
[dja: forward ported to current upstream]
Signed-off-by: Daniel Axtens <dja@axtens.net>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/cachefiles/rdwr.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index 40f7595aad10f..db233588a69af 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -535,7 +535,10 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object,
 					    netpage->index, cachefiles_gfp);
 		if (ret < 0) {
 			if (ret == -EEXIST) {
+				put_page(backpage);
+				backpage = NULL;
 				put_page(netpage);
+				netpage = NULL;
 				fscache_retrieval_complete(op, 1);
 				continue;
 			}
@@ -608,7 +611,10 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object,
 					    netpage->index, cachefiles_gfp);
 		if (ret < 0) {
 			if (ret == -EEXIST) {
+				put_page(backpage);
+				backpage = NULL;
 				put_page(netpage);
+				netpage = NULL;
 				fscache_retrieval_complete(op, 1);
 				continue;
 			}
-- 
GitLab


From e4c39f7926b4de355f7df75651d75003806aae09 Mon Sep 17 00:00:00 2001
From: Yunjian Wang <wangyunjian@huawei.com>
Date: Tue, 6 Nov 2018 16:27:12 +0800
Subject: [PATCH 1541/1890] igb: fix uninitialized variables

This patch fixes the variable 'phy_word' may be used uninitialized.

Signed-off-by: Yunjian Wang <wangyunjian@huawei.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igb/e1000_i210.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/intel/igb/e1000_i210.c b/drivers/net/ethernet/intel/igb/e1000_i210.c
index c54ebedca6da9..c393cb2c0f168 100644
--- a/drivers/net/ethernet/intel/igb/e1000_i210.c
+++ b/drivers/net/ethernet/intel/igb/e1000_i210.c
@@ -842,6 +842,7 @@ s32 igb_pll_workaround_i210(struct e1000_hw *hw)
 		nvm_word = E1000_INVM_DEFAULT_AL;
 	tmp_nvm = nvm_word | E1000_INVM_PLL_WO_VAL;
 	igb_write_phy_reg_82580(hw, I347AT4_PAGE_SELECT, E1000_PHY_PLL_FREQ_PAGE);
+	phy_word = E1000_PHY_PLL_UNCONF;
 	for (i = 0; i < E1000_MAX_PLL_TRIES; i++) {
 		/* check current state directly from internal PHY */
 		igb_read_phy_reg_82580(hw, E1000_PHY_PLL_FREQ_REG, &phy_word);
-- 
GitLab


From eab077aa84331afbda071a213925d4cdbca58941 Mon Sep 17 00:00:00 2001
From: Lihong Yang <lihong.yang@intel.com>
Date: Wed, 21 Nov 2018 09:15:37 -0800
Subject: [PATCH 1542/1890] i40e: Fix deletion of MAC filters

In __i40e_del_filter function, the flag __I40E_MACVLAN_SYNC_PENDING for
the PF state is wrongly set for the VSI. Deleting any of the MAC filters
has caused the incorrect syncing for the PF. Fix it by setting this state
flag to the intended PF.

CC: stable <stable@vger.kernel.org>
Signed-off-by: Lihong Yang <lihong.yang@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 21c2688d63082..a3f45335437c3 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -1413,7 +1413,7 @@ void __i40e_del_filter(struct i40e_vsi *vsi, struct i40e_mac_filter *f)
 	}
 
 	vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
-	set_bit(__I40E_MACVLAN_SYNC_PENDING, vsi->state);
+	set_bit(__I40E_MACVLAN_SYNC_PENDING, vsi->back->state);
 }
 
 /**
-- 
GitLab


From a8bf879af7b1999eba36303ce9cc60e0e7dd816c Mon Sep 17 00:00:00 2001
From: Josh Elsasser <jelsasser@appneta.com>
Date: Sat, 24 Nov 2018 12:57:33 -0800
Subject: [PATCH 1543/1890] ixgbe: recognize 1000BaseLX SFP modules as 1Gbps
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add the two 1000BaseLX enum values to the X550's check for 1Gbps modules,
allowing the core driver code to establish a link over this SFP type.

This is done by the out-of-tree driver but the fix wasn't in mainline.

Fixes: e23f33367882 ("ixgbe: Fix 1G and 10G link stability for X550EM_x SFP+â€)
Fixes: 6a14ee0cfb19 ("ixgbe: Add X550 support function pointers")
Signed-off-by: Josh Elsasser <jelsasser@appneta.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
index 10dbaf4f6e808..9c42f741ed5ef 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
@@ -2262,7 +2262,9 @@ static s32 ixgbe_get_link_capabilities_X550em(struct ixgbe_hw *hw,
 		*autoneg = false;
 
 		if (hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0 ||
-		    hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1) {
+		    hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1 ||
+		    hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core0 ||
+		    hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core1) {
 			*speed = IXGBE_LINK_SPEED_1GB_FULL;
 			return 0;
 		}
-- 
GitLab


From 529eb362a3477189224e7b7d24983d49448a85db Mon Sep 17 00:00:00 2001
From: Jan Sokolowski <jan.sokolowski@intel.com>
Date: Tue, 27 Nov 2018 09:35:35 -0800
Subject: [PATCH 1544/1890] i40e: fix kerneldoc for xsk methods

One method, xsk_umem_setup, had an incorrect kernel doc
description, which has been corrected.

Also fixes small typos found in the comments.

Signed-off-by: Jan Sokolowski <jan.sokolowski@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_xsk.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index add1e457886df..433c8e688c78d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -33,7 +33,7 @@ static int i40e_alloc_xsk_umems(struct i40e_vsi *vsi)
 }
 
 /**
- * i40e_add_xsk_umem - Store an UMEM for a certain ring/qid
+ * i40e_add_xsk_umem - Store a UMEM for a certain ring/qid
  * @vsi: Current VSI
  * @umem: UMEM to store
  * @qid: Ring/qid to associate with the UMEM
@@ -56,7 +56,7 @@ static int i40e_add_xsk_umem(struct i40e_vsi *vsi, struct xdp_umem *umem,
 }
 
 /**
- * i40e_remove_xsk_umem - Remove an UMEM for a certain ring/qid
+ * i40e_remove_xsk_umem - Remove a UMEM for a certain ring/qid
  * @vsi: Current VSI
  * @qid: Ring/qid associated with the UMEM
  **/
@@ -130,7 +130,7 @@ static void i40e_xsk_umem_dma_unmap(struct i40e_vsi *vsi, struct xdp_umem *umem)
 }
 
 /**
- * i40e_xsk_umem_enable - Enable/associate an UMEM to a certain ring/qid
+ * i40e_xsk_umem_enable - Enable/associate a UMEM to a certain ring/qid
  * @vsi: Current VSI
  * @umem: UMEM
  * @qid: Rx ring to associate UMEM to
@@ -189,7 +189,7 @@ static int i40e_xsk_umem_enable(struct i40e_vsi *vsi, struct xdp_umem *umem,
 }
 
 /**
- * i40e_xsk_umem_disable - Diassociate an UMEM from a certain ring/qid
+ * i40e_xsk_umem_disable - Disassociate a UMEM from a certain ring/qid
  * @vsi: Current VSI
  * @qid: Rx ring to associate UMEM to
  *
@@ -255,12 +255,12 @@ int i40e_xsk_umem_query(struct i40e_vsi *vsi, struct xdp_umem **umem,
 }
 
 /**
- * i40e_xsk_umem_query - Queries a certain ring/qid for its UMEM
+ * i40e_xsk_umem_setup - Enable/disassociate a UMEM to/from a ring/qid
  * @vsi: Current VSI
  * @umem: UMEM to enable/associate to a ring, or NULL to disable
  * @qid: Rx ring to (dis)associate UMEM (from)to
  *
- * This function enables or disables an UMEM to a certain ring.
+ * This function enables or disables a UMEM to a certain ring.
  *
  * Returns 0 on success, <0 on failure
  **/
@@ -276,7 +276,7 @@ int i40e_xsk_umem_setup(struct i40e_vsi *vsi, struct xdp_umem *umem,
  * @rx_ring: Rx ring
  * @xdp: xdp_buff used as input to the XDP program
  *
- * This function enables or disables an UMEM to a certain ring.
+ * This function enables or disables a UMEM to a certain ring.
  *
  * Returns any of I40E_XDP_{PASS, CONSUMED, TX, REDIR}
  **/
-- 
GitLab


From dfb7513374c1f8e7cd595106fbdba3fd07ebaf30 Mon Sep 17 00:00:00 2001
From: Martin Wilck <mwilck@suse.com>
Date: Mon, 12 Nov 2018 09:58:37 +0100
Subject: [PATCH 1545/1890] scsi: lpfc: fix block guard enablement on SLI3
 adapters

Since f44ac12f1dcc, BG enablement is tracked with the LPFC_SLI3_BG_ENABLED
bit, which is set in lpfc_get_cfgparam before lpfc_sli_config_sli_port() is
called. The bit shouldn't be cleared before checking the feature.  Based on
problem analysis by David Bond.

Fixes: f44ac12f1dcc "scsi: lpfc: Memory allocation error during driver start-up on power8"
Tested-by: David Bond <dbond@suse.com>
Signed-off-by: Martin Wilck <mwilck@suse.com>
Cc: stable@vger.kernel.org # 4.17.x
Cc: stable@vger.kernel.org # 4.18.x
Cc: stable@vger.kernel.org # 4.19.x
Reviewed-by: Hannes Reinecke <hare@suse.com>
Acked-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_init.c | 6 +++++-
 drivers/scsi/lpfc/lpfc_sli.c  | 1 -
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 20fa6785a0e2e..68d62d55a3a50 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -167,7 +167,11 @@ lpfc_config_port_prep(struct lpfc_hba *phba)
 		       sizeof(phba->wwpn));
 	}
 
-	phba->sli3_options = 0x0;
+	/*
+	 * Clear all option bits except LPFC_SLI3_BG_ENABLED,
+	 * which was already set in lpfc_get_cfgparam()
+	 */
+	phba->sli3_options &= (uint32_t)LPFC_SLI3_BG_ENABLED;
 
 	/* Setup and issue mailbox READ REV command */
 	lpfc_read_rev(phba, pmb);
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 783a1540cfbea..b9e5cd79931a2 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -4965,7 +4965,6 @@ lpfc_sli_config_port(struct lpfc_hba *phba, int sli_mode)
 		phba->sli3_options &= ~(LPFC_SLI3_NPIV_ENABLED |
 					LPFC_SLI3_HBQ_ENABLED |
 					LPFC_SLI3_CRP_ENABLED |
-					LPFC_SLI3_BG_ENABLED |
 					LPFC_SLI3_DSS_ENABLED);
 		if (rc != MBX_SUCCESS) {
 			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
-- 
GitLab


From cfc435198f53a6fa1f656d98466b24967ff457d0 Mon Sep 17 00:00:00 2001
From: Pan Bian <bianpan2016@163.com>
Date: Wed, 28 Nov 2018 14:53:19 +0800
Subject: [PATCH 1546/1890] rapidio/rionet: do not free skb before reading its
 length

skb is freed via dev_kfree_skb_any, however, skb->len is read then. This
may result in a use-after-free bug.

Fixes: e6161d64263 ("rapidio/rionet: rework driver initialization and removal")
Signed-off-by: Pan Bian <bianpan2016@163.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/rionet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c
index e9f101c9bae2c..bfbb39f935545 100644
--- a/drivers/net/rionet.c
+++ b/drivers/net/rionet.c
@@ -216,9 +216,9 @@ static int rionet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 			 * it just report sending a packet to the target
 			 * (without actual packet transfer).
 			 */
-			dev_kfree_skb_any(skb);
 			ndev->stats.tx_packets++;
 			ndev->stats.tx_bytes += skb->len;
+			dev_kfree_skb_any(skb);
 		}
 	}
 
-- 
GitLab


From c758940158bf29fe14e9d0f89d5848f227b48134 Mon Sep 17 00:00:00 2001
From: Pan Bian <bianpan2016@163.com>
Date: Wed, 28 Nov 2018 15:30:24 +0800
Subject: [PATCH 1547/1890] net: hisilicon: remove unexpected free_netdev

The net device ndev is freed via free_netdev when failing to register
the device. The control flow then jumps to the error handling code
block. ndev is used and freed again. Resulting in a use-after-free bug.

Signed-off-by: Pan Bian <bianpan2016@163.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hip04_eth.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c
index be268dcde8fa2..f9a4e76c5a8b7 100644
--- a/drivers/net/ethernet/hisilicon/hip04_eth.c
+++ b/drivers/net/ethernet/hisilicon/hip04_eth.c
@@ -915,10 +915,8 @@ static int hip04_mac_probe(struct platform_device *pdev)
 	}
 
 	ret = register_netdev(ndev);
-	if (ret) {
-		free_netdev(ndev);
+	if (ret)
 		goto alloc_fail;
-	}
 
 	return 0;
 
-- 
GitLab


From c93db7bb6ef3251e0ea48ade311d3e9942748e1c Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Tue, 27 Nov 2018 13:16:33 -0800
Subject: [PATCH 1548/1890] dax: Check page->mapping isn't NULL

If we race with inode destroy, it's possible for page->mapping to be
NULL before we even enter this routine, as well as after having slept
waiting for the dax entry to become unlocked.

Fixes: c2a7d2a11552 ("filesystem-dax: Introduce dax_lock_mapping_entry()")
Cc: <stable@vger.kernel.org>
Reported-by: Jan Kara <jack@suse.cz>
Signed-off-by: Matthew Wilcox <willy@infradead.org>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 fs/dax.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/dax.c b/fs/dax.c
index 9bcce89ea18ef..e69fc231833b7 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -365,7 +365,7 @@ bool dax_lock_mapping_entry(struct page *page)
 		struct address_space *mapping = READ_ONCE(page->mapping);
 
 		locked = false;
-		if (!dax_mapping(mapping))
+		if (!mapping || !dax_mapping(mapping))
 			break;
 
 		/*
-- 
GitLab


From 55e56f06ed71d9441f3abd5b1d3c1a870812b3fe Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Tue, 27 Nov 2018 13:16:34 -0800
Subject: [PATCH 1549/1890] dax: Don't access a freed inode

After we drop the i_pages lock, the inode can be freed at any time.
The get_unlocked_entry() code has no choice but to reacquire the lock,
so it can't be used here.  Create a new wait_entry_unlocked() which takes
care not to acquire the lock or dereference the address_space in any way.

Fixes: c2a7d2a11552 ("filesystem-dax: Introduce dax_lock_mapping_entry()")
Cc: <stable@vger.kernel.org>
Signed-off-by: Matthew Wilcox <willy@infradead.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 fs/dax.c | 32 +++++++++++++++++++++++++++++---
 1 file changed, 29 insertions(+), 3 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index e69fc231833b7..3f592dc18d67c 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -232,6 +232,34 @@ static void *get_unlocked_entry(struct xa_state *xas)
 	}
 }
 
+/*
+ * The only thing keeping the address space around is the i_pages lock
+ * (it's cycled in clear_inode() after removing the entries from i_pages)
+ * After we call xas_unlock_irq(), we cannot touch xas->xa.
+ */
+static void wait_entry_unlocked(struct xa_state *xas, void *entry)
+{
+	struct wait_exceptional_entry_queue ewait;
+	wait_queue_head_t *wq;
+
+	init_wait(&ewait.wait);
+	ewait.wait.func = wake_exceptional_entry_func;
+
+	wq = dax_entry_waitqueue(xas, entry, &ewait.key);
+	prepare_to_wait_exclusive(wq, &ewait.wait, TASK_UNINTERRUPTIBLE);
+	xas_unlock_irq(xas);
+	schedule();
+	finish_wait(wq, &ewait.wait);
+
+	/*
+	 * Entry lock waits are exclusive. Wake up the next waiter since
+	 * we aren't sure we will acquire the entry lock and thus wake
+	 * the next waiter up on unlock.
+	 */
+	if (waitqueue_active(wq))
+		__wake_up(wq, TASK_NORMAL, 1, &ewait.key);
+}
+
 static void put_unlocked_entry(struct xa_state *xas, void *entry)
 {
 	/* If we were the only waiter woken, wake the next one */
@@ -389,9 +417,7 @@ bool dax_lock_mapping_entry(struct page *page)
 		entry = xas_load(&xas);
 		if (dax_is_locked(entry)) {
 			rcu_read_unlock();
-			entry = get_unlocked_entry(&xas);
-			xas_unlock_irq(&xas);
-			put_unlocked_entry(&xas, entry);
+			wait_entry_unlocked(&xas, entry);
 			rcu_read_lock();
 			continue;
 		}
-- 
GitLab


From 9a764c1e59684c0358e16ccaafd870629f2cfe67 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Wed, 28 Nov 2018 16:20:50 +0100
Subject: [PATCH 1550/1890] s390/qeth: fix length check in SNMP processing

The response for a SNMP request can consist of multiple parts, which
the cmd callback stages into a kernel buffer until all parts have been
received. If the callback detects that the staging buffer provides
insufficient space, it bails out with error.
This processing is buggy for the first part of the response - while it
initially checks for a length of 'data_len', it later copies an
additional amount of 'offsetof(struct qeth_snmp_cmd, data)' bytes.

Fix the calculation of 'data_len' for the first part of the response.
This also nicely cleans up the memcpy code.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_core_main.c | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 4bce5ae65a55c..2540652718671 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -4518,8 +4518,8 @@ static int qeth_snmp_command_cb(struct qeth_card *card,
 {
 	struct qeth_ipa_cmd *cmd;
 	struct qeth_arp_query_info *qinfo;
-	struct qeth_snmp_cmd *snmp;
 	unsigned char *data;
+	void *snmp_data;
 	__u16 data_len;
 
 	QETH_CARD_TEXT(card, 3, "snpcmdcb");
@@ -4527,7 +4527,6 @@ static int qeth_snmp_command_cb(struct qeth_card *card,
 	cmd = (struct qeth_ipa_cmd *) sdata;
 	data = (unsigned char *)((char *)cmd - reply->offset);
 	qinfo = (struct qeth_arp_query_info *) reply->param;
-	snmp = &cmd->data.setadapterparms.data.snmp;
 
 	if (cmd->hdr.return_code) {
 		QETH_CARD_TEXT_(card, 4, "scer1%x", cmd->hdr.return_code);
@@ -4540,10 +4539,15 @@ static int qeth_snmp_command_cb(struct qeth_card *card,
 		return 0;
 	}
 	data_len = *((__u16 *)QETH_IPA_PDU_LEN_PDU1(data));
-	if (cmd->data.setadapterparms.hdr.seq_no == 1)
-		data_len -= (__u16)((char *)&snmp->data - (char *)cmd);
-	else
-		data_len -= (__u16)((char *)&snmp->request - (char *)cmd);
+	if (cmd->data.setadapterparms.hdr.seq_no == 1) {
+		snmp_data = &cmd->data.setadapterparms.data.snmp;
+		data_len -= offsetof(struct qeth_ipa_cmd,
+				     data.setadapterparms.data.snmp);
+	} else {
+		snmp_data = &cmd->data.setadapterparms.data.snmp.request;
+		data_len -= offsetof(struct qeth_ipa_cmd,
+				     data.setadapterparms.data.snmp.request);
+	}
 
 	/* check if there is enough room in userspace */
 	if ((qinfo->udata_len - qinfo->udata_offset) < data_len) {
@@ -4556,16 +4560,9 @@ static int qeth_snmp_command_cb(struct qeth_card *card,
 	QETH_CARD_TEXT_(card, 4, "sseqn%i",
 		cmd->data.setadapterparms.hdr.seq_no);
 	/*copy entries to user buffer*/
-	if (cmd->data.setadapterparms.hdr.seq_no == 1) {
-		memcpy(qinfo->udata + qinfo->udata_offset,
-		       (char *)snmp,
-		       data_len + offsetof(struct qeth_snmp_cmd, data));
-		qinfo->udata_offset += offsetof(struct qeth_snmp_cmd, data);
-	} else {
-		memcpy(qinfo->udata + qinfo->udata_offset,
-		       (char *)&snmp->request, data_len);
-	}
+	memcpy(qinfo->udata + qinfo->udata_offset, snmp_data, data_len);
 	qinfo->udata_offset += data_len;
+
 	/* check if all replies received ... */
 		QETH_CARD_TEXT_(card, 4, "srtot%i",
 			       cmd->data.setadapterparms.hdr.used_total);
-- 
GitLab


From 9ce2b991f7ea45b913c3c391bb652dd95dd78876 Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Sat, 24 Nov 2018 22:46:23 -0500
Subject: [PATCH 1551/1890] drm/amdgpu: Cast to uint64_t before left shift
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Avoid potential integer overflows with left shift in huge-page mapping
code by casting the operand to uin64_t first.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Christian KÃ¶nig <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index dad0e2342df9d..be3e360b04502 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -181,7 +181,7 @@ static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev,
 
 	if (level == adev->vm_manager.root_level)
 		/* For the root directory */
-		return round_up(adev->vm_manager.max_pfn, 1 << shift) >> shift;
+		return round_up(adev->vm_manager.max_pfn, 1ULL << shift) >> shift;
 	else if (level != AMDGPU_VM_PTB)
 		/* Everything in between */
 		return 512;
@@ -1666,10 +1666,10 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 		}
 
 		/* Looks good so far, calculate parameters for the update */
-		incr = AMDGPU_GPU_PAGE_SIZE << shift;
+		incr = (uint64_t)AMDGPU_GPU_PAGE_SIZE << shift;
 		mask = amdgpu_vm_entries_mask(adev, cursor.level);
 		pe_start = ((cursor.pfn >> shift) & mask) * 8;
-		entry_end = (mask + 1) << shift;
+		entry_end = (uint64_t)(mask + 1) << shift;
 		entry_end += cursor.pfn & ~(entry_end - 1);
 		entry_end = min(entry_end, end);
 
@@ -1682,7 +1682,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 					      flags | AMDGPU_PTE_FRAG(frag));
 
 			pe_start += nptes * 8;
-			dst += nptes * AMDGPU_GPU_PAGE_SIZE << shift;
+			dst += (uint64_t)nptes * AMDGPU_GPU_PAGE_SIZE << shift;
 
 			frag_start = upd_end;
 			if (frag_start >= frag_end) {
-- 
GitLab


From 1954db153d181e32017804e353e09ffe669c000b Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Sat, 24 Nov 2018 23:25:04 -0500
Subject: [PATCH 1552/1890] drm/amdgpu: Avoid endless loop in GPUVM fragment
 processing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Don't bounce back to the root level for fragment processing, because
huge pages are not supported at that level. This is unlikely to happen
with the default VM size on Vega, but can be exposed by limiting the
VM size with the amdgpu.vm_size module parameter.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Christian KÃ¶nig <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index be3e360b04502..0877ff9a95944 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1656,9 +1656,11 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 			if (!amdgpu_vm_pt_descendant(adev, &cursor))
 				return -ENOENT;
 			continue;
-		} else if (frag >= parent_shift) {
+		} else if (frag >= parent_shift &&
+			   cursor.level - 1 != adev->vm_manager.root_level) {
 			/* If the fragment size is even larger than the parent
-			 * shift we should go up one level and check it again.
+			 * shift we should go up one level and check it again
+			 * unless one level up is the root level.
 			 */
 			if (!amdgpu_vm_pt_ancestor(&cursor))
 				return -ENOENT;
-- 
GitLab


From ad97d9de45835b6a0f71983b0ae0cffd7306730a Mon Sep 17 00:00:00 2001
From: shaoyunl <shaoyun.liu@amd.com>
Date: Thu, 22 Nov 2018 11:45:24 -0500
Subject: [PATCH 1553/1890] drm/amdgpu: Add delay after enable RLC ucode

Driver shouldn't try to access any GFX registers until RLC is idle.
During the test, it took 12 seconds for RLC to clear the BUSY bit
in RLC_GPM_STAT register which is un-acceptable for driver.
As per RLC engineer, it would take RLC Ucode less than 10,000 GFXCLK
cycles to finish its critical section. In a lowest 300M enginer clock
setting(default from vbios), 50 us delay is enough.

This commit fix the hang when RLC introduce the work around for XGMI
which requires more cycles to setup more registers than normal

Signed-off-by: shaoyunl <shaoyun.liu@amd.com>
Acked-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 6d7baf59d6e11..21363b2b2ee57 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -2440,12 +2440,13 @@ static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
 #endif
 
 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
+	udelay(50);
 
 	/* carrizo do enable cp interrupt after cp inited */
-	if (!(adev->flags & AMD_IS_APU))
+	if (!(adev->flags & AMD_IS_APU)) {
 		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
-
-	udelay(50);
+		udelay(50);
+	}
 
 #ifdef AMDGPU_RLC_DEBUG_RETRY
 	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
-- 
GitLab


From 9765635b30756eb74e05e260ac812659c296cd28 Mon Sep 17 00:00:00 2001
From: Lyude Paul <lyude@redhat.com>
Date: Wed, 28 Nov 2018 16:00:05 -0500
Subject: [PATCH 1554/1890] Revert "drm/dp_mst: Skip validating ports during
 destruction, just ref"

This reverts commit:

c54c7374ff44 ("drm/dp_mst: Skip validating ports during destruction, just ref")

ugh.

In drm_dp_destroy_connector_work(), we have a pretty good chance of
freeing the actual struct drm_dp_mst_port. However, after destroying
things we send a hotplug through (*mgr->cbs->hotplug)(mgr) which is
where the problems start.

For i915, this calls all the way down to the fbcon probing helpers,
which start trying to access the port in a modeset.

[   45.062001] ==================================================================
[   45.062112] BUG: KASAN: use-after-free in ex_handler_refcount+0x146/0x180
[   45.062196] Write of size 4 at addr ffff8882b4b70968 by task kworker/3:1/53

[   45.062325] CPU: 3 PID: 53 Comm: kworker/3:1 Kdump: loaded Tainted: G           O      4.20.0-rc4Lyude-Test+ #3
[   45.062442] Hardware name: LENOVO 20BWS1KY00/20BWS1KY00, BIOS JBET71WW (1.35 ) 09/14/2018
[   45.062554] Workqueue: events drm_dp_destroy_connector_work [drm_kms_helper]
[   45.062641] Call Trace:
[   45.062685]  dump_stack+0xbd/0x15a
[   45.062735]  ? dump_stack_print_info.cold.0+0x1b/0x1b
[   45.062801]  ? printk+0x9f/0xc5
[   45.062847]  ? kmsg_dump_rewind_nolock+0xe4/0xe4
[   45.062909]  ? ex_handler_refcount+0x146/0x180
[   45.062970]  print_address_description+0x71/0x239
[   45.063036]  ? ex_handler_refcount+0x146/0x180
[   45.063095]  kasan_report.cold.5+0x242/0x30b
[   45.063155]  __asan_report_store4_noabort+0x1c/0x20
[   45.063313]  ex_handler_refcount+0x146/0x180
[   45.063371]  ? ex_handler_clear_fs+0xb0/0xb0
[   45.063428]  fixup_exception+0x98/0xd7
[   45.063484]  ? raw_notifier_call_chain+0x20/0x20
[   45.063548]  do_trap+0x6d/0x210
[   45.063605]  ? _GLOBAL__sub_I_65535_1_drm_dp_aux_unregister_devnode+0x2f/0x1c6 [drm_kms_helper]
[   45.063732]  do_error_trap+0xc0/0x170
[   45.063802]  ? _GLOBAL__sub_I_65535_1_drm_dp_aux_unregister_devnode+0x2f/0x1c6 [drm_kms_helper]
[   45.063929]  do_invalid_op+0x3b/0x50
[   45.063997]  ? _GLOBAL__sub_I_65535_1_drm_dp_aux_unregister_devnode+0x2f/0x1c6 [drm_kms_helper]
[   45.064103]  invalid_op+0x14/0x20
[   45.064162] RIP: 0010:_GLOBAL__sub_I_65535_1_drm_dp_aux_unregister_devnode+0x2f/0x1c6 [drm_kms_helper]
[   45.064274] Code: 00 48 c7 c7 80 fe 53 a0 48 89 e5 e8 5b 6f 26 e1 5d c3 48 8d 0e 0f 0b 48 8d 0b 0f 0b 48 8d 0f 0f 0b 48 8d 0f 0f 0b 49 8d 4d 00 <0f> 0b 49 8d 0e 0f 0b 48 8d 08 0f 0b 49 8d 4d 00 0f 0b 48 8d 0b 0f
[   45.064569] RSP: 0018:ffff8882b789ee10 EFLAGS: 00010282
[   45.064637] RAX: ffff8882af47ae70 RBX: ffff8882af47aa60 RCX: ffff8882b4b70968
[   45.064723] RDX: ffff8882af47ae70 RSI: 0000000000000008 RDI: ffff8882b788bdb8
[   45.064808] RBP: ffff8882b789ee28 R08: ffffed1056f13db4 R09: ffffed1056f13db3
[   45.064894] R10: ffffed1056f13db3 R11: ffff8882b789ed9f R12: ffff8882af47ad28
[   45.064980] R13: ffff8882b4b70968 R14: ffff8882acd86728 R15: ffff8882b4b75dc8
[   45.065084]  drm_dp_mst_reset_vcpi_slots+0x12/0x80 [drm_kms_helper]
[   45.065225]  intel_mst_disable_dp+0xda/0x180 [i915]
[   45.065361]  intel_encoders_disable.isra.107+0x197/0x310 [i915]
[   45.065498]  haswell_crtc_disable+0xbe/0x400 [i915]
[   45.065622]  ? i9xx_disable_plane+0x1c0/0x3e0 [i915]
[   45.065750]  intel_atomic_commit_tail+0x74e/0x3e60 [i915]
[   45.065884]  ? intel_pre_plane_update+0xbc0/0xbc0 [i915]
[   45.065968]  ? drm_atomic_helper_swap_state+0x88b/0x1d90 [drm_kms_helper]
[   45.066054]  ? kasan_check_write+0x14/0x20
[   45.066165]  ? i915_gem_track_fb+0x13a/0x330 [i915]
[   45.066277]  ? i915_sw_fence_complete+0xe9/0x140 [i915]
[   45.066406]  ? __i915_sw_fence_complete+0xc50/0xc50 [i915]
[   45.066540]  intel_atomic_commit+0x72e/0xef0 [i915]
[   45.066635]  ? drm_dev_dbg+0x200/0x200 [drm]
[   45.066764]  ? intel_atomic_commit_tail+0x3e60/0x3e60 [i915]
[   45.066898]  ? intel_atomic_commit_tail+0x3e60/0x3e60 [i915]
[   45.067001]  drm_atomic_commit+0xc4/0xf0 [drm]
[   45.067074]  restore_fbdev_mode_atomic+0x562/0x780 [drm_kms_helper]
[   45.067166]  ? drm_fb_helper_debug_leave+0x690/0x690 [drm_kms_helper]
[   45.067249]  ? kasan_check_read+0x11/0x20
[   45.067324]  restore_fbdev_mode+0x127/0x4b0 [drm_kms_helper]
[   45.067364]  ? kasan_check_read+0x11/0x20
[   45.067406]  drm_fb_helper_restore_fbdev_mode_unlocked+0x164/0x200 [drm_kms_helper]
[   45.067462]  ? drm_fb_helper_hotplug_event+0x30/0x30 [drm_kms_helper]
[   45.067508]  ? kasan_check_write+0x14/0x20
[   45.070360]  ? mutex_unlock+0x22/0x40
[   45.073748]  drm_fb_helper_set_par+0xb2/0xf0 [drm_kms_helper]
[   45.075846]  drm_fb_helper_hotplug_event.part.33+0x1cd/0x290 [drm_kms_helper]
[   45.078088]  drm_fb_helper_hotplug_event+0x1c/0x30 [drm_kms_helper]
[   45.082614]  intel_fbdev_output_poll_changed+0x9f/0x140 [i915]
[   45.087069]  drm_kms_helper_hotplug_event+0x67/0x90 [drm_kms_helper]
[   45.089319]  intel_dp_mst_hotplug+0x37/0x50 [i915]
[   45.091496]  drm_dp_destroy_connector_work+0x510/0x6f0 [drm_kms_helper]
[   45.093675]  ? drm_dp_update_payload_part1+0x1220/0x1220 [drm_kms_helper]
[   45.095851]  ? kasan_check_write+0x14/0x20
[   45.098473]  ? kasan_check_read+0x11/0x20
[   45.101155]  ? strscpy+0x17c/0x530
[   45.103808]  ? __switch_to_asm+0x34/0x70
[   45.106456]  ? syscall_return_via_sysret+0xf/0x7f
[   45.109711]  ? read_word_at_a_time+0x20/0x20
[   45.113138]  ? __switch_to_asm+0x40/0x70
[   45.116529]  ? __switch_to_asm+0x34/0x70
[   45.119891]  ? __switch_to_asm+0x40/0x70
[   45.123224]  ? __switch_to_asm+0x34/0x70
[   45.126540]  ? __switch_to_asm+0x34/0x70
[   45.129824]  process_one_work+0x88d/0x15d0
[   45.133172]  ? pool_mayday_timeout+0x850/0x850
[   45.136459]  ? pci_mmcfg_check_reserved+0x110/0x128
[   45.139739]  ? wake_q_add+0xb0/0xb0
[   45.143010]  ? check_preempt_wakeup+0x652/0x1050
[   45.146304]  ? worker_enter_idle+0x29e/0x740
[   45.149589]  ? __schedule+0x1ec0/0x1ec0
[   45.152937]  ? kasan_check_read+0x11/0x20
[   45.156179]  ? _raw_spin_lock_irq+0xa3/0x130
[   45.159382]  ? _raw_read_unlock_irqrestore+0x30/0x30
[   45.162542]  ? kasan_check_write+0x14/0x20
[   45.165657]  worker_thread+0x1a5/0x1470
[   45.168725]  ? set_load_weight+0x2e0/0x2e0
[   45.171755]  ? process_one_work+0x15d0/0x15d0
[   45.174806]  ? __switch_to_asm+0x34/0x70
[   45.177645]  ? __switch_to_asm+0x40/0x70
[   45.180323]  ? __switch_to_asm+0x34/0x70
[   45.182936]  ? __switch_to_asm+0x40/0x70
[   45.185539]  ? __switch_to_asm+0x34/0x70
[   45.188100]  ? __switch_to_asm+0x40/0x70
[   45.190628]  ? __schedule+0x7d4/0x1ec0
[   45.193143]  ? save_stack+0xa9/0xd0
[   45.195632]  ? kasan_check_write+0x10/0x20
[   45.198162]  ? kasan_kmalloc+0xc4/0xe0
[   45.200609]  ? kmem_cache_alloc_trace+0xdd/0x190
[   45.203046]  ? kthread+0x9f/0x3b0
[   45.205470]  ? ret_from_fork+0x35/0x40
[   45.207876]  ? unwind_next_frame+0x43/0x50
[   45.210273]  ? __save_stack_trace+0x82/0x100
[   45.212658]  ? deactivate_slab.isra.67+0x3d4/0x580
[   45.215026]  ? default_wake_function+0x35/0x50
[   45.217399]  ? kasan_check_read+0x11/0x20
[   45.219825]  ? _raw_spin_lock_irqsave+0xae/0x140
[   45.222174]  ? __lock_text_start+0x8/0x8
[   45.224521]  ? replenish_dl_entity.cold.62+0x4f/0x4f
[   45.226868]  ? __kthread_parkme+0x87/0xf0
[   45.229200]  kthread+0x2f7/0x3b0
[   45.231557]  ? process_one_work+0x15d0/0x15d0
[   45.233923]  ? kthread_park+0x120/0x120
[   45.236249]  ret_from_fork+0x35/0x40

[   45.240875] Allocated by task 242:
[   45.243136]  save_stack+0x43/0xd0
[   45.245385]  kasan_kmalloc+0xc4/0xe0
[   45.247597]  kmem_cache_alloc_trace+0xdd/0x190
[   45.249793]  drm_dp_add_port+0x1e0/0x2170 [drm_kms_helper]
[   45.252000]  drm_dp_send_link_address+0x4a7/0x740 [drm_kms_helper]
[   45.254389]  drm_dp_check_and_send_link_address+0x1a7/0x210 [drm_kms_helper]
[   45.256803]  drm_dp_mst_link_probe_work+0x6f/0xb0 [drm_kms_helper]
[   45.259200]  process_one_work+0x88d/0x15d0
[   45.261597]  worker_thread+0x1a5/0x1470
[   45.264038]  kthread+0x2f7/0x3b0
[   45.266371]  ret_from_fork+0x35/0x40

[   45.270937] Freed by task 53:
[   45.273170]  save_stack+0x43/0xd0
[   45.275382]  __kasan_slab_free+0x139/0x190
[   45.277604]  kasan_slab_free+0xe/0x10
[   45.279826]  kfree+0x99/0x1b0
[   45.282044]  drm_dp_free_mst_port+0x4a/0x60 [drm_kms_helper]
[   45.284330]  drm_dp_destroy_connector_work+0x43e/0x6f0 [drm_kms_helper]
[   45.286660]  process_one_work+0x88d/0x15d0
[   45.288934]  worker_thread+0x1a5/0x1470
[   45.291231]  kthread+0x2f7/0x3b0
[   45.293547]  ret_from_fork+0x35/0x40

[   45.298206] The buggy address belongs to the object at ffff8882b4b70968
                which belongs to the cache kmalloc-2k of size 2048
[   45.303047] The buggy address is located 0 bytes inside of
                2048-byte region [ffff8882b4b70968, ffff8882b4b71168)
[   45.308010] The buggy address belongs to the page:
[   45.310477] page:ffffea000ad2dc00 count:1 mapcount:0 mapping:ffff8882c080cf40 index:0x0 compound_mapcount: 0
[   45.313051] flags: 0x8000000000010200(slab|head)
[   45.315635] raw: 8000000000010200 ffffea000aac2808 ffffea000abe8608 ffff8882c080cf40
[   45.318300] raw: 0000000000000000 00000000000d000d 00000001ffffffff 0000000000000000
[   45.320966] page dumped because: kasan: bad access detected

[   45.326312] Memory state around the buggy address:
[   45.329085]  ffff8882b4b70800: fb fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[   45.331845]  ffff8882b4b70880: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
[   45.334584] >ffff8882b4b70900: fc fc fc fc fc fc fc fc fc fc fc fc fc fb fb fb
[   45.337302]                                                           ^
[   45.340061]  ffff8882b4b70980: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[   45.342910]  ffff8882b4b70a00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[   45.345748] ==================================================================

So, this definitely isn't a fix that we want. This being said; there's
no real easy fix for this problem because of some of the catch-22's of
the MST helpers current design. For starters; we always need to validate
a port with drm_dp_get_validated_port_ref(), but validation relies on
the lifetime of the port in the actual topology. So once the port is
gone, it can't be validated again.

If we were to try to make the payload helpers not use port validation,
then we'd cause another problem: if the port isn't validated, it could
be freed and we'd just start causing more KASAN issues. There are
already hacks that attempt to workaround this in
drm_dp_mst_destroy_connector_work() by re-initializing the kref so that
it can be used again and it's memory can be freed once the VCPI helpers
finish removing the port's respective payloads. But none of these really
do anything helpful since the port still can't be validated since it's
gone from the topology. Also, that workaround is immensely confusing to
read through.

What really needs to be done in order to fix this is to teach DRM how to
track the lifetime of the structs for MST ports and branch devices
separately from their lifetime in the actual topology. Simply put; this
means having two different krefs-one that removes the port/branch device
from the topology, and one that finally calls kfree(). This would let us
simplify things, since we'd now be able to keep ports around without
having to keep them in the topology at the same time, which is exactly
what we need in order to teach our VCPI helpers to only validate ports
when it's actually necessary without running the risk of trying to use
unallocated memory.

Such a fix is on it's way, but for now let's play it safe and just
revert this. If this bug has been around for well over a year, we can
wait a little while to get an actual proper fix here.

Signed-off-by: Lyude Paul <lyude@redhat.com>
Fixes: c54c7374ff44 ("drm/dp_mst: Skip validating ports during destruction, just ref")
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Sean Paul <sean@poorly.run>
Cc: Jerry Zuo <Jerry.Zuo@amd.com>
Cc: Harry Wentland <Harry.Wentland@amd.com>
Cc: stable@vger.kernel.org # v4.6+
Acked-by: Sean Paul <sean@poorly.run>
Link: https://patchwork.freedesktop.org/patch/msgid/20181128210005.24434-1-lyude@redhat.com
---
 drivers/gpu/drm/drm_dp_mst_topology.c | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
index 250d7160826ff..0e0df398222d1 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -1023,20 +1023,9 @@ static struct drm_dp_mst_port *drm_dp_mst_get_port_ref_locked(struct drm_dp_mst_
 static struct drm_dp_mst_port *drm_dp_get_validated_port_ref(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_mst_port *port)
 {
 	struct drm_dp_mst_port *rport = NULL;
-
 	mutex_lock(&mgr->lock);
-	/*
-	 * Port may or may not be 'valid' but we don't care about that when
-	 * destroying the port and we are guaranteed that the port pointer
-	 * will be valid until we've finished
-	 */
-	if (current_work() == &mgr->destroy_connector_work) {
-		kref_get(&port->kref);
-		rport = port;
-	} else if (mgr->mst_primary) {
-		rport = drm_dp_mst_get_port_ref_locked(mgr->mst_primary,
-						       port);
-	}
+	if (mgr->mst_primary)
+		rport = drm_dp_mst_get_port_ref_locked(mgr->mst_primary, port);
 	mutex_unlock(&mgr->lock);
 	return rport;
 }
-- 
GitLab


From 5db6dd14b31397e8cccaaddab2ff44ebec1acf25 Mon Sep 17 00:00:00 2001
From: Fred Herard <fred.herard@oracle.com>
Date: Tue, 20 Nov 2018 20:22:45 -0500
Subject: [PATCH 1555/1890] scsi: libiscsi: Fix NULL pointer dereference in
 iscsi_eh_session_reset

This commit addresses NULL pointer dereference in iscsi_eh_session_reset.
Reference should not be made to session->leadconn when session->state is
set to ISCSI_STATE_TERMINATE.

Signed-off-by: Fred Herard <fred.herard@oracle.com>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Reviewed-by: Lee Duncan <lduncan@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/libiscsi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 93c66ebad907e..f78d2e5c1471d 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -2416,8 +2416,8 @@ int iscsi_eh_session_reset(struct scsi_cmnd *sc)
 failed:
 		ISCSI_DBG_EH(session,
 			     "failing session reset: Could not log back into "
-			     "%s, %s [age %d]\n", session->targetname,
-			     conn->persistent_address, session->age);
+			     "%s [age %d]\n", session->targetname,
+			     session->age);
 		spin_unlock_bh(&session->frwd_lock);
 		mutex_unlock(&session->eh_mutex);
 		return FAILED;
-- 
GitLab


From dc25ab067645eabd037f1a23d49a666f9e0b8c68 Mon Sep 17 00:00:00 2001
From: Sam Bobroff <sbobroff@linux.ibm.com>
Date: Mon, 5 Nov 2018 16:57:47 +1100
Subject: [PATCH 1556/1890] drm/ast: Fix incorrect free on ioregs

If the platform has no IO space, ioregs is placed next to the already
allocated regs. In this case, it should not be separately freed.

This prevents a kernel warning from __vunmap "Trying to vfree()
nonexistent vm area" when unloading the driver.

Fixes: 0dd68309b9c5 ("drm/ast: Try to use MMIO registers when PIO isn't supported")

Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/ast/ast_main.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c
index dac355812adcb..373700c05a00f 100644
--- a/drivers/gpu/drm/ast/ast_main.c
+++ b/drivers/gpu/drm/ast/ast_main.c
@@ -583,7 +583,8 @@ void ast_driver_unload(struct drm_device *dev)
 	drm_mode_config_cleanup(dev);
 
 	ast_mm_fini(ast);
-	pci_iounmap(dev->pdev, ast->ioregs);
+	if (ast->ioregs != ast->regs + AST_IO_MM_OFFSET)
+		pci_iounmap(dev->pdev, ast->ioregs);
 	pci_iounmap(dev->pdev, ast->regs);
 	kfree(ast);
 }
-- 
GitLab


From 300625620314194d9e6d4f6dda71f2dc9cf62d9f Mon Sep 17 00:00:00 2001
From: "Y.C. Chen" <yc_chen@aspeedtech.com>
Date: Thu, 22 Nov 2018 11:56:28 +0800
Subject: [PATCH 1557/1890] drm/ast: fixed reading monitor EDID not stable
 issue

v1: over-sample data to increase the stability with some specific monitors
v2: refine to avoid infinite loop
v3: remove un-necessary "volatile" declaration

[airlied: fix two checkpatch warnings]

Signed-off-by: Y.C. Chen <yc_chen@aspeedtech.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1542858988-1127-1-git-send-email-yc_chen@aspeedtech.com
---
 drivers/gpu/drm/ast/ast_mode.c | 36 ++++++++++++++++++++++++++++------
 1 file changed, 30 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c
index 7c6ac3cadb6b7..8bb355d5d43d8 100644
--- a/drivers/gpu/drm/ast/ast_mode.c
+++ b/drivers/gpu/drm/ast/ast_mode.c
@@ -973,9 +973,21 @@ static int get_clock(void *i2c_priv)
 {
 	struct ast_i2c_chan *i2c = i2c_priv;
 	struct ast_private *ast = i2c->dev->dev_private;
-	uint32_t val;
+	uint32_t val, val2, count, pass;
+
+	count = 0;
+	pass = 0;
+	val = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x10) >> 4) & 0x01;
+	do {
+		val2 = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x10) >> 4) & 0x01;
+		if (val == val2) {
+			pass++;
+		} else {
+			pass = 0;
+			val = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x10) >> 4) & 0x01;
+		}
+	} while ((pass < 5) && (count++ < 0x10000));
 
-	val = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x10) >> 4;
 	return val & 1 ? 1 : 0;
 }
 
@@ -983,9 +995,21 @@ static int get_data(void *i2c_priv)
 {
 	struct ast_i2c_chan *i2c = i2c_priv;
 	struct ast_private *ast = i2c->dev->dev_private;
-	uint32_t val;
+	uint32_t val, val2, count, pass;
+
+	count = 0;
+	pass = 0;
+	val = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x20) >> 5) & 0x01;
+	do {
+		val2 = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x20) >> 5) & 0x01;
+		if (val == val2) {
+			pass++;
+		} else {
+			pass = 0;
+			val = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x20) >> 5) & 0x01;
+		}
+	} while ((pass < 5) && (count++ < 0x10000));
 
-	val = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x20) >> 5;
 	return val & 1 ? 1 : 0;
 }
 
@@ -998,7 +1022,7 @@ static void set_clock(void *i2c_priv, int clock)
 
 	for (i = 0; i < 0x10000; i++) {
 		ujcrb7 = ((clock & 0x01) ? 0 : 1);
-		ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0xfe, ujcrb7);
+		ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0xf4, ujcrb7);
 		jtemp = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x01);
 		if (ujcrb7 == jtemp)
 			break;
@@ -1014,7 +1038,7 @@ static void set_data(void *i2c_priv, int data)
 
 	for (i = 0; i < 0x10000; i++) {
 		ujcrb7 = ((data & 0x01) ? 0 : 1) << 2;
-		ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0xfb, ujcrb7);
+		ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0xf1, ujcrb7);
 		jtemp = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x04);
 		if (ujcrb7 == jtemp)
 			break;
-- 
GitLab


From 02f425f811cefcc4d325d7a72272651e622dc97e Mon Sep 17 00:00:00 2001
From: Cathy Avery <cavery@redhat.com>
Date: Tue, 27 Nov 2018 14:28:53 -0500
Subject: [PATCH 1558/1890] scsi: vmw_pscsi: Rearrange code to avoid multiple
 calls to free_irq during unload

Currently pvscsi_remove calls free_irq more than once as
pvscsi_release_resources and __pvscsi_shutdown both call
pvscsi_shutdown_intr. This results in a 'Trying to free already-free IRQ'
warning and stack trace. To solve the problem pvscsi_shutdown_intr has been
moved out of pvscsi_release_resources.

Signed-off-by: Cathy Avery <cavery@redhat.com>
Reviewed-by: Ewan D. Milne <emilne@redhat.com>
Reviewed-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/vmw_pvscsi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/vmw_pvscsi.c b/drivers/scsi/vmw_pvscsi.c
index 6e491023fdd88..0d6b2a88fc8e2 100644
--- a/drivers/scsi/vmw_pvscsi.c
+++ b/drivers/scsi/vmw_pvscsi.c
@@ -1202,8 +1202,6 @@ static void pvscsi_shutdown_intr(struct pvscsi_adapter *adapter)
 
 static void pvscsi_release_resources(struct pvscsi_adapter *adapter)
 {
-	pvscsi_shutdown_intr(adapter);
-
 	if (adapter->workqueue)
 		destroy_workqueue(adapter->workqueue);
 
@@ -1534,6 +1532,7 @@ static int pvscsi_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 out_reset_adapter:
 	ll_adapter_reset(adapter);
 out_release_resources:
+	pvscsi_shutdown_intr(adapter);
 	pvscsi_release_resources(adapter);
 	scsi_host_put(host);
 out_disable_device:
@@ -1542,6 +1541,7 @@ static int pvscsi_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	return error;
 
 out_release_resources_and_disable:
+	pvscsi_shutdown_intr(adapter);
 	pvscsi_release_resources(adapter);
 	goto out_disable_device;
 }
-- 
GitLab


From cdbb096adddb3f42584cecb5ec2e07c26815b71f Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 27 Nov 2018 13:23:27 -0800
Subject: [PATCH 1559/1890] bpf: btf: implement btf_name_valid_identifier()

Function btf_name_valid_identifier() have been implemented in
bpf-next commit 2667a2626f4d ("bpf: btf: Add BTF_KIND_FUNC and
BTF_KIND_FUNC_PROTO"). Backport this function so later patch
can use it.

Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/btf.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index ee4c82667d659..93c233ab2db66 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -5,6 +5,7 @@
 #include <uapi/linux/types.h>
 #include <linux/seq_file.h>
 #include <linux/compiler.h>
+#include <linux/ctype.h>
 #include <linux/errno.h>
 #include <linux/slab.h>
 #include <linux/anon_inodes.h>
@@ -426,6 +427,30 @@ static bool btf_name_offset_valid(const struct btf *btf, u32 offset)
 		offset < btf->hdr.str_len;
 }
 
+/* Only C-style identifier is permitted. This can be relaxed if
+ * necessary.
+ */
+static bool btf_name_valid_identifier(const struct btf *btf, u32 offset)
+{
+	/* offset must be valid */
+	const char *src = &btf->strings[offset];
+	const char *src_limit;
+
+	if (!isalpha(*src) && *src != '_')
+		return false;
+
+	/* set a limit on identifier length */
+	src_limit = src + KSYM_NAME_LEN;
+	src++;
+	while (*src && src < src_limit) {
+		if (!isalnum(*src) && *src != '_')
+			return false;
+		src++;
+	}
+
+	return !*src;
+}
+
 static const char *btf_name_by_offset(const struct btf *btf, u32 offset)
 {
 	if (!offset)
-- 
GitLab


From eb04bbb608e683f8fd3ef7f716e2fa32dd90861f Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 27 Nov 2018 13:23:28 -0800
Subject: [PATCH 1560/1890] bpf: btf: check name validity for various types

This patch added name checking for the following types:
 . BTF_KIND_PTR, BTF_KIND_ARRAY, BTF_KIND_VOLATILE,
   BTF_KIND_CONST, BTF_KIND_RESTRICT:
     the name must be null
 . BTF_KIND_STRUCT, BTF_KIND_UNION: the struct/member name
     is either null or a valid identifier
 . BTF_KIND_ENUM: the enum type name is either null or a valid
     identifier; the enumerator name must be a valid identifier.
 . BTF_KIND_FWD: the name must be a valid identifier
 . BTF_KIND_TYPEDEF: the name must be a valid identifier

For those places a valid name is required, the name must be
a valid C identifier. This can be relaxed later if we found
use cases for a different (non-C) frontend.

Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/btf.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)

diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 93c233ab2db66..4da543d6bea29 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -1168,6 +1168,22 @@ static int btf_ref_type_check_meta(struct btf_verifier_env *env,
 		return -EINVAL;
 	}
 
+	/* typedef type must have a valid name, and other ref types,
+	 * volatile, const, restrict, should have a null name.
+	 */
+	if (BTF_INFO_KIND(t->info) == BTF_KIND_TYPEDEF) {
+		if (!t->name_off ||
+		    !btf_name_valid_identifier(env->btf, t->name_off)) {
+			btf_verifier_log_type(env, t, "Invalid name");
+			return -EINVAL;
+		}
+	} else {
+		if (t->name_off) {
+			btf_verifier_log_type(env, t, "Invalid name");
+			return -EINVAL;
+		}
+	}
+
 	btf_verifier_log_type(env, t, NULL);
 
 	return 0;
@@ -1325,6 +1341,13 @@ static s32 btf_fwd_check_meta(struct btf_verifier_env *env,
 		return -EINVAL;
 	}
 
+	/* fwd type must have a valid name */
+	if (!t->name_off ||
+	    !btf_name_valid_identifier(env->btf, t->name_off)) {
+		btf_verifier_log_type(env, t, "Invalid name");
+		return -EINVAL;
+	}
+
 	btf_verifier_log_type(env, t, NULL);
 
 	return 0;
@@ -1381,6 +1404,12 @@ static s32 btf_array_check_meta(struct btf_verifier_env *env,
 		return -EINVAL;
 	}
 
+	/* array type should not have a name */
+	if (t->name_off) {
+		btf_verifier_log_type(env, t, "Invalid name");
+		return -EINVAL;
+	}
+
 	if (btf_type_vlen(t)) {
 		btf_verifier_log_type(env, t, "vlen != 0");
 		return -EINVAL;
@@ -1557,6 +1586,13 @@ static s32 btf_struct_check_meta(struct btf_verifier_env *env,
 		return -EINVAL;
 	}
 
+	/* struct type either no name or a valid one */
+	if (t->name_off &&
+	    !btf_name_valid_identifier(env->btf, t->name_off)) {
+		btf_verifier_log_type(env, t, "Invalid name");
+		return -EINVAL;
+	}
+
 	btf_verifier_log_type(env, t, NULL);
 
 	last_offset = 0;
@@ -1568,6 +1604,12 @@ static s32 btf_struct_check_meta(struct btf_verifier_env *env,
 			return -EINVAL;
 		}
 
+		/* struct member either no name or a valid one */
+		if (member->name_off &&
+		    !btf_name_valid_identifier(btf, member->name_off)) {
+			btf_verifier_log_member(env, t, member, "Invalid name");
+			return -EINVAL;
+		}
 		/* A member cannot be in type void */
 		if (!member->type || !BTF_TYPE_ID_VALID(member->type)) {
 			btf_verifier_log_member(env, t, member,
@@ -1755,6 +1797,13 @@ static s32 btf_enum_check_meta(struct btf_verifier_env *env,
 		return -EINVAL;
 	}
 
+	/* enum type either no name or a valid one */
+	if (t->name_off &&
+	    !btf_name_valid_identifier(env->btf, t->name_off)) {
+		btf_verifier_log_type(env, t, "Invalid name");
+		return -EINVAL;
+	}
+
 	btf_verifier_log_type(env, t, NULL);
 
 	for (i = 0; i < nr_enums; i++) {
@@ -1764,6 +1813,14 @@ static s32 btf_enum_check_meta(struct btf_verifier_env *env,
 			return -EINVAL;
 		}
 
+		/* enum member must have a valid name */
+		if (!enums[i].name_off ||
+		    !btf_name_valid_identifier(btf, enums[i].name_off)) {
+			btf_verifier_log_type(env, t, "Invalid name");
+			return -EINVAL;
+		}
+
+
 		btf_verifier_log(env, "\t%s val=%d\n",
 				 btf_name_by_offset(btf, enums[i].name_off),
 				 enums[i].val);
-- 
GitLab


From 8800cd031af085807028656c6ba7eb7908d78262 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Tue, 27 Nov 2018 13:23:29 -0800
Subject: [PATCH 1561/1890] tools/bpf: fix two test_btf unit test cases

There are two unit test cases, which should encode
TYPEDEF type, but instead encode PTR type.
The error is flagged out after enforcing name
checking in the previous patch.

Fixes: c0fa1b6c3efc ("bpf: btf: Add BTF tests")
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_btf.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
index f42b3396d6226..b361bb8518291 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -432,11 +432,11 @@ static struct btf_raw_test raw_tests[] = {
 		/* const void* */	/* [3] */
 		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2),
 		/* typedef const void * const_void_ptr */
-		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3),
-		/* struct A { */	/* [4] */
+		BTF_TYPEDEF_ENC(NAME_TBD, 3),	/* [4] */
+		/* struct A { */	/* [5] */
 		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)),
 		/* const_void_ptr m; */
-		BTF_MEMBER_ENC(NAME_TBD, 3, 0),
+		BTF_MEMBER_ENC(NAME_TBD, 4, 0),
 		/* } */
 		BTF_END_RAW,
 	},
@@ -494,10 +494,10 @@ static struct btf_raw_test raw_tests[] = {
 		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
 		/* const void* */	/* [3] */
 		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2),
-		/* typedef const void * const_void_ptr */	/* [4] */
-		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3),
-		/* const_void_ptr[4] */	/* [5] */
-		BTF_TYPE_ARRAY_ENC(3, 1, 4),
+		/* typedef const void * const_void_ptr */
+		BTF_TYPEDEF_ENC(NAME_TBD, 3),	/* [4] */
+		/* const_void_ptr[4] */
+		BTF_TYPE_ARRAY_ENC(4, 1, 4),	/* [5] */
 		BTF_END_RAW,
 	},
 	.str_sec = "\0const_void_ptr",
-- 
GitLab


From d08489125e04a9f73d9323caea43270fd22d395f Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 27 Nov 2018 13:23:30 -0800
Subject: [PATCH 1562/1890] tools/bpf: add addition type tests to test_btf

The following additional unit testcases are added to test_btf:
...
BTF raw test[42] (typedef (invalid name, name_off = 0)): OK
BTF raw test[43] (typedef (invalid name, invalid identifier)): OK
BTF raw test[44] (ptr type (invalid name, name_off <> 0)): OK
BTF raw test[45] (volatile type (invalid name, name_off <> 0)): OK
BTF raw test[46] (const type (invalid name, name_off <> 0)): OK
BTF raw test[47] (restrict type (invalid name, name_off <> 0)): OK
BTF raw test[48] (fwd type (invalid name, name_off = 0)): OK
BTF raw test[49] (fwd type (invalid name, invalid identifier)): OK
BTF raw test[50] (array type (invalid name, name_off <> 0)): OK
BTF raw test[51] (struct type (name_off = 0)): OK
BTF raw test[52] (struct type (invalid name, invalid identifier)): OK
BTF raw test[53] (struct member (name_off = 0)): OK
BTF raw test[54] (struct member (invalid name, invalid identifier)): OK
BTF raw test[55] (enum type (name_off = 0)): OK
BTF raw test[56] (enum type (invalid name, invalid identifier)): OK
BTF raw test[57] (enum member (invalid name, name_off = 0)): OK
BTF raw test[58] (enum member (invalid name, invalid identifier)): OK
...

Fixes: c0fa1b6c3efc ("bpf: btf: Add BTF tests")
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_btf.c | 361 +++++++++++++++++++++++++
 1 file changed, 361 insertions(+)

diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
index b361bb8518291..38e1cbaaffdbb 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -1292,6 +1292,367 @@ static struct btf_raw_test raw_tests[] = {
 	.err_str = "type != 0",
 },
 
+{
+	.descr = "typedef (invalid name, name_off = 0)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_TYPEDEF_ENC(0, 1),				/* [2] */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0__int",
+	.str_sec_size = sizeof("\0__int"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "typedef_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid name",
+},
+
+{
+	.descr = "typedef (invalid name, invalid identifier)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_TYPEDEF_ENC(NAME_TBD, 1),			/* [2] */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0__!int",
+	.str_sec_size = sizeof("\0__!int"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "typedef_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid name",
+},
+
+{
+	.descr = "ptr type (invalid name, name_off <> 0)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
+		BTF_TYPE_ENC(NAME_TBD,
+			     BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 1),	/* [2] */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0__int",
+	.str_sec_size = sizeof("\0__int"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "ptr_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid name",
+},
+
+{
+	.descr = "volatile type (invalid name, name_off <> 0)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
+		BTF_TYPE_ENC(NAME_TBD,
+			     BTF_INFO_ENC(BTF_KIND_VOLATILE, 0, 0), 1),	/* [2] */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0__int",
+	.str_sec_size = sizeof("\0__int"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "volatile_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid name",
+},
+
+{
+	.descr = "const type (invalid name, name_off <> 0)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
+		BTF_TYPE_ENC(NAME_TBD,
+			     BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 1),	/* [2] */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0__int",
+	.str_sec_size = sizeof("\0__int"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "const_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid name",
+},
+
+{
+	.descr = "restrict type (invalid name, name_off <> 0)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 1),	/* [2] */
+		BTF_TYPE_ENC(NAME_TBD,
+			     BTF_INFO_ENC(BTF_KIND_RESTRICT, 0, 0), 2),	/* [3] */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0__int",
+	.str_sec_size = sizeof("\0__int"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "restrict_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid name",
+},
+
+{
+	.descr = "fwd type (invalid name, name_off = 0)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FWD, 0, 0), 0),	/* [2] */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0__skb",
+	.str_sec_size = sizeof("\0__skb"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "fwd_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid name",
+},
+
+{
+	.descr = "fwd type (invalid name, invalid identifier)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
+		BTF_TYPE_ENC(NAME_TBD,
+			     BTF_INFO_ENC(BTF_KIND_FWD, 0, 0), 0),	/* [2] */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0__!skb",
+	.str_sec_size = sizeof("\0__!skb"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "fwd_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid name",
+},
+
+{
+	.descr = "array type (invalid name, name_off <> 0)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
+		BTF_TYPE_ENC(NAME_TBD,
+			     BTF_INFO_ENC(BTF_KIND_ARRAY, 0, 0), 0),	/* [2] */
+		BTF_ARRAY_ENC(1, 1, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0__skb",
+	.str_sec_size = sizeof("\0__skb"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "array_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid name",
+},
+
+{
+	.descr = "struct type (name_off = 0)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
+		BTF_TYPE_ENC(0,
+			     BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4),	/* [2] */
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A",
+	.str_sec_size = sizeof("\0A"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "struct_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+},
+
+{
+	.descr = "struct type (invalid name, invalid identifier)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
+		BTF_TYPE_ENC(NAME_TBD,
+			     BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4),	/* [2] */
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A!\0B",
+	.str_sec_size = sizeof("\0A!\0B"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "struct_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid name",
+},
+
+{
+	.descr = "struct member (name_off = 0)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
+		BTF_TYPE_ENC(0,
+			     BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4),	/* [2] */
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A",
+	.str_sec_size = sizeof("\0A"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "struct_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+},
+
+{
+	.descr = "struct member (invalid name, invalid identifier)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
+		BTF_TYPE_ENC(NAME_TBD,
+			     BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4),	/* [2] */
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0B*",
+	.str_sec_size = sizeof("\0A\0B*"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "struct_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid name",
+},
+
+{
+	.descr = "enum type (name_off = 0)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
+		BTF_TYPE_ENC(0,
+			     BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1),
+			     sizeof(int)),				/* [2] */
+		BTF_ENUM_ENC(NAME_TBD, 0),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0B",
+	.str_sec_size = sizeof("\0A\0B"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "enum_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+},
+
+{
+	.descr = "enum type (invalid name, invalid identifier)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
+		BTF_TYPE_ENC(NAME_TBD,
+			     BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1),
+			     sizeof(int)),				/* [2] */
+		BTF_ENUM_ENC(NAME_TBD, 0),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A!\0B",
+	.str_sec_size = sizeof("\0A!\0B"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "enum_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid name",
+},
+
+{
+	.descr = "enum member (invalid name, name_off = 0)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
+		BTF_TYPE_ENC(0,
+			     BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1),
+			     sizeof(int)),				/* [2] */
+		BTF_ENUM_ENC(0, 0),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "enum_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid name",
+},
+
+{
+	.descr = "enum member (invalid name, invalid identifier)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
+		BTF_TYPE_ENC(0,
+			     BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1),
+			     sizeof(int)),				/* [2] */
+		BTF_ENUM_ENC(NAME_TBD, 0),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A!",
+	.str_sec_size = sizeof("\0A!"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "enum_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid name",
+},
 {
 	.descr = "arraymap invalid btf key (a bit field)",
 	.raw_types = {
-- 
GitLab


From 528bff0cdb6649f97f2c4802e4ac7a4b50645f2f Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Wed, 28 Nov 2018 09:38:23 -0800
Subject: [PATCH 1563/1890] tools: bpftool: fix a bitfield pretty print issue

Commit b12d6ec09730 ("bpf: btf: add btf print functionality")
added btf pretty print functionality to bpftool.
There is a problem though in printing a bitfield whose type
has modifiers.

For example, for a type like
  typedef int ___int;
  struct tmp_t {
          int a:3;
          ___int b:3;
  };
Suppose we have a map
  struct bpf_map_def SEC("maps") tmpmap = {
          .type = BPF_MAP_TYPE_HASH,
          .key_size = sizeof(__u32),
          .value_size = sizeof(struct tmp_t),
          .max_entries = 1,
  };
and the hash table is populated with one element with
key 0 and value (.a = 1 and .b = 2).

In BTF, the struct member "b" will have a type "typedef" which
points to an int type. The current implementation does not
pass the bit offset during transition from typedef to int type,
hence incorrectly print the value as
  $ bpftool m d id 79
  [{
          "key": 0,
          "value": {
              "a": 0x1,
              "b": 0x1
          }
      }
  ]

This patch fixed the issue by carrying bit_offset along the type
chain during bit_field print. The correct result can be printed as
  $ bpftool m d id 76
  [{
          "key": 0,
          "value": {
              "a": 0x1,
              "b": 0x2
          }
      }
  ]

The kernel pretty print is implemented correctly and does not
have this issue.

Fixes: b12d6ec09730 ("bpf: btf: add btf print functionality")
Signed-off-by: Yonghong Song <yhs@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/bpf/bpftool/btf_dumper.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c
index 55bc512a18318..e4e6e2b3fd847 100644
--- a/tools/bpf/bpftool/btf_dumper.c
+++ b/tools/bpf/bpftool/btf_dumper.c
@@ -32,7 +32,7 @@ static void btf_dumper_ptr(const void *data, json_writer_t *jw,
 }
 
 static int btf_dumper_modifier(const struct btf_dumper *d, __u32 type_id,
-			       const void *data)
+			       __u8 bit_offset, const void *data)
 {
 	int actual_type_id;
 
@@ -40,7 +40,7 @@ static int btf_dumper_modifier(const struct btf_dumper *d, __u32 type_id,
 	if (actual_type_id < 0)
 		return actual_type_id;
 
-	return btf_dumper_do_type(d, actual_type_id, 0, data);
+	return btf_dumper_do_type(d, actual_type_id, bit_offset, data);
 }
 
 static void btf_dumper_enum(const void *data, json_writer_t *jw)
@@ -237,7 +237,7 @@ static int btf_dumper_do_type(const struct btf_dumper *d, __u32 type_id,
 	case BTF_KIND_VOLATILE:
 	case BTF_KIND_CONST:
 	case BTF_KIND_RESTRICT:
-		return btf_dumper_modifier(d, type_id, data);
+		return btf_dumper_modifier(d, type_id, bit_offset, data);
 	default:
 		jsonw_printf(d->jw, "(unsupported-kind");
 		return -EINVAL;
-- 
GitLab


From c967590457cae5ba4f018704c341641bdcecfdcf Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Mon, 26 Nov 2018 00:26:17 +0000
Subject: [PATCH 1564/1890] scsi: storvsc: Fix a race in sub-channel creation
 that can cause panic

We can concurrently try to open the same sub-channel from 2 paths:

path #1: vmbus_onoffer() -> vmbus_process_offer() -> handle_sc_creation().
path #2: storvsc_probe() -> storvsc_connect_to_vsp() ->
	 -> storvsc_channel_init() -> handle_multichannel_storage() ->
	 -> vmbus_are_subchannels_present() -> handle_sc_creation().

They conflict with each other, but it was not an issue before the recent
commit ae6935ed7d42 ("vmbus: split ring buffer allocation from open"),
because at the beginning of vmbus_open() we checked newchannel->state so
only one path could succeed, and the other would return with -EINVAL.

After ae6935ed7d42, the failing path frees the channel's ringbuffer by
vmbus_free_ring(), and this causes a panic later.

Commit ae6935ed7d42 itself is good, and it just reveals the longstanding
race. We can resolve the issue by removing path #2, i.e. removing the
second vmbus_are_subchannels_present() in handle_multichannel_storage().

BTW, the comment "Check to see if sub-channels have already been created"
in handle_multichannel_storage() is incorrect: when we unload the driver,
we first close the sub-channel(s) and then close the primary channel, next
the host sends rescind-offer message(s) so primary->sc_list will become
empty. This means the first vmbus_are_subchannels_present() in
handle_multichannel_storage() is never useful.

Fixes: ae6935ed7d42 ("vmbus: split ring buffer allocation from open")
Cc: stable@vger.kernel.org
Cc: Long Li <longli@microsoft.com>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Cc: K. Y. Srinivasan <kys@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/storvsc_drv.c | 61 +++++++++++++++++++-------------------
 1 file changed, 30 insertions(+), 31 deletions(-)

diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index f03dc03a42c35..8f88348ebe424 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -446,7 +446,6 @@ struct storvsc_device {
 
 	bool	 destroy;
 	bool	 drain_notify;
-	bool	 open_sub_channel;
 	atomic_t num_outstanding_req;
 	struct Scsi_Host *host;
 
@@ -636,33 +635,38 @@ static inline struct storvsc_device *get_in_stor_device(
 static void handle_sc_creation(struct vmbus_channel *new_sc)
 {
 	struct hv_device *device = new_sc->primary_channel->device_obj;
+	struct device *dev = &device->device;
 	struct storvsc_device *stor_device;
 	struct vmstorage_channel_properties props;
+	int ret;
 
 	stor_device = get_out_stor_device(device);
 	if (!stor_device)
 		return;
 
-	if (stor_device->open_sub_channel == false)
-		return;
-
 	memset(&props, 0, sizeof(struct vmstorage_channel_properties));
 
-	vmbus_open(new_sc,
-		   storvsc_ringbuffer_size,
-		   storvsc_ringbuffer_size,
-		   (void *)&props,
-		   sizeof(struct vmstorage_channel_properties),
-		   storvsc_on_channel_callback, new_sc);
+	ret = vmbus_open(new_sc,
+			 storvsc_ringbuffer_size,
+			 storvsc_ringbuffer_size,
+			 (void *)&props,
+			 sizeof(struct vmstorage_channel_properties),
+			 storvsc_on_channel_callback, new_sc);
 
-	if (new_sc->state == CHANNEL_OPENED_STATE) {
-		stor_device->stor_chns[new_sc->target_cpu] = new_sc;
-		cpumask_set_cpu(new_sc->target_cpu, &stor_device->alloced_cpus);
+	/* In case vmbus_open() fails, we don't use the sub-channel. */
+	if (ret != 0) {
+		dev_err(dev, "Failed to open sub-channel: err=%d\n", ret);
+		return;
 	}
+
+	/* Add the sub-channel to the array of available channels. */
+	stor_device->stor_chns[new_sc->target_cpu] = new_sc;
+	cpumask_set_cpu(new_sc->target_cpu, &stor_device->alloced_cpus);
 }
 
 static void  handle_multichannel_storage(struct hv_device *device, int max_chns)
 {
+	struct device *dev = &device->device;
 	struct storvsc_device *stor_device;
 	int num_cpus = num_online_cpus();
 	int num_sc;
@@ -679,21 +683,11 @@ static void  handle_multichannel_storage(struct hv_device *device, int max_chns)
 	request = &stor_device->init_request;
 	vstor_packet = &request->vstor_packet;
 
-	stor_device->open_sub_channel = true;
 	/*
 	 * Establish a handler for dealing with subchannels.
 	 */
 	vmbus_set_sc_create_callback(device->channel, handle_sc_creation);
 
-	/*
-	 * Check to see if sub-channels have already been created. This
-	 * can happen when this driver is re-loaded after unloading.
-	 */
-
-	if (vmbus_are_subchannels_present(device->channel))
-		return;
-
-	stor_device->open_sub_channel = false;
 	/*
 	 * Request the host to create sub-channels.
 	 */
@@ -710,23 +704,29 @@ static void  handle_multichannel_storage(struct hv_device *device, int max_chns)
 			       VM_PKT_DATA_INBAND,
 			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 
-	if (ret != 0)
+	if (ret != 0) {
+		dev_err(dev, "Failed to create sub-channel: err=%d\n", ret);
 		return;
+	}
 
 	t = wait_for_completion_timeout(&request->wait_event, 10*HZ);
-	if (t == 0)
+	if (t == 0) {
+		dev_err(dev, "Failed to create sub-channel: timed out\n");
 		return;
+	}
 
 	if (vstor_packet->operation != VSTOR_OPERATION_COMPLETE_IO ||
-	    vstor_packet->status != 0)
+	    vstor_packet->status != 0) {
+		dev_err(dev, "Failed to create sub-channel: op=%d, sts=%d\n",
+			vstor_packet->operation, vstor_packet->status);
 		return;
+	}
 
 	/*
-	 * Now that we created the sub-channels, invoke the check; this
-	 * may trigger the callback.
+	 * We need to do nothing here, because vmbus_process_offer()
+	 * invokes channel->sc_creation_callback, which will open and use
+	 * the sub-channel(s).
 	 */
-	stor_device->open_sub_channel = true;
-	vmbus_are_subchannels_present(device->channel);
 }
 
 static void cache_wwn(struct storvsc_device *stor_device,
@@ -1794,7 +1794,6 @@ static int storvsc_probe(struct hv_device *device,
 	}
 
 	stor_device->destroy = false;
-	stor_device->open_sub_channel = false;
 	init_waitqueue_head(&stor_device->waiting_to_drain);
 	stor_device->device = device;
 	stor_device->host = host;
-- 
GitLab


From e5bde04ccce64d808f8b00a489a1fe5825d285cb Mon Sep 17 00:00:00 2001
From: Pan Bian <bianpan2016@163.com>
Date: Thu, 22 Nov 2018 18:00:16 +0800
Subject: [PATCH 1565/1890] crypto: do not free algorithm before using

In multiple functions, the algorithm fields are read after its reference
is dropped through crypto_mod_put. In this case, the algorithm memory
may be freed, resulting in use-after-free bugs. This patch delays the
put operation until the algorithm is never used.

Fixes: 79c65d179a40 ("crypto: cbc - Convert to skcipher")
Fixes: a7d85e06ed80 ("crypto: cfb - add support for Cipher FeedBack mode")
Fixes: 043a44001b9e ("crypto: pcbc - Convert to skcipher")
Cc: <stable@vger.kernel.org>
Signed-off-by: Pan Bian <bianpan2016@163.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/cbc.c  | 6 ++++--
 crypto/cfb.c  | 6 ++++--
 crypto/pcbc.c | 6 ++++--
 3 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/crypto/cbc.c b/crypto/cbc.c
index b761b1f9c6ca1..dd5f332fd5668 100644
--- a/crypto/cbc.c
+++ b/crypto/cbc.c
@@ -140,9 +140,8 @@ static int crypto_cbc_create(struct crypto_template *tmpl, struct rtattr **tb)
 	spawn = skcipher_instance_ctx(inst);
 	err = crypto_init_spawn(spawn, alg, skcipher_crypto_instance(inst),
 				CRYPTO_ALG_TYPE_MASK);
-	crypto_mod_put(alg);
 	if (err)
-		goto err_free_inst;
+		goto err_put_alg;
 
 	err = crypto_inst_setname(skcipher_crypto_instance(inst), "cbc", alg);
 	if (err)
@@ -174,12 +173,15 @@ static int crypto_cbc_create(struct crypto_template *tmpl, struct rtattr **tb)
 	err = skcipher_register_instance(tmpl, inst);
 	if (err)
 		goto err_drop_spawn;
+	crypto_mod_put(alg);
 
 out:
 	return err;
 
 err_drop_spawn:
 	crypto_drop_spawn(spawn);
+err_put_alg:
+	crypto_mod_put(alg);
 err_free_inst:
 	kfree(inst);
 	goto out;
diff --git a/crypto/cfb.c b/crypto/cfb.c
index a0d68c09e1b9c..20987d0e09d89 100644
--- a/crypto/cfb.c
+++ b/crypto/cfb.c
@@ -286,9 +286,8 @@ static int crypto_cfb_create(struct crypto_template *tmpl, struct rtattr **tb)
 	spawn = skcipher_instance_ctx(inst);
 	err = crypto_init_spawn(spawn, alg, skcipher_crypto_instance(inst),
 				CRYPTO_ALG_TYPE_MASK);
-	crypto_mod_put(alg);
 	if (err)
-		goto err_free_inst;
+		goto err_put_alg;
 
 	err = crypto_inst_setname(skcipher_crypto_instance(inst), "cfb", alg);
 	if (err)
@@ -317,12 +316,15 @@ static int crypto_cfb_create(struct crypto_template *tmpl, struct rtattr **tb)
 	err = skcipher_register_instance(tmpl, inst);
 	if (err)
 		goto err_drop_spawn;
+	crypto_mod_put(alg);
 
 out:
 	return err;
 
 err_drop_spawn:
 	crypto_drop_spawn(spawn);
+err_put_alg:
+	crypto_mod_put(alg);
 err_free_inst:
 	kfree(inst);
 	goto out;
diff --git a/crypto/pcbc.c b/crypto/pcbc.c
index ef802f6e96421..8aa10144407c0 100644
--- a/crypto/pcbc.c
+++ b/crypto/pcbc.c
@@ -244,9 +244,8 @@ static int crypto_pcbc_create(struct crypto_template *tmpl, struct rtattr **tb)
 	spawn = skcipher_instance_ctx(inst);
 	err = crypto_init_spawn(spawn, alg, skcipher_crypto_instance(inst),
 				CRYPTO_ALG_TYPE_MASK);
-	crypto_mod_put(alg);
 	if (err)
-		goto err_free_inst;
+		goto err_put_alg;
 
 	err = crypto_inst_setname(skcipher_crypto_instance(inst), "pcbc", alg);
 	if (err)
@@ -275,12 +274,15 @@ static int crypto_pcbc_create(struct crypto_template *tmpl, struct rtattr **tb)
 	err = skcipher_register_instance(tmpl, inst);
 	if (err)
 		goto err_drop_spawn;
+	crypto_mod_put(alg);
 
 out:
 	return err;
 
 err_drop_spawn:
 	crypto_drop_spawn(spawn);
+err_put_alg:
+	crypto_mod_put(alg);
 err_free_inst:
 	kfree(inst);
 	goto out;
-- 
GitLab


From b51abed8355e5556886623b2772fa6b7598d2282 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 29 Nov 2018 08:02:49 +0100
Subject: [PATCH 1566/1890] ALSA: pcm: Call snd_pcm_unlink() conditionally at
 closing

Currently the PCM core calls snd_pcm_unlink() always unconditionally
at closing a stream.  However, since snd_pcm_unlink() invokes the
global rwsem down, the lock can be easily contended.  More badly, when
a thread runs in a high priority RT-FIFO, it may stall at spinning.

Basically the call of snd_pcm_unlink() is required only for the linked
streams that are already rare occasion.  For normal use cases, this
code path is fairly superfluous.

As an optimization (and also as a workaround for the RT problem
above in normal situations without linked streams), this patch adds a
check before calling snd_pcm_unlink() and calls it only when needed.

Reported-by: Chanho Min <chanho.min@lge.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/core/pcm_native.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 66c90f486af91..6afcc393113a9 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -2369,7 +2369,8 @@ int snd_pcm_hw_constraints_complete(struct snd_pcm_substream *substream)
 
 static void pcm_release_private(struct snd_pcm_substream *substream)
 {
-	snd_pcm_unlink(substream);
+	if (snd_pcm_stream_linked(substream))
+		snd_pcm_unlink(substream);
 }
 
 void snd_pcm_release_substream(struct snd_pcm_substream *substream)
-- 
GitLab


From b888a5f713e4d17faaaff24316585a4eb07f35b7 Mon Sep 17 00:00:00 2001
From: Chanho Min <chanho.min@lge.com>
Date: Mon, 26 Nov 2018 14:36:37 +0900
Subject: [PATCH 1567/1890] ALSA: pcm: Fix starvation on down_write_nonblock()

Commit 67ec1072b053 ("ALSA: pcm: Fix rwsem deadlock for non-atomic PCM
stream") fixes deadlock for non-atomic PCM stream. But, This patch
causes antother stuck.
If writer is RT thread and reader is a normal thread, the reader
thread will be difficult to get scheduled. It may not give chance to
release readlocks and writer gets stuck for a long time if they are
pinned to single cpu.

The deadlock described in the previous commit is because the linux
rwsem queues like a FIFO. So, we might need non-FIFO writelock, not
non-block one.

My suggestion is that the writer gives reader a chance to be scheduled
by using the minimum msleep() instaed of spinning without blocking by
writer. Also, The *_nonblock may be changed to *_nonfifo appropriately
to this concept.
In terms of performance, when trylock is failed, this minimum periodic
msleep will have the same performance as the tick-based
schedule()/wake_up_q().

[ Although this has a fairly high performance penalty, the relevant
  code path became already rare due to the previous commit ("ALSA:
  pcm: Call snd_pcm_unlink() conditionally at closing").  That is, now
  this unconditional msleep appears only when using linked streams,
  and this must be a rare case.  So we accept this as a quick
  workaround until finding a more suitable one -- tiwai ]

Fixes: 67ec1072b053 ("ALSA: pcm: Fix rwsem deadlock for non-atomic PCM stream")
Suggested-by: Wonmin Jung <wonmin.jung@lge.com>
Signed-off-by: Chanho Min <chanho.min@lge.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/core/pcm_native.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 6afcc393113a9..818dff1de545f 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -36,6 +36,7 @@
 #include <sound/timer.h>
 #include <sound/minors.h>
 #include <linux/uio.h>
+#include <linux/delay.h>
 
 #include "pcm_local.h"
 
@@ -91,12 +92,12 @@ static DECLARE_RWSEM(snd_pcm_link_rwsem);
  * and this may lead to a deadlock when the code path takes read sem
  * twice (e.g. one in snd_pcm_action_nonatomic() and another in
  * snd_pcm_stream_lock()).  As a (suboptimal) workaround, let writer to
- * spin until it gets the lock.
+ * sleep until all the readers are completed without blocking by writer.
  */
-static inline void down_write_nonblock(struct rw_semaphore *lock)
+static inline void down_write_nonfifo(struct rw_semaphore *lock)
 {
 	while (!down_write_trylock(lock))
-		cond_resched();
+		msleep(1);
 }
 
 #define PCM_LOCK_DEFAULT	0
@@ -1967,7 +1968,7 @@ static int snd_pcm_link(struct snd_pcm_substream *substream, int fd)
 		res = -ENOMEM;
 		goto _nolock;
 	}
-	down_write_nonblock(&snd_pcm_link_rwsem);
+	down_write_nonfifo(&snd_pcm_link_rwsem);
 	write_lock_irq(&snd_pcm_link_rwlock);
 	if (substream->runtime->status->state == SNDRV_PCM_STATE_OPEN ||
 	    substream->runtime->status->state != substream1->runtime->status->state ||
@@ -2014,7 +2015,7 @@ static int snd_pcm_unlink(struct snd_pcm_substream *substream)
 	struct snd_pcm_substream *s;
 	int res = 0;
 
-	down_write_nonblock(&snd_pcm_link_rwsem);
+	down_write_nonfifo(&snd_pcm_link_rwsem);
 	write_lock_irq(&snd_pcm_link_rwlock);
 	if (!snd_pcm_stream_linked(substream)) {
 		res = -EALREADY;
-- 
GitLab


From 44ff57e685f96d0cb9540004cc9d1d880e7a4315 Mon Sep 17 00:00:00 2001
From: Tony Das <tdas444@gmail.com>
Date: Wed, 28 Nov 2018 20:16:37 +0000
Subject: [PATCH 1568/1890] ALSA: usb-audio: Add SMSL D1 to quirks for native
 DSD support

This patch adds quirk VID/PID IDs for the SMSL D1 in order to enable
Native DSD support.

[ Moved the added entry in numerical order -- tiwai ]

Signed-off-by: Tony Das <tdas444@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/usb/quirks.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 8a945ece98690..6623cafc94f2c 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -1373,6 +1373,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip,
 			return SNDRV_PCM_FMTBIT_DSD_U32_BE;
 		break;
 
+	case USB_ID(0x152a, 0x85de): /* SMSL D1 DAC */
 	case USB_ID(0x16d0, 0x09dd): /* Encore mDSD */
 	case USB_ID(0x0d8c, 0x0316): /* Hegel HD12 DSD */
 	case USB_ID(0x16b0, 0x06b2): /* NuPrime DAC-10 */
-- 
GitLab


From 3deef52ce10514ccdebba8e8ab85f9cebd0eb3f7 Mon Sep 17 00:00:00 2001
From: Kai-Heng Feng <kai.heng.feng@canonical.com>
Date: Thu, 29 Nov 2018 08:57:37 +0000
Subject: [PATCH 1569/1890] ALSA: hda: Add support for AMD Stoney Ridge

It's similar to other AMD audio devices, it also supports D3, which can
save some power drain.

Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/hda_intel.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 0bbdf1a01e763..76f03abd15ab7 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -2498,6 +2498,10 @@ static const struct pci_device_id azx_ids[] = {
 	/* AMD Hudson */
 	{ PCI_DEVICE(0x1022, 0x780d),
 	  .driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_ATI_SB },
+	/* AMD Stoney */
+	{ PCI_DEVICE(0x1022, 0x157a),
+	  .driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_ATI_SB |
+			 AZX_DCAPS_PM_RUNTIME },
 	/* AMD Raven */
 	{ PCI_DEVICE(0x1022, 0x15e3),
 	  .driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_ATI_SB |
-- 
GitLab


From 5363857b916c1f48027e9b96ee8be8376bf20811 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 29 Nov 2018 12:05:19 +0100
Subject: [PATCH 1570/1890] ALSA: pcm: Fix interval evaluation with openmin/max

As addressed in alsa-lib (commit b420056604f0), we need to fix the
case where the evaluation of PCM interval "(x x+1]" leading to
-EINVAL.  After applying rules, such an interval may be translated as
"(x x+1)".

Fixes: ff2d6acdf6f1 ("ALSA: pcm: Fix snd_interval_refine first/last with open min/max")
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/pcm_params.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/sound/pcm_params.h b/include/sound/pcm_params.h
index 2dd37cada7c08..888a833d3b003 100644
--- a/include/sound/pcm_params.h
+++ b/include/sound/pcm_params.h
@@ -254,11 +254,13 @@ static inline int snd_interval_empty(const struct snd_interval *i)
 static inline int snd_interval_single(const struct snd_interval *i)
 {
 	return (i->min == i->max || 
-		(i->min + 1 == i->max && i->openmax));
+		(i->min + 1 == i->max && (i->openmin || i->openmax)));
 }
 
 static inline int snd_interval_value(const struct snd_interval *i)
 {
+	if (i->openmin && !i->openmax)
+		return i->max;
 	return i->min;
 }
 
-- 
GitLab


From 98f5f932254b88ce828bc8e4d1642d14e5854caa Mon Sep 17 00:00:00 2001
From: Richard Genoud <richard.genoud@gmail.com>
Date: Tue, 27 Nov 2018 17:06:34 +0100
Subject: [PATCH 1571/1890] dmaengine: at_hdmac: fix memory leak in
 at_dma_xlate()

The leak was found when opening/closing a serial port a great number of
time, increasing kmalloc-32 in slabinfo.

Each time the port was opened, dma_request_slave_channel() was called.
Then, in at_dma_xlate(), atslave was allocated with devm_kzalloc() and
never freed. (Well, it was free at module unload, but that's not what we
want).
So, here, kzalloc is more suited for the job since it has to be freed in
atc_free_chan_resources().

Cc: stable@vger.kernel.org
Fixes: bbe89c8e3d59 ("at_hdmac: move to generic DMA binding")
Reported-by: Mario Forner <m.forner@be4energy.com>
Suggested-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Acked-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Acked-by: Ludovic Desroches <ludovic.desroches@microchip.com>
Signed-off-by: Richard Genoud <richard.genoud@gmail.com>
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/at_hdmac.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index 7cbac6e8c113f..1b7f0ca0d5cd4 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -1641,6 +1641,12 @@ static void atc_free_chan_resources(struct dma_chan *chan)
 	atchan->descs_allocated = 0;
 	atchan->status = 0;
 
+	/*
+	 * Free atslave allocated in at_dma_xlate()
+	 */
+	kfree(chan->private);
+	chan->private = NULL;
+
 	dev_vdbg(chan2dev(chan), "free_chan_resources: done\n");
 }
 
@@ -1675,7 +1681,7 @@ static struct dma_chan *at_dma_xlate(struct of_phandle_args *dma_spec,
 	dma_cap_zero(mask);
 	dma_cap_set(DMA_SLAVE, mask);
 
-	atslave = devm_kzalloc(&dmac_pdev->dev, sizeof(*atslave), GFP_KERNEL);
+	atslave = kzalloc(sizeof(*atslave), GFP_KERNEL);
 	if (!atslave)
 		return NULL;
 
-- 
GitLab


From 77e75fda94d2ebb86aa9d35fb1860f6395bf95de Mon Sep 17 00:00:00 2001
From: Richard Genoud <richard.genoud@gmail.com>
Date: Tue, 27 Nov 2018 17:06:35 +0100
Subject: [PATCH 1572/1890] dmaengine: at_hdmac: fix module unloading

of_dma_controller_free() was not called on module onloading.
This lead to a soft lockup:
watchdog: BUG: soft lockup - CPU#0 stuck for 23s!
Modules linked in: at_hdmac [last unloaded: at_hdmac]
when of_dma_request_slave_channel() tried to call ofdma->of_dma_xlate().

Cc: stable@vger.kernel.org
Fixes: bbe89c8e3d59 ("at_hdmac: move to generic DMA binding")
Acked-by: Ludovic Desroches <ludovic.desroches@microchip.com>
Signed-off-by: Richard Genoud <richard.genoud@gmail.com>
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/at_hdmac.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index 1b7f0ca0d5cd4..01d936c9fe899 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -2006,6 +2006,8 @@ static int at_dma_remove(struct platform_device *pdev)
 	struct resource		*io;
 
 	at_dma_off(atdma);
+	if (pdev->dev.of_node)
+		of_dma_controller_free(pdev->dev.of_node);
 	dma_async_device_unregister(&atdma->dma_common);
 
 	dma_pool_destroy(atdma->memset_pool);
-- 
GitLab


From 598e1a42e9626213565d3b22ea948ce78556512a Mon Sep 17 00:00:00 2001
From: Paul Moore <paul@paul-moore.com>
Date: Wed, 28 Nov 2018 12:57:33 -0500
Subject: [PATCH 1573/1890] selinux: add support for RTM_NEWCHAIN,
 RTM_DELCHAIN, and RTM_GETCHAIN

Commit 32a4f5ecd738 ("net: sched: introduce chain object to uapi")
added new RTM_* definitions without properly updating SELinux, this
patch adds the necessary SELinux support.

While there was a BUILD_BUG_ON() in the SELinux code to protect from
exactly this case, it was bypassed in the broken commit.  In order to
hopefully prevent this from happening in the future, add additional
comments which provide some instructions on how to resolve the
BUILD_BUG_ON() failures.

Fixes: 32a4f5ecd738 ("net: sched: introduce chain object to uapi")
Cc: <stable@vger.kernel.org> # 4.19
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 security/selinux/nlmsgtab.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c
index 74b951f55608d..9cec81209617d 100644
--- a/security/selinux/nlmsgtab.c
+++ b/security/selinux/nlmsgtab.c
@@ -80,6 +80,9 @@ static const struct nlmsg_perm nlmsg_route_perms[] =
 	{ RTM_NEWSTATS,		NETLINK_ROUTE_SOCKET__NLMSG_READ },
 	{ RTM_GETSTATS,		NETLINK_ROUTE_SOCKET__NLMSG_READ  },
 	{ RTM_NEWCACHEREPORT,	NETLINK_ROUTE_SOCKET__NLMSG_READ },
+	{ RTM_NEWCHAIN,		NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
+	{ RTM_DELCHAIN,		NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
+	{ RTM_GETCHAIN,		NETLINK_ROUTE_SOCKET__NLMSG_READ  },
 };
 
 static const struct nlmsg_perm nlmsg_tcpdiag_perms[] =
@@ -158,7 +161,11 @@ int selinux_nlmsg_lookup(u16 sclass, u16 nlmsg_type, u32 *perm)
 
 	switch (sclass) {
 	case SECCLASS_NETLINK_ROUTE_SOCKET:
-		/* RTM_MAX always point to RTM_SETxxxx, ie RTM_NEWxxx + 3 */
+		/* RTM_MAX always points to RTM_SETxxxx, ie RTM_NEWxxx + 3.
+		 * If the BUILD_BUG_ON() below fails you must update the
+		 * structures at the top of this file with the new mappings
+		 * before updating the BUILD_BUG_ON() macro!
+		 */
 		BUILD_BUG_ON(RTM_MAX != (RTM_NEWCHAIN + 3));
 		err = nlmsg_perm(nlmsg_type, perm, nlmsg_route_perms,
 				 sizeof(nlmsg_route_perms));
@@ -170,6 +177,10 @@ int selinux_nlmsg_lookup(u16 sclass, u16 nlmsg_type, u32 *perm)
 		break;
 
 	case SECCLASS_NETLINK_XFRM_SOCKET:
+		/* If the BUILD_BUG_ON() below fails you must update the
+		 * structures at the top of this file with the new mappings
+		 * before updating the BUILD_BUG_ON() macro!
+		 */
 		BUILD_BUG_ON(XFRM_MSG_MAX != XFRM_MSG_MAPPING);
 		err = nlmsg_perm(nlmsg_type, perm, nlmsg_xfrm_perms,
 				 sizeof(nlmsg_xfrm_perms));
-- 
GitLab


From ce8c80c536dac9f325a051b30bf7730ee505eddc Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Mon, 19 Nov 2018 11:27:28 +0000
Subject: [PATCH 1574/1890] arm64: Add workaround for Cortex-A76 erratum
 1286807

On the affected Cortex-A76 cores (r0p0 to r3p0), if a virtual address
for a cacheable mapping of a location is being accessed by a core while
another core is remapping the virtual address to a new physical page
using the recommended break-before-make sequence, then under very rare
circumstances TLBI+DSB completes before a read using the translation
being invalidated has been observed by other observers. The workaround
repeats the TLBI+DSB operation and is shared with the Qualcomm Falkor
erratum 1009

Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 Documentation/arm64/silicon-errata.txt |  1 +
 arch/arm64/Kconfig                     | 25 +++++++++++++++++++++++++
 arch/arm64/include/asm/tlbflush.h      |  4 ++--
 arch/arm64/kernel/cpu_errata.c         | 20 +++++++++++++++++---
 4 files changed, 45 insertions(+), 5 deletions(-)

diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt
index 76ccded8b74c0..8f95776211447 100644
--- a/Documentation/arm64/silicon-errata.txt
+++ b/Documentation/arm64/silicon-errata.txt
@@ -57,6 +57,7 @@ stable kernels.
 | ARM            | Cortex-A73      | #858921         | ARM64_ERRATUM_858921        |
 | ARM            | Cortex-A55      | #1024718        | ARM64_ERRATUM_1024718       |
 | ARM            | Cortex-A76      | #1188873        | ARM64_ERRATUM_1188873       |
+| ARM            | Cortex-A76      | #1286807        | ARM64_ERRATUM_1286807       |
 | ARM            | MMU-500         | #841119,#826419 | N/A                         |
 |                |                 |                 |                             |
 | Cavium         | ThunderX ITS    | #22375, #24313  | CAVIUM_ERRATUM_22375        |
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 787d7850e0643..ea2ab0330e3a1 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -497,6 +497,24 @@ config ARM64_ERRATUM_1188873
 
 	  If unsure, say Y.
 
+config ARM64_ERRATUM_1286807
+	bool "Cortex-A76: Modification of the translation table for a virtual address might lead to read-after-read ordering violation"
+	default y
+	select ARM64_WORKAROUND_REPEAT_TLBI
+	help
+	  This option adds workaround for ARM Cortex-A76 erratum 1286807
+
+	  On the affected Cortex-A76 cores (r0p0 to r3p0), if a virtual
+	  address for a cacheable mapping of a location is being
+	  accessed by a core while another core is remapping the virtual
+	  address to a new physical page using the recommended
+	  break-before-make sequence, then under very rare circumstances
+	  TLBI+DSB completes before a read using the translation being
+	  invalidated has been observed by other observers. The
+	  workaround repeats the TLBI+DSB operation.
+
+	  If unsure, say Y.
+
 config CAVIUM_ERRATUM_22375
 	bool "Cavium erratum 22375, 24313"
 	default y
@@ -566,9 +584,16 @@ config QCOM_FALKOR_ERRATUM_1003
 	  is unchanged. Work around the erratum by invalidating the walk cache
 	  entries for the trampoline before entering the kernel proper.
 
+config ARM64_WORKAROUND_REPEAT_TLBI
+	bool
+	help
+	  Enable the repeat TLBI workaround for Falkor erratum 1009 and
+	  Cortex-A76 erratum 1286807.
+
 config QCOM_FALKOR_ERRATUM_1009
 	bool "Falkor E1009: Prematurely complete a DSB after a TLBI"
 	default y
+	select ARM64_WORKAROUND_REPEAT_TLBI
 	help
 	  On Falkor v1, the CPU may prematurely complete a DSB following a
 	  TLBI xxIS invalidate maintenance operation. Repeat the TLBI operation
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index c3c0387aee18f..5dfd23897dea9 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -41,14 +41,14 @@
 		   ALTERNATIVE("nop\n			nop",		       \
 			       "dsb ish\n		tlbi " #op,	       \
 			       ARM64_WORKAROUND_REPEAT_TLBI,		       \
-			       CONFIG_QCOM_FALKOR_ERRATUM_1009)		       \
+			       CONFIG_ARM64_WORKAROUND_REPEAT_TLBI)	       \
 			    : : )
 
 #define __TLBI_1(op, arg) asm ("tlbi " #op ", %0\n"			       \
 		   ALTERNATIVE("nop\n			nop",		       \
 			       "dsb ish\n		tlbi " #op ", %0",     \
 			       ARM64_WORKAROUND_REPEAT_TLBI,		       \
-			       CONFIG_QCOM_FALKOR_ERRATUM_1009)		       \
+			       CONFIG_ARM64_WORKAROUND_REPEAT_TLBI)	       \
 			    : : "r" (arg))
 
 #define __TLBI_N(op, arg, n, ...) __TLBI_##n(op, arg)
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index a509e35132d22..6ad715d67df89 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -570,6 +570,20 @@ static const struct midr_range arm64_harden_el2_vectors[] = {
 
 #endif
 
+#ifdef CONFIG_ARM64_WORKAROUND_REPEAT_TLBI
+
+static const struct midr_range arm64_repeat_tlbi_cpus[] = {
+#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1009
+	MIDR_RANGE(MIDR_QCOM_FALKOR_V1, 0, 0, 0, 0),
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_1286807
+	MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 3, 0),
+#endif
+	{},
+};
+
+#endif
+
 const struct arm64_cpu_capabilities arm64_errata[] = {
 #if	defined(CONFIG_ARM64_ERRATUM_826319) || \
 	defined(CONFIG_ARM64_ERRATUM_827319) || \
@@ -695,11 +709,11 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 		.matches = is_kryo_midr,
 	},
 #endif
-#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1009
+#ifdef CONFIG_ARM64_WORKAROUND_REPEAT_TLBI
 	{
-		.desc = "Qualcomm Technologies Falkor erratum 1009",
+		.desc = "Qualcomm erratum 1009, ARM erratum 1286807",
 		.capability = ARM64_WORKAROUND_REPEAT_TLBI,
-		ERRATA_MIDR_REV(MIDR_QCOM_FALKOR_V1, 0, 0),
+		ERRATA_MIDR_RANGE_LIST(arm64_repeat_tlbi_cpus),
 	},
 #endif
 #ifdef CONFIG_ARM64_ERRATUM_858921
-- 
GitLab


From 874bfc6e5422d2421f7e4d5ea318d30e91679dfe Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Thu, 29 Nov 2018 14:39:33 +0900
Subject: [PATCH 1575/1890] arm64: ftrace: Fix to enable syscall events on
 arm64

Since commit 4378a7d4be30 ("arm64: implement syscall wrappers")
introduced "__arm64_" prefix to all syscall wrapper symbols in
sys_call_table, syscall tracer can not find corresponding
metadata from syscall name. In the result, we have no syscall
ftrace events on arm64 kernel, and some bpf testcases are failed
on arm64.

To fix this issue, this introduces custom
arch_syscall_match_sym_name() which skips first 8 bytes when
comparing the syscall and symbol names.

Fixes: 4378a7d4be30 ("arm64: implement syscall wrappers")
Reported-by: Naresh Kamboju <naresh.kamboju@linaro.org>
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Tested-by: Naresh Kamboju <naresh.kamboju@linaro.org>
Cc: stable@vger.kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/ftrace.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
index caa955f10e195..fac54fb050d00 100644
--- a/arch/arm64/include/asm/ftrace.h
+++ b/arch/arm64/include/asm/ftrace.h
@@ -56,6 +56,19 @@ static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs)
 {
 	return is_compat_task();
 }
+
+#define ARCH_HAS_SYSCALL_MATCH_SYM_NAME
+
+static inline bool arch_syscall_match_sym_name(const char *sym,
+					       const char *name)
+{
+	/*
+	 * Since all syscall functions have __arm64_ prefix, we must skip it.
+	 * However, as we described above, we decided to ignore compat
+	 * syscalls, so we don't care about __arm64_compat_ prefix here.
+	 */
+	return !strcmp(sym + 8, name);
+}
 #endif /* ifndef __ASSEMBLY__ */
 
 #endif /* __ASM_FTRACE_H */
-- 
GitLab


From a7b403104e17209ea71eea59d4a71bf9e0d8cb83 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Fri, 23 Nov 2018 17:24:51 +0100
Subject: [PATCH 1576/1890] xen/x86: add diagnostic printout to xen_mc_flush()
 in case of error

Failure of an element of a Xen multicall is signalled via a WARN()
only if the kernel is compiled with MC_DEBUG. It is impossible to
know which element failed and why it did so.

Change that by printing the related information even without MC_DEBUG,
even if maybe in some limited form (e.g. without information which
caller produced the failing element).

Move the printing out of the switch statement in order to have the
same information for a single call.

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 arch/x86/xen/multicalls.c | 35 ++++++++++++++++++++---------------
 1 file changed, 20 insertions(+), 15 deletions(-)

diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c
index 2bce7958ce8b3..0766a08bdf458 100644
--- a/arch/x86/xen/multicalls.c
+++ b/arch/x86/xen/multicalls.c
@@ -69,6 +69,11 @@ void xen_mc_flush(void)
 
 	trace_xen_mc_flush(b->mcidx, b->argidx, b->cbidx);
 
+#if MC_DEBUG
+	memcpy(b->debug, b->entries,
+	       b->mcidx * sizeof(struct multicall_entry));
+#endif
+
 	switch (b->mcidx) {
 	case 0:
 		/* no-op */
@@ -87,32 +92,34 @@ void xen_mc_flush(void)
 		break;
 
 	default:
-#if MC_DEBUG
-		memcpy(b->debug, b->entries,
-		       b->mcidx * sizeof(struct multicall_entry));
-#endif
-
 		if (HYPERVISOR_multicall(b->entries, b->mcidx) != 0)
 			BUG();
 		for (i = 0; i < b->mcidx; i++)
 			if (b->entries[i].result < 0)
 				ret++;
+	}
 
+	if (WARN_ON(ret)) {
+		pr_err("%d of %d multicall(s) failed: cpu %d\n",
+		       ret, b->mcidx, smp_processor_id());
+		for (i = 0; i < b->mcidx; i++) {
+			if (b->entries[i].result < 0) {
 #if MC_DEBUG
-		if (ret) {
-			printk(KERN_ERR "%d multicall(s) failed: cpu %d\n",
-			       ret, smp_processor_id());
-			dump_stack();
-			for (i = 0; i < b->mcidx; i++) {
-				printk(KERN_DEBUG "  call %2d/%d: op=%lu arg=[%lx] result=%ld\t%pF\n",
-				       i+1, b->mcidx,
+				pr_err("  call %2d: op=%lu arg=[%lx] result=%ld\t%pF\n",
+				       i + 1,
 				       b->debug[i].op,
 				       b->debug[i].args[0],
 				       b->entries[i].result,
 				       b->caller[i]);
+#else
+				pr_err("  call %2d: op=%lu arg=[%lx] result=%ld\n",
+				       i + 1,
+				       b->entries[i].op,
+				       b->entries[i].args[0],
+				       b->entries[i].result);
+#endif
 			}
 		}
-#endif
 	}
 
 	b->mcidx = 0;
@@ -126,8 +133,6 @@ void xen_mc_flush(void)
 	b->cbidx = 0;
 
 	local_irq_restore(flags);
-
-	WARN_ON(ret);
 }
 
 struct multicall_space __xen_mc_entry(size_t args)
-- 
GitLab


From 72791ac854fea36034fa7976b748fde585008e78 Mon Sep 17 00:00:00 2001
From: Srikanth Boddepalli <boddepalli.srikanth@gmail.com>
Date: Tue, 27 Nov 2018 19:53:27 +0530
Subject: [PATCH 1577/1890] xen: xlate_mmu: add missing header to fix 'W=1'
 warning

Add a missing header otherwise compiler warns about missed prototype:

drivers/xen/xlate_mmu.c:183:5: warning: no previous prototype for 'xen_xlate_unmap_gfn_range?' [-Wmissing-prototypes]
  int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma,
      ^~~~~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: Srikanth Boddepalli <boddepalli.srikanth@gmail.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Reviewed-by: Joey Pabalinas <joeypabalinas@gmail.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 drivers/xen/xlate_mmu.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/xen/xlate_mmu.c b/drivers/xen/xlate_mmu.c
index 23f1387b3ef79..e7df65d32c918 100644
--- a/drivers/xen/xlate_mmu.c
+++ b/drivers/xen/xlate_mmu.c
@@ -36,6 +36,7 @@
 #include <asm/xen/hypervisor.h>
 
 #include <xen/xen.h>
+#include <xen/xen-ops.h>
 #include <xen/page.h>
 #include <xen/interface/xen.h>
 #include <xen/interface/memory.h>
-- 
GitLab


From 123664101aa2156d05251704fc63f9bcbf77741a Mon Sep 17 00:00:00 2001
From: Igor Druzhinin <igor.druzhinin@citrix.com>
Date: Tue, 27 Nov 2018 20:58:21 +0000
Subject: [PATCH 1578/1890] Revert "xen/balloon: Mark unallocated host memory
 as UNUSABLE"

This reverts commit b3cf8528bb21febb650a7ecbf080d0647be40b9f.

That commit unintentionally broke Xen balloon memory hotplug with
"hotplug_unpopulated" set to 1. As long as "System RAM" resource
got assigned under a new "Unusable memory" resource in IO/Mem tree
any attempt to online this memory would fail due to general kernel
restrictions on having "System RAM" resources as 1st level only.

The original issue that commit has tried to workaround fa564ad96366
("x86/PCI: Enable a 64bit BAR on AMD Family 15h (Models 00-1f, 30-3f,
60-7f)") also got amended by the following 03a551734 ("x86/PCI: Move
and shrink AMD 64-bit window to avoid conflict") which made the
original fix to Xen ballooning unnecessary.

Signed-off-by: Igor Druzhinin <igor.druzhinin@citrix.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 arch/x86/xen/enlighten.c | 78 ----------------------------------------
 arch/x86/xen/setup.c     |  6 ++--
 drivers/xen/balloon.c    | 65 +++++----------------------------
 include/xen/balloon.h    |  5 ---
 4 files changed, 13 insertions(+), 141 deletions(-)

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 67b2f31a1265f..aa1cc483bd2a2 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -10,7 +10,6 @@
 #include <xen/xen.h>
 #include <xen/features.h>
 #include <xen/page.h>
-#include <xen/interface/memory.h>
 
 #include <asm/xen/hypercall.h>
 #include <asm/xen/hypervisor.h>
@@ -346,80 +345,3 @@ void xen_arch_unregister_cpu(int num)
 }
 EXPORT_SYMBOL(xen_arch_unregister_cpu);
 #endif
-
-#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
-void __init arch_xen_balloon_init(struct resource *hostmem_resource)
-{
-	struct xen_memory_map memmap;
-	int rc;
-	unsigned int i, last_guest_ram;
-	phys_addr_t max_addr = PFN_PHYS(max_pfn);
-	struct e820_table *xen_e820_table;
-	const struct e820_entry *entry;
-	struct resource *res;
-
-	if (!xen_initial_domain())
-		return;
-
-	xen_e820_table = kmalloc(sizeof(*xen_e820_table), GFP_KERNEL);
-	if (!xen_e820_table)
-		return;
-
-	memmap.nr_entries = ARRAY_SIZE(xen_e820_table->entries);
-	set_xen_guest_handle(memmap.buffer, xen_e820_table->entries);
-	rc = HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap);
-	if (rc) {
-		pr_warn("%s: Can't read host e820 (%d)\n", __func__, rc);
-		goto out;
-	}
-
-	last_guest_ram = 0;
-	for (i = 0; i < memmap.nr_entries; i++) {
-		if (xen_e820_table->entries[i].addr >= max_addr)
-			break;
-		if (xen_e820_table->entries[i].type == E820_TYPE_RAM)
-			last_guest_ram = i;
-	}
-
-	entry = &xen_e820_table->entries[last_guest_ram];
-	if (max_addr >= entry->addr + entry->size)
-		goto out; /* No unallocated host RAM. */
-
-	hostmem_resource->start = max_addr;
-	hostmem_resource->end = entry->addr + entry->size;
-
-	/*
-	 * Mark non-RAM regions between the end of dom0 RAM and end of host RAM
-	 * as unavailable. The rest of that region can be used for hotplug-based
-	 * ballooning.
-	 */
-	for (; i < memmap.nr_entries; i++) {
-		entry = &xen_e820_table->entries[i];
-
-		if (entry->type == E820_TYPE_RAM)
-			continue;
-
-		if (entry->addr >= hostmem_resource->end)
-			break;
-
-		res = kzalloc(sizeof(*res), GFP_KERNEL);
-		if (!res)
-			goto out;
-
-		res->name = "Unavailable host RAM";
-		res->start = entry->addr;
-		res->end = (entry->addr + entry->size < hostmem_resource->end) ?
-			    entry->addr + entry->size : hostmem_resource->end;
-		rc = insert_resource(hostmem_resource, res);
-		if (rc) {
-			pr_warn("%s: Can't insert [%llx - %llx) (%d)\n",
-				__func__, res->start, res->end, rc);
-			kfree(res);
-			goto  out;
-		}
-	}
-
- out:
-	kfree(xen_e820_table);
-}
-#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 1163e33121fb3..075ed47993bbf 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -808,6 +808,7 @@ char * __init xen_memory_setup(void)
 	addr = xen_e820_table.entries[0].addr;
 	size = xen_e820_table.entries[0].size;
 	while (i < xen_e820_table.nr_entries) {
+		bool discard = false;
 
 		chunk_size = size;
 		type = xen_e820_table.entries[i].type;
@@ -823,10 +824,11 @@ char * __init xen_memory_setup(void)
 				xen_add_extra_mem(pfn_s, n_pfns);
 				xen_max_p2m_pfn = pfn_s + n_pfns;
 			} else
-				type = E820_TYPE_UNUSABLE;
+				discard = true;
 		}
 
-		xen_align_and_add_e820_region(addr, chunk_size, type);
+		if (!discard)
+			xen_align_and_add_e820_region(addr, chunk_size, type);
 
 		addr += chunk_size;
 		size -= chunk_size;
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index e12bb256036fb..7ab6caef599c5 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -251,25 +251,10 @@ static void release_memory_resource(struct resource *resource)
 	kfree(resource);
 }
 
-/*
- * Host memory not allocated to dom0. We can use this range for hotplug-based
- * ballooning.
- *
- * It's a type-less resource. Setting IORESOURCE_MEM will make resource
- * management algorithms (arch_remove_reservations()) look into guest e820,
- * which we don't want.
- */
-static struct resource hostmem_resource = {
-	.name   = "Host RAM",
-};
-
-void __attribute__((weak)) __init arch_xen_balloon_init(struct resource *res)
-{}
-
 static struct resource *additional_memory_resource(phys_addr_t size)
 {
-	struct resource *res, *res_hostmem;
-	int ret = -ENOMEM;
+	struct resource *res;
+	int ret;
 
 	res = kzalloc(sizeof(*res), GFP_KERNEL);
 	if (!res)
@@ -278,42 +263,13 @@ static struct resource *additional_memory_resource(phys_addr_t size)
 	res->name = "System RAM";
 	res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
 
-	res_hostmem = kzalloc(sizeof(*res), GFP_KERNEL);
-	if (res_hostmem) {
-		/* Try to grab a range from hostmem */
-		res_hostmem->name = "Host memory";
-		ret = allocate_resource(&hostmem_resource, res_hostmem,
-					size, 0, -1,
-					PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL);
-	}
-
-	if (!ret) {
-		/*
-		 * Insert this resource into iomem. Because hostmem_resource
-		 * tracks portion of guest e820 marked as UNUSABLE noone else
-		 * should try to use it.
-		 */
-		res->start = res_hostmem->start;
-		res->end = res_hostmem->end;
-		ret = insert_resource(&iomem_resource, res);
-		if (ret < 0) {
-			pr_err("Can't insert iomem_resource [%llx - %llx]\n",
-				res->start, res->end);
-			release_memory_resource(res_hostmem);
-			res_hostmem = NULL;
-			res->start = res->end = 0;
-		}
-	}
-
-	if (ret) {
-		ret = allocate_resource(&iomem_resource, res,
-					size, 0, -1,
-					PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL);
-		if (ret < 0) {
-			pr_err("Cannot allocate new System RAM resource\n");
-			kfree(res);
-			return NULL;
-		}
+	ret = allocate_resource(&iomem_resource, res,
+				size, 0, -1,
+				PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL);
+	if (ret < 0) {
+		pr_err("Cannot allocate new System RAM resource\n");
+		kfree(res);
+		return NULL;
 	}
 
 #ifdef CONFIG_SPARSEMEM
@@ -325,7 +281,6 @@ static struct resource *additional_memory_resource(phys_addr_t size)
 			pr_err("New System RAM resource outside addressable RAM (%lu > %lu)\n",
 			       pfn, limit);
 			release_memory_resource(res);
-			release_memory_resource(res_hostmem);
 			return NULL;
 		}
 	}
@@ -747,8 +702,6 @@ static int __init balloon_init(void)
 	set_online_page_callback(&xen_online_page);
 	register_memory_notifier(&xen_memory_nb);
 	register_sysctl_table(xen_root);
-
-	arch_xen_balloon_init(&hostmem_resource);
 #endif
 
 #ifdef CONFIG_XEN_PV
diff --git a/include/xen/balloon.h b/include/xen/balloon.h
index 61f410fd74e4c..4914b93a23f2b 100644
--- a/include/xen/balloon.h
+++ b/include/xen/balloon.h
@@ -44,8 +44,3 @@ static inline void xen_balloon_init(void)
 {
 }
 #endif
-
-#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
-struct resource;
-void arch_xen_balloon_init(struct resource *hostmem_resource);
-#endif
-- 
GitLab


From 975ef94a0284648fb0137bd5e949b18cef604e33 Mon Sep 17 00:00:00 2001
From: Pan Bian <bianpan2016@163.com>
Date: Thu, 22 Nov 2018 10:07:12 +0800
Subject: [PATCH 1579/1890] pvcalls-front: fixes incorrect error handling

kfree() is incorrectly used to release the pages allocated by
__get_free_page() and __get_free_pages(). Use the matching deallocators
i.e., free_page() and free_pages(), respectively.

Signed-off-by: Pan Bian <bianpan2016@163.com>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 drivers/xen/pvcalls-front.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
index 2f11ca72a2814..77224d8f3e6fe 100644
--- a/drivers/xen/pvcalls-front.c
+++ b/drivers/xen/pvcalls-front.c
@@ -385,8 +385,8 @@ static int create_active(struct sock_mapping *map, int *evtchn)
 out_error:
 	if (*evtchn >= 0)
 		xenbus_free_evtchn(pvcalls_front_dev, *evtchn);
-	kfree(map->active.data.in);
-	kfree(map->active.ring);
+	free_pages((unsigned long)map->active.data.in, PVCALLS_RING_ORDER);
+	free_page((unsigned long)map->active.ring);
 	return ret;
 }
 
-- 
GitLab


From 09ee3b4a249dd5c64da7d25a52a4ce42a49d647a Mon Sep 17 00:00:00 2001
From: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Date: Thu, 29 Nov 2018 17:08:36 +0900
Subject: [PATCH 1580/1890] net: ethernet: ave: Increase descriptors to improve
 performance

To improve performance, this increases Rx descriptor to 256, Tx descriptor
to 64, and adjusts NAPI weight to NAPI_POLL_WEIGHT.

Signed-off-by: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/socionext/sni_ave.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/socionext/sni_ave.c b/drivers/net/ethernet/socionext/sni_ave.c
index 6732f5cbde081..29b5b12bce6ca 100644
--- a/drivers/net/ethernet/socionext/sni_ave.c
+++ b/drivers/net/ethernet/socionext/sni_ave.c
@@ -185,8 +185,8 @@
 				 NETIF_MSG_TX_ERR)
 
 /* Parameter for descriptor */
-#define AVE_NR_TXDESC		32	/* Tx descriptor */
-#define AVE_NR_RXDESC		64	/* Rx descriptor */
+#define AVE_NR_TXDESC		64	/* Tx descriptor */
+#define AVE_NR_RXDESC		256	/* Rx descriptor */
 
 #define AVE_DESC_OFS_CMDSTS	0
 #define AVE_DESC_OFS_ADDRL	4
@@ -1689,9 +1689,10 @@ static int ave_probe(struct platform_device *pdev)
 		 pdev->name, pdev->id);
 
 	/* Register as a NAPI supported driver */
-	netif_napi_add(ndev, &priv->napi_rx, ave_napi_poll_rx, priv->rx.ndesc);
+	netif_napi_add(ndev, &priv->napi_rx, ave_napi_poll_rx,
+		       NAPI_POLL_WEIGHT);
 	netif_tx_napi_add(ndev, &priv->napi_tx, ave_napi_poll_tx,
-			  priv->tx.ndesc);
+			  NAPI_POLL_WEIGHT);
 
 	platform_set_drvdata(pdev, ndev);
 
-- 
GitLab


From 88113957ddb7b7d5451e28cd708c82ea7e63b097 Mon Sep 17 00:00:00 2001
From: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Date: Thu, 29 Nov 2018 17:08:37 +0900
Subject: [PATCH 1581/1890] net: ethernet: ave: Replace NET_IP_ALIGN with
 AVE_FRAME_HEADROOM

In commit 26a4676faa1a ("arm64: mm: define NET_IP_ALIGN to 0"),
AVE controller affects this modification because the controller forces
to ignore lower 2bits of buffer start address, and make 2-byte headroom,
that is, data reception starts from (buffer + 2).

This patch defines AVE_FRAME_HEADROOM macro as hardware-specific value,
and replaces NET_IP_ALIGN with it.

Signed-off-by: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/socionext/sni_ave.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/socionext/sni_ave.c b/drivers/net/ethernet/socionext/sni_ave.c
index 29b5b12bce6ca..0da11344d035e 100644
--- a/drivers/net/ethernet/socionext/sni_ave.c
+++ b/drivers/net/ethernet/socionext/sni_ave.c
@@ -194,6 +194,7 @@
 
 /* Parameter for ethernet frame */
 #define AVE_MAX_ETHFRAME	1518
+#define AVE_FRAME_HEADROOM	2
 
 /* Parameter for interrupt */
 #define AVE_INTM_COUNT		20
@@ -576,12 +577,13 @@ static int ave_rxdesc_prepare(struct net_device *ndev, int entry)
 
 	skb = priv->rx.desc[entry].skbs;
 	if (!skb) {
-		skb = netdev_alloc_skb_ip_align(ndev,
-						AVE_MAX_ETHFRAME);
+		skb = netdev_alloc_skb(ndev, AVE_MAX_ETHFRAME);
 		if (!skb) {
 			netdev_err(ndev, "can't allocate skb for Rx\n");
 			return -ENOMEM;
 		}
+		skb->data += AVE_FRAME_HEADROOM;
+		skb->tail += AVE_FRAME_HEADROOM;
 	}
 
 	/* set disable to cmdsts */
@@ -594,12 +596,12 @@ static int ave_rxdesc_prepare(struct net_device *ndev, int entry)
 	 * - Rx buffer begins with 2 byte headroom, and data will be put from
 	 *   (buffer + 2).
 	 * To satisfy this, specify the address to put back the buffer
-	 * pointer advanced by NET_IP_ALIGN by netdev_alloc_skb_ip_align(),
-	 * and expand the map size by NET_IP_ALIGN.
+	 * pointer advanced by AVE_FRAME_HEADROOM, and expand the map size
+	 * by AVE_FRAME_HEADROOM.
 	 */
 	ret = ave_dma_map(ndev, &priv->rx.desc[entry],
-			  skb->data - NET_IP_ALIGN,
-			  AVE_MAX_ETHFRAME + NET_IP_ALIGN,
+			  skb->data - AVE_FRAME_HEADROOM,
+			  AVE_MAX_ETHFRAME + AVE_FRAME_HEADROOM,
 			  DMA_FROM_DEVICE, &paddr);
 	if (ret) {
 		netdev_err(ndev, "can't map skb for Rx\n");
-- 
GitLab


From d75d0e874ffe929dec143d331b53e4bfceb10af2 Mon Sep 17 00:00:00 2001
From: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Date: Thu, 29 Nov 2018 17:08:38 +0900
Subject: [PATCH 1582/1890] net: ethernet: ave: Add MODULE_AUTHOR and
 MAINTAINERS entry

Add missing MODULE_AUTHOR of ave driver and an entry to MAINTAINERS.

Signed-off-by: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS                              | 7 +++++++
 drivers/net/ethernet/socionext/sni_ave.c | 1 +
 2 files changed, 8 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index da57abebaab35..51d35f0ab9896 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13885,6 +13885,13 @@ F:	drivers/md/raid*
 F:	include/linux/raid/
 F:	include/uapi/linux/raid/
 
+SOCIONEXT (SNI) AVE NETWORK DRIVER
+M:	Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
+L:	netdev@vger.kernel.org
+S:	Maintained
+F:	drivers/net/ethernet/socionext/sni_ave.c
+F:	Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt
+
 SOCIONEXT (SNI) NETSEC NETWORK DRIVER
 M:	Jassi Brar <jaswinder.singh@linaro.org>
 L:	netdev@vger.kernel.org
diff --git a/drivers/net/ethernet/socionext/sni_ave.c b/drivers/net/ethernet/socionext/sni_ave.c
index 0da11344d035e..7c7cd9d94bcc1 100644
--- a/drivers/net/ethernet/socionext/sni_ave.c
+++ b/drivers/net/ethernet/socionext/sni_ave.c
@@ -1916,5 +1916,6 @@ static struct platform_driver ave_driver = {
 };
 module_platform_driver(ave_driver);
 
+MODULE_AUTHOR("Kunihiko Hayashi <hayashi.kunihiko@socionext.com>");
 MODULE_DESCRIPTION("Socionext UniPhier AVE ethernet driver");
 MODULE_LICENSE("GPL v2");
-- 
GitLab


From d7f7e0018b96fd1a30a968faa9464eb57372c1ec Mon Sep 17 00:00:00 2001
From: Baruch Siach <baruch@tkos.co.il>
Date: Thu, 29 Nov 2018 12:40:11 +0200
Subject: [PATCH 1583/1890] net: phy: sfp: correct store of detected link modes

The link modes that sfp_parse_support() detects are stored in the
'modes' bitmap. There is no reason to make an exception for 1000Base-PX
or 1000Base-BX10.

Fixes: 03145864bd0f ("sfp: support 1G BiDi (eg, FiberStore SFP-GE-BX) modules")
Signed-off-by: Baruch Siach <baruch@tkos.co.il>
Acked-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/sfp-bus.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c
index 83060fb349f4d..ad9db652874dc 100644
--- a/drivers/net/phy/sfp-bus.c
+++ b/drivers/net/phy/sfp-bus.c
@@ -162,7 +162,7 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
 	/* 1000Base-PX or 1000Base-BX10 */
 	if ((id->base.e_base_px || id->base.e_base_bx10) &&
 	    br_min <= 1300 && br_max >= 1200)
-		phylink_set(support, 1000baseX_Full);
+		phylink_set(modes, 1000baseX_Full);
 
 	/* For active or passive cables, select the link modes
 	 * based on the bit rates and the cable compliance bytes.
-- 
GitLab


From f28c020fb488e1a8b87469812017044bef88aa2b Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Thu, 29 Nov 2018 14:14:48 +0100
Subject: [PATCH 1584/1890] net: restore call to netdev_queue_numa_node_write
 when resetting XPS

Before commit 80d19669ecd3 ("net: Refactor XPS for CPUs and Rx queues"),
netif_reset_xps_queues() did netdev_queue_numa_node_write() for all the
queues being reset. Now, this is only done when the "active" variable in
clean_xps_maps() is false, ie when on all the CPUs, there's no active
XPS mapping left.

Fixes: 80d19669ecd3 ("net: Refactor XPS for CPUs and Rx queues")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index ddc551f24ba2a..32a63f4c3a92e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2187,17 +2187,19 @@ static void clean_xps_maps(struct net_device *dev, const unsigned long *mask,
 		active |= remove_xps_queue_cpu(dev, dev_maps, j, offset,
 					       count);
 	if (!active) {
-		if (is_rxqs_map) {
+		if (is_rxqs_map)
 			RCU_INIT_POINTER(dev->xps_rxqs_map, NULL);
-		} else {
+		else
 			RCU_INIT_POINTER(dev->xps_cpus_map, NULL);
+		kfree_rcu(dev_maps, rcu);
+	}
 
-			for (i = offset + (count - 1); count--; i--)
-				netdev_queue_numa_node_write(
-					netdev_get_tx_queue(dev, i),
-							NUMA_NO_NODE);
+	if (!is_rxqs_map) {
+		for (i = offset + (count - 1); count--; i--) {
+			netdev_queue_numa_node_write(
+				netdev_get_tx_queue(dev, i),
+				NUMA_NO_NODE);
 		}
-		kfree_rcu(dev_maps, rcu);
 	}
 }
 
-- 
GitLab


From 867d0ad476db89a1e8af3f297af402399a54eea5 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Thu, 29 Nov 2018 14:14:49 +0100
Subject: [PATCH 1585/1890] net: fix XPS static_key accounting

Commit 04157469b7b8 ("net: Use static_key for XPS maps") introduced a
static key for XPS, but the increments/decrements don't match.

First, the static key's counter is incremented once for each queue, but
only decremented once for a whole batch of queues, leading to large
unbalances.

Second, the xps_rxqs_needed key is decremented whenever we reset a batch
of queues, whether they had any rxqs mapping or not, so that if we setup
cpu-XPS on em1 and RXQS-XPS on em2, resetting the queues on em1 would
decrement the xps_rxqs_needed key.

This reworks the accounting scheme so that the xps_needed key is
incremented only once for each type of XPS for all the queues on a
device, and the xps_rxqs_needed key is incremented only once for all
queues. This is sufficient to let us retrieve queues via
get_xps_queue().

This patch introduces a new reset_xps_maps(), which reinitializes and
frees the appropriate map (xps_rxqs_map or xps_cpus_map), and drops a
reference to the needed keys:
 - both xps_needed and xps_rxqs_needed, in case of rxqs maps,
 - only xps_needed, in case of CPU maps.

Now, we also need to call reset_xps_maps() at the end of
__netif_set_xps_queue() when there's no active map left, for example
when writing '00000000,00000000' to all queues' xps_rxqs setting.

Fixes: 04157469b7b8 ("net: Use static_key for XPS maps")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 45 ++++++++++++++++++++++++---------------------
 1 file changed, 24 insertions(+), 21 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 32a63f4c3a92e..3470e7fff1f47 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2175,6 +2175,20 @@ static bool remove_xps_queue_cpu(struct net_device *dev,
 	return active;
 }
 
+static void reset_xps_maps(struct net_device *dev,
+			   struct xps_dev_maps *dev_maps,
+			   bool is_rxqs_map)
+{
+	if (is_rxqs_map) {
+		static_key_slow_dec_cpuslocked(&xps_rxqs_needed);
+		RCU_INIT_POINTER(dev->xps_rxqs_map, NULL);
+	} else {
+		RCU_INIT_POINTER(dev->xps_cpus_map, NULL);
+	}
+	static_key_slow_dec_cpuslocked(&xps_needed);
+	kfree_rcu(dev_maps, rcu);
+}
+
 static void clean_xps_maps(struct net_device *dev, const unsigned long *mask,
 			   struct xps_dev_maps *dev_maps, unsigned int nr_ids,
 			   u16 offset, u16 count, bool is_rxqs_map)
@@ -2186,13 +2200,8 @@ static void clean_xps_maps(struct net_device *dev, const unsigned long *mask,
 	     j < nr_ids;)
 		active |= remove_xps_queue_cpu(dev, dev_maps, j, offset,
 					       count);
-	if (!active) {
-		if (is_rxqs_map)
-			RCU_INIT_POINTER(dev->xps_rxqs_map, NULL);
-		else
-			RCU_INIT_POINTER(dev->xps_cpus_map, NULL);
-		kfree_rcu(dev_maps, rcu);
-	}
+	if (!active)
+		reset_xps_maps(dev, dev_maps, is_rxqs_map);
 
 	if (!is_rxqs_map) {
 		for (i = offset + (count - 1); count--; i--) {
@@ -2236,10 +2245,6 @@ static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
 		       false);
 
 out_no_maps:
-	if (static_key_enabled(&xps_rxqs_needed))
-		static_key_slow_dec_cpuslocked(&xps_rxqs_needed);
-
-	static_key_slow_dec_cpuslocked(&xps_needed);
 	mutex_unlock(&xps_map_mutex);
 	cpus_read_unlock();
 }
@@ -2357,9 +2362,12 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
 	if (!new_dev_maps)
 		goto out_no_new_maps;
 
-	static_key_slow_inc_cpuslocked(&xps_needed);
-	if (is_rxqs_map)
-		static_key_slow_inc_cpuslocked(&xps_rxqs_needed);
+	if (!dev_maps) {
+		/* Increment static keys at most once per type */
+		static_key_slow_inc_cpuslocked(&xps_needed);
+		if (is_rxqs_map)
+			static_key_slow_inc_cpuslocked(&xps_rxqs_needed);
+	}
 
 	for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
 	     j < nr_ids;) {
@@ -2457,13 +2465,8 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
 	}
 
 	/* free map if not active */
-	if (!active) {
-		if (is_rxqs_map)
-			RCU_INIT_POINTER(dev->xps_rxqs_map, NULL);
-		else
-			RCU_INIT_POINTER(dev->xps_cpus_map, NULL);
-		kfree_rcu(dev_maps, rcu);
-	}
+	if (!active)
+		reset_xps_maps(dev, dev_maps, is_rxqs_map);
 
 out_no_maps:
 	mutex_unlock(&xps_map_mutex);
-- 
GitLab


From 49f1c44b581b08e3289127ffe58bd208c3166701 Mon Sep 17 00:00:00 2001
From: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Date: Wed, 28 Nov 2018 16:17:50 -0500
Subject: [PATCH 1586/1890] drm/amd/display: Fix unintialized max_bpc state
 values

[Why]
If the "max bpc" isn't explicitly set in the atomic state then it
have a value of 0. This has the correct behavior of limiting a panel
to 8bpc in the case where the panel supports 8bpc. In the case of eDP
panels this isn't a true assumption - there are panels that can only
do 6bpc.

Banding occurs for these displays.

[How]
Initialize the max_bpc when the connector resets to 8bpc. Also carry
over the value when the state is duplicated.

Bugzilla: https://bugs.freedesktop.org/108825
Fixes: 307638884f72 ("drm/amd/display: Support amdgpu "max bpc" connector property")

Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index ca925200fe092..33b605b259c21 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -3042,6 +3042,7 @@ void amdgpu_dm_connector_funcs_reset(struct drm_connector *connector)
 		state->underscan_enable = false;
 		state->underscan_hborder = 0;
 		state->underscan_vborder = 0;
+		state->max_bpc = 8;
 
 		__drm_atomic_helper_connector_reset(connector, &state->base);
 	}
@@ -3063,6 +3064,7 @@ amdgpu_dm_connector_atomic_duplicate_state(struct drm_connector *connector)
 
 	new_state->freesync_capable = state->freesync_capable;
 	new_state->freesync_enable = state->freesync_enable;
+	new_state->max_bpc = state->max_bpc;
 
 	return &new_state->base;
 }
-- 
GitLab


From c6888879fd55b1ba903c2a770127edbf6aef6f27 Mon Sep 17 00:00:00 2001
From: Roman Li <Roman.Li@amd.com>
Date: Tue, 27 Nov 2018 17:16:37 -0500
Subject: [PATCH 1587/1890] drm/amd/display: Fix 6x4K displays light-up on
 Vega20 (v2)

[Why]
More than 4x4K didn't lightup on Vega20 due to low dcfclk value.
Powerplay expects valid min requirement for dcfclk from DC.

[How]
Update min_dcfclock_khz based on min_engine_clock value.

v2: backport to 4.20 (Alex)

Reviewed-by: Hersen Wu <hersenxs.wu@amd.com>
Reviewed-by: Feifei Xu <Feifei.Xu@amd.com>
Reviewed-by: Evan Quan <evan.quan@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Roman Li <Roman.Li@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
index b459867a05b20..a6bcb90e8419a 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
@@ -2512,6 +2512,8 @@ static void pplib_apply_display_requirements(
 			dc,
 			context->bw.dce.sclk_khz);
 
+	pp_display_cfg->min_dcfclock_khz = pp_display_cfg->min_engine_clock_khz;
+
 	pp_display_cfg->min_engine_clock_deep_sleep_khz
 			= context->bw.dce.sclk_deep_sleep_khz;
 
-- 
GitLab


From 90230968f102acbe103fbf7c03d41addfef5f153 Mon Sep 17 00:00:00 2001
From: Baruch Siach <baruch@tkos.co.il>
Date: Thu, 29 Nov 2018 12:00:05 +0200
Subject: [PATCH 1588/1890] net: phy: sfp: correct location of SFP standards

SFP standards are now available from the SNIA (Storage Networking
Industry Association) website.

Cc: Andrew Lunn <andrew@lunn.ch>
Cc: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Baruch Siach <baruch@tkos.co.il>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sfp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/sfp.h b/include/linux/sfp.h
index d37518e89db2d..d9d9de3fcf8e2 100644
--- a/include/linux/sfp.h
+++ b/include/linux/sfp.h
@@ -224,7 +224,7 @@ struct sfp_eeprom_ext {
  *
  * See the SFF-8472 specification and related documents for the definition
  * of these structure members. This can be obtained from
- * ftp://ftp.seagate.com/sff
+ * https://www.snia.org/technology-communities/sff/specifications
  */
 struct sfp_eeprom_id {
 	struct sfp_eeprom_base base;
-- 
GitLab


From 89d328f637b9904b6d4c9af73c8a608b8dd4d6f8 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 1 Nov 2018 16:17:22 -0700
Subject: [PATCH 1589/1890] pstore/ram: Correctly calculate usable PRZ bytes

The actual number of bytes stored in a PRZ is smaller than the
bytes requested by platform data, since there is a header on each
PRZ. Additionally, if ECC is enabled, there are trailing bytes used
as well. Normally this mismatch doesn't matter since PRZs are circular
buffers and the leading "overflow" bytes are just thrown away. However, in
the case of a compressed record, this rather badly corrupts the results.

This corruption was visible with "ramoops.mem_size=204800 ramoops.ecc=1".
Any stored crashes would not be uncompressable (producing a pstorefs
"dmesg-*.enc.z" file), and triggering errors at boot:

  [    2.790759] pstore: crypto_comp_decompress failed, ret = -22!

Backporting this depends on commit 70ad35db3321 ("pstore: Convert console
write to use ->write_buf")

Reported-by: Joel Fernandes <joel@joelfernandes.org>
Fixes: b0aad7a99c1d ("pstore: Add compression support to pstore")
Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org>
---
 fs/pstore/ram.c        | 15 ++++++---------
 include/linux/pstore.h |  5 ++++-
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 712960e117fea..8646fe6e916fe 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -816,17 +816,14 @@ static int ramoops_probe(struct platform_device *pdev)
 
 	cxt->pstore.data = cxt;
 	/*
-	 * Console can handle any buffer size, so prefer LOG_LINE_MAX. If we
-	 * have to handle dumps, we must have at least record_size buffer. And
-	 * for ftrace, bufsize is irrelevant (if bufsize is 0, buf will be
-	 * ZERO_SIZE_PTR).
+	 * Since bufsize is only used for dmesg crash dumps, it
+	 * must match the size of the dprz record (after PRZ header
+	 * and ECC bytes have been accounted for).
 	 */
-	if (cxt->console_size)
-		cxt->pstore.bufsize = 1024; /* LOG_LINE_MAX */
-	cxt->pstore.bufsize = max(cxt->record_size, cxt->pstore.bufsize);
-	cxt->pstore.buf = kmalloc(cxt->pstore.bufsize, GFP_KERNEL);
+	cxt->pstore.bufsize = cxt->dprzs[0]->buffer_size;
+	cxt->pstore.buf = kzalloc(cxt->pstore.bufsize, GFP_KERNEL);
 	if (!cxt->pstore.buf) {
-		pr_err("cannot allocate pstore buffer\n");
+		pr_err("cannot allocate pstore crash dump buffer\n");
 		err = -ENOMEM;
 		goto fail_clear;
 	}
diff --git a/include/linux/pstore.h b/include/linux/pstore.h
index a15bc4d487528..30fcec375a3af 100644
--- a/include/linux/pstore.h
+++ b/include/linux/pstore.h
@@ -90,7 +90,10 @@ struct pstore_record {
  *
  * @buf_lock:	spinlock to serialize access to @buf
  * @buf:	preallocated crash dump buffer
- * @bufsize:	size of @buf available for crash dump writes
+ * @bufsize:	size of @buf available for crash dump bytes (must match
+ *		smallest number of bytes available for writing to a
+ *		backend entry, since compressed bytes don't take kindly
+ *		to being truncated)
  *
  * @read_mutex:	serializes @open, @read, @close, and @erase callbacks
  * @flags:	bitfield of frontends the backend can accept writes for
-- 
GitLab


From 7bca603a69c0c239654a8f0bcb99e1a60b30040c Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Thu, 29 Nov 2018 12:25:29 +0200
Subject: [PATCH 1590/1890] RDMA/mlx5: Initialize return variable in case
 pagefault was skipped

Pagefaults occurred in non-ODP MR are completely valid events, so
initialize return variable to 0.

Fixes: 4d5422a309de ("IB/mlx5: Skip non-ODP MR when handling a page fault")
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/hw/mlx5/odp.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index b711a0f3aa353..2cc3d69ab6f64 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -679,6 +679,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
 				    key);
 			if (bytes_mapped)
 				*bytes_mapped += bcnt;
+			ret = 0;
 			goto srcu_unlock;
 		}
 
-- 
GitLab


From d449ba3d581ed29f751a59792fdc775572c66904 Mon Sep 17 00:00:00 2001
From: Martin Schiller <ms@dev.tdt.de>
Date: Tue, 27 Nov 2018 09:50:27 +0100
Subject: [PATCH 1591/1890] net/x25: fix called/calling length calculation in
 x25_parse_address_block

The length of the called and calling address was not calculated
correctly (BCD encoding).

Signed-off-by: Martin Schiller <ms@dev.tdt.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/x25/af_x25.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index d49aa79b79970..5226a7f43050b 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -100,7 +100,7 @@ int x25_parse_address_block(struct sk_buff *skb,
 	}
 
 	len = *skb->data;
-	needed = 1 + (len >> 4) + (len & 0x0f);
+	needed = 1 + ((len >> 4) + (len & 0x0f) + 1) / 2;
 
 	if (!pskb_may_pull(skb, needed)) {
 		/* packet is too short to hold the addresses it claims
-- 
GitLab


From 06137619f061f498c2924f6543fa45b7d39f0501 Mon Sep 17 00:00:00 2001
From: Martin Schiller <ms@dev.tdt.de>
Date: Tue, 27 Nov 2018 09:50:28 +0100
Subject: [PATCH 1592/1890] net/x25: fix null_x25_address handling

o x25_find_listener(): the compare for the null_x25_address was wrong.
   We have to check the x25_addr of the listener socket instead of the
   x25_addr of the incomming call.

 o x25_bind(): it was not possible to bind a socket to null_x25_address

Signed-off-by: Martin Schiller <ms@dev.tdt.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/x25/af_x25.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 5226a7f43050b..5121729b8b631 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -288,7 +288,7 @@ static struct sock *x25_find_listener(struct x25_address *addr,
 	sk_for_each(s, &x25_list)
 		if ((!strcmp(addr->x25_addr,
 			x25_sk(s)->source_addr.x25_addr) ||
-				!strcmp(addr->x25_addr,
+				!strcmp(x25_sk(s)->source_addr.x25_addr,
 					null_x25_address.x25_addr)) &&
 					s->sk_state == TCP_LISTEN) {
 			/*
@@ -688,11 +688,15 @@ static int x25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 		goto out;
 	}
 
-	len = strlen(addr->sx25_addr.x25_addr);
-	for (i = 0; i < len; i++) {
-		if (!isdigit(addr->sx25_addr.x25_addr[i])) {
-			rc = -EINVAL;
-			goto out;
+	/* check for the null_x25_address */
+	if (strcmp(addr->sx25_addr.x25_addr, null_x25_address.x25_addr)) {
+
+		len = strlen(addr->sx25_addr.x25_addr);
+		for (i = 0; i < len; i++) {
+			if (!isdigit(addr->sx25_addr.x25_addr[i])) {
+				rc = -EINVAL;
+				goto out;
+			}
 		}
 	}
 
-- 
GitLab


From b020fcf6bb4b2d980298c416b3f407075aa2b3b6 Mon Sep 17 00:00:00 2001
From: Martin Schiller <ms@dev.tdt.de>
Date: Tue, 27 Nov 2018 09:50:29 +0100
Subject: [PATCH 1593/1890] net/x25: handle call collisions

If a session in X25_STATE_1 (Awaiting Call Accept) receives a call
request, the session will be closed (x25_disconnect), cause=0x01
(Number Busy) and diag=0x48 (Call Collision) will be set and a clear
request will be send.

Signed-off-by: Martin Schiller <ms@dev.tdt.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/x25/x25_in.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c
index 3c12cae32001d..afb26221d8a8f 100644
--- a/net/x25/x25_in.c
+++ b/net/x25/x25_in.c
@@ -142,6 +142,15 @@ static int x25_state1_machine(struct sock *sk, struct sk_buff *skb, int frametyp
 			sk->sk_state_change(sk);
 		break;
 	}
+	case X25_CALL_REQUEST:
+		/* call collision */
+		x25->causediag.cause      = 0x01;
+		x25->causediag.diagnostic = 0x48;
+
+		x25_write_internal(sk, X25_CLEAR_REQUEST);
+		x25_disconnect(sk, EISCONN, 0x01, 0x48);
+		break;
+
 	case X25_CLEAR_REQUEST:
 		if (!pskb_may_pull(skb, X25_STD_MIN_LEN + 2))
 			goto out_clear;
-- 
GitLab


From 9410d386d0a829ace9558336263086c2fbbe8aed Mon Sep 17 00:00:00 2001
From: Christoph Paasch <cpaasch@apple.com>
Date: Thu, 29 Nov 2018 16:01:04 -0800
Subject: [PATCH 1594/1890] net: Prevent invalid access to skb->prev in
 __qdisc_drop_all

__qdisc_drop_all() accesses skb->prev to get to the tail of the
segment-list.

With commit 68d2f84a1368 ("net: gro: properly remove skb from list")
the skb-list handling has been changed to set skb->next to NULL and set
the list-poison on skb->prev.

With that change, __qdisc_drop_all() will panic when it tries to
dereference skb->prev.

Since commit 992cba7e276d ("net: Add and use skb_list_del_init().")
__list_del_entry is used, leaving skb->prev unchanged (thus,
pointing to the list-head if it's the first skb of the list).
This will make __qdisc_drop_all modify the next-pointer of the list-head
and result in a panic later on:

[   34.501053] general protection fault: 0000 [#1] SMP KASAN PTI
[   34.501968] CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.20.0-rc2.mptcp #108
[   34.502887] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.5.1 01/01/2011
[   34.504074] RIP: 0010:dev_gro_receive+0x343/0x1f90
[   34.504751] Code: e0 48 c1 e8 03 42 80 3c 30 00 0f 85 4a 1c 00 00 4d 8b 24 24 4c 39 65 d0 0f 84 0a 04 00 00 49 8d 7c 24 38 48 89 f8 48 c1 e8 03 <42> 0f b6 04 30 84 c0 74 08 3c 04
[   34.507060] RSP: 0018:ffff8883af507930 EFLAGS: 00010202
[   34.507761] RAX: 0000000000000007 RBX: ffff8883970b2c80 RCX: 1ffff11072e165a6
[   34.508640] RDX: 1ffff11075867008 RSI: ffff8883ac338040 RDI: 0000000000000038
[   34.509493] RBP: ffff8883af5079d0 R08: ffff8883970b2d40 R09: 0000000000000062
[   34.510346] R10: 0000000000000034 R11: 0000000000000000 R12: 0000000000000000
[   34.511215] R13: 0000000000000000 R14: dffffc0000000000 R15: ffff8883ac338008
[   34.512082] FS:  0000000000000000(0000) GS:ffff8883af500000(0000) knlGS:0000000000000000
[   34.513036] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   34.513741] CR2: 000055ccc3e9d020 CR3: 00000003abf32000 CR4: 00000000000006e0
[   34.514593] Call Trace:
[   34.514893]  <IRQ>
[   34.515157]  napi_gro_receive+0x93/0x150
[   34.515632]  receive_buf+0x893/0x3700
[   34.516094]  ? __netif_receive_skb+0x1f/0x1a0
[   34.516629]  ? virtnet_probe+0x1b40/0x1b40
[   34.517153]  ? __stable_node_chain+0x4d0/0x850
[   34.517684]  ? kfree+0x9a/0x180
[   34.518067]  ? __kasan_slab_free+0x171/0x190
[   34.518582]  ? detach_buf+0x1df/0x650
[   34.519061]  ? lapic_next_event+0x5a/0x90
[   34.519539]  ? virtqueue_get_buf_ctx+0x280/0x7f0
[   34.520093]  virtnet_poll+0x2df/0xd60
[   34.520533]  ? receive_buf+0x3700/0x3700
[   34.521027]  ? qdisc_watchdog_schedule_ns+0xd5/0x140
[   34.521631]  ? htb_dequeue+0x1817/0x25f0
[   34.522107]  ? sch_direct_xmit+0x142/0xf30
[   34.522595]  ? virtqueue_napi_schedule+0x26/0x30
[   34.523155]  net_rx_action+0x2f6/0xc50
[   34.523601]  ? napi_complete_done+0x2f0/0x2f0
[   34.524126]  ? kasan_check_read+0x11/0x20
[   34.524608]  ? _raw_spin_lock+0x7d/0xd0
[   34.525070]  ? _raw_spin_lock_bh+0xd0/0xd0
[   34.525563]  ? kvm_guest_apic_eoi_write+0x6b/0x80
[   34.526130]  ? apic_ack_irq+0x9e/0xe0
[   34.526567]  __do_softirq+0x188/0x4b5
[   34.527015]  irq_exit+0x151/0x180
[   34.527417]  do_IRQ+0xdb/0x150
[   34.527783]  common_interrupt+0xf/0xf
[   34.528223]  </IRQ>

This patch makes sure that skb->prev is set to NULL when entering
netem_enqueue.

Cc: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
Cc: Tyler Hicks <tyhicks@canonical.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Fixes: 68d2f84a1368 ("net: gro: properly remove skb from list")
Suggested-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_netem.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 2c38e3d079246..22cd46a600576 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -431,6 +431,9 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 	int count = 1;
 	int rc = NET_XMIT_SUCCESS;
 
+	/* Do not fool qdisc_drop_all() */
+	skb->prev = NULL;
+
 	/* Random duplication */
 	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
 		++count;
-- 
GitLab


From ae3b7361dc0ee9a425bf7d77ce211f533500b39b Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 13 Nov 2018 23:20:21 +0000
Subject: [PATCH 1595/1890] afs: Fix validation/callback interaction

When afs_validate() is called to validate a vnode (inode), there are two
unhandled cases in the fastpath at the top of the function:

 (1) If the vnode is promised (AFS_VNODE_CB_PROMISED is set), the break
     counters match and the data has expired, then there's an implicit case
     in which the vnode needs revalidating.

     This has no consequences since the default "valid = false" set at the
     top of the function happens to do the right thing.

 (2) If the vnode is not promised and it hasn't been deleted
     (AFS_VNODE_DELETED is not set) then there's a default case we're not
     handling in which the vnode is invalid.  If the vnode is invalid, we
     need to bring cb_s_break and cb_v_break up to date before we refetch
     the status.

     As a consequence, once the server loses track of the client
     (ie. sufficient time has passed since we last sent it an operation),
     it will send us a CB.InitCallBackState* operation when we next try to
     talk to it.  This calls afs_init_callback_state() which increments
     afs_server::cb_s_break, but this then doesn't propagate to the
     afs_vnode record.

     The result being that every afs_validate() call thereafter sends a
     status fetch operation to the server.

Clarify and fix this by:

 (A) Setting valid in all the branches rather than initialising it at the
     top so that the compiler catches where we've missed.

 (B) Restructuring the logic in the 'promised' branch so that we set valid
     to false if the callback is due to expire (or has expired) and so that
     the final case is that the vnode is still valid.

 (C) Adding an else-statement that ups cb_s_break and cb_v_break if the
     promised and deleted cases don't match.

Fixes: c435ee34551e ("afs: Overhaul the callback handling")
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/afs/inode.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 4c6d8e1112c2b..6b17d36204142 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -382,7 +382,7 @@ void afs_zap_data(struct afs_vnode *vnode)
 int afs_validate(struct afs_vnode *vnode, struct key *key)
 {
 	time64_t now = ktime_get_real_seconds();
-	bool valid = false;
+	bool valid;
 	int ret;
 
 	_enter("{v={%llx:%llu} fl=%lx},%x",
@@ -402,15 +402,21 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
 			vnode->cb_v_break = vnode->volume->cb_v_break;
 			valid = false;
 		} else if (vnode->status.type == AFS_FTYPE_DIR &&
-			   test_bit(AFS_VNODE_DIR_VALID, &vnode->flags) &&
-			   vnode->cb_expires_at - 10 > now) {
-			valid = true;
-		} else if (!test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags) &&
-			   vnode->cb_expires_at - 10 > now) {
+			   (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags) ||
+			    vnode->cb_expires_at - 10 <= now)) {
+			valid = false;
+		} else if (test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags) ||
+			   vnode->cb_expires_at - 10 <= now) {
+			valid = false;
+		} else {
 			valid = true;
 		}
 	} else if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) {
 		valid = true;
+	} else {
+		vnode->cb_s_break = vnode->cb_interest->server->cb_s_break;
+		vnode->cb_v_break = vnode->volume->cb_v_break;
+		valid = false;
 	}
 
 	read_sequnlock_excl(&vnode->cb_lock);
-- 
GitLab


From 4584ae96ae307613625e80cb9c7d9a981bed47a7 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 13 Nov 2018 23:20:28 +0000
Subject: [PATCH 1596/1890] afs: Fix missing net error handling

kAFS can be given certain network errors (EADDRNOTAVAIL, EHOSTDOWN and
ERFKILL) that it doesn't handle in its server/address rotation algorithms.
They cause the probing and rotation to abort immediately rather than
rotating.

Fix this by:

 (1) Abstracting out the error prioritisation from the VL and FS rotation
     algorithms into a common function and expand usage into the server
     probing code.

     When multiple errors are available, this code selects the one we'd
     prefer to return.

 (2) Add handling for EADDRNOTAVAIL, EHOSTDOWN and ERFKILL.

Fixes: 0fafdc9f888b ("afs: Fix file locking")
Fixes: 0338747d8454 ("afs: Probe multiple fileservers simultaneously")
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/afs/fs_probe.c  | 39 +++++++++++++++++++++-------------
 fs/afs/internal.h  |  9 ++++++++
 fs/afs/misc.c      | 52 +++++++++++++++++++++++++++++++++++++++++++++
 fs/afs/rotate.c    | 53 ++++++++++++----------------------------------
 fs/afs/vl_probe.c  | 45 +++++++++++++++++++++++----------------
 fs/afs/vl_rotate.c | 50 +++++++++----------------------------------
 6 files changed, 135 insertions(+), 113 deletions(-)

diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c
index d049cb4597425..fde6b4d4121e3 100644
--- a/fs/afs/fs_probe.c
+++ b/fs/afs/fs_probe.c
@@ -61,8 +61,11 @@ void afs_fileserver_probe_result(struct afs_call *call)
 		afs_io_error(call, afs_io_error_fs_probe_fail);
 		goto out;
 	case -ECONNRESET: /* Responded, but call expired. */
+	case -ERFKILL:
+	case -EADDRNOTAVAIL:
 	case -ENETUNREACH:
 	case -EHOSTUNREACH:
+	case -EHOSTDOWN:
 	case -ECONNREFUSED:
 	case -ETIMEDOUT:
 	case -ETIME:
@@ -132,12 +135,14 @@ void afs_fileserver_probe_result(struct afs_call *call)
 static int afs_do_probe_fileserver(struct afs_net *net,
 				   struct afs_server *server,
 				   struct key *key,
-				   unsigned int server_index)
+				   unsigned int server_index,
+				   struct afs_error *_e)
 {
 	struct afs_addr_cursor ac = {
 		.index = 0,
 	};
-	int ret;
+	bool in_progress = false;
+	int err;
 
 	_enter("%pU", &server->uuid);
 
@@ -151,15 +156,17 @@ static int afs_do_probe_fileserver(struct afs_net *net,
 	server->probe.rtt = UINT_MAX;
 
 	for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++) {
-		ret = afs_fs_get_capabilities(net, server, &ac, key, server_index,
+		err = afs_fs_get_capabilities(net, server, &ac, key, server_index,
 					      true);
-		if (ret != -EINPROGRESS) {
-			afs_fs_probe_done(server);
-			return ret;
-		}
+		if (err == -EINPROGRESS)
+			in_progress = true;
+		else
+			afs_prioritise_error(_e, err, ac.abort_code);
 	}
 
-	return 0;
+	if (!in_progress)
+		afs_fs_probe_done(server);
+	return in_progress;
 }
 
 /*
@@ -169,21 +176,23 @@ int afs_probe_fileservers(struct afs_net *net, struct key *key,
 			  struct afs_server_list *list)
 {
 	struct afs_server *server;
-	int i, ret;
+	struct afs_error e;
+	bool in_progress = false;
+	int i;
 
+	e.error = 0;
+	e.responded = false;
 	for (i = 0; i < list->nr_servers; i++) {
 		server = list->servers[i].server;
 		if (test_bit(AFS_SERVER_FL_PROBED, &server->flags))
 			continue;
 
-		if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &server->flags)) {
-			ret = afs_do_probe_fileserver(net, server, key, i);
-			if (ret)
-				return ret;
-		}
+		if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &server->flags) &&
+		    afs_do_probe_fileserver(net, server, key, i, &e))
+			in_progress = true;
 	}
 
-	return 0;
+	return in_progress ? 0 : e.error;
 }
 
 /*
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 5da3b09b75186..8871b9e8645f1 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -695,6 +695,14 @@ struct afs_interface {
 	unsigned	mtu;		/* MTU of interface */
 };
 
+/*
+ * Error prioritisation and accumulation.
+ */
+struct afs_error {
+	short	error;			/* Accumulated error */
+	bool	responded;		/* T if server responded */
+};
+
 /*
  * Cursor for iterating over a server's address list.
  */
@@ -1015,6 +1023,7 @@ static inline void __afs_stat(atomic_t *s)
  * misc.c
  */
 extern int afs_abort_to_error(u32);
+extern void afs_prioritise_error(struct afs_error *, int, u32);
 
 /*
  * mntpt.c
diff --git a/fs/afs/misc.c b/fs/afs/misc.c
index 700a5fa7f4ece..bbb1fd51b019e 100644
--- a/fs/afs/misc.c
+++ b/fs/afs/misc.c
@@ -118,3 +118,55 @@ int afs_abort_to_error(u32 abort_code)
 	default:		return -EREMOTEIO;
 	}
 }
+
+/*
+ * Select the error to report from a set of errors.
+ */
+void afs_prioritise_error(struct afs_error *e, int error, u32 abort_code)
+{
+	switch (error) {
+	case 0:
+		return;
+	default:
+		if (e->error == -ETIMEDOUT ||
+		    e->error == -ETIME)
+			return;
+	case -ETIMEDOUT:
+	case -ETIME:
+		if (e->error == -ENOMEM ||
+		    e->error == -ENONET)
+			return;
+	case -ENOMEM:
+	case -ENONET:
+		if (e->error == -ERFKILL)
+			return;
+	case -ERFKILL:
+		if (e->error == -EADDRNOTAVAIL)
+			return;
+	case -EADDRNOTAVAIL:
+		if (e->error == -ENETUNREACH)
+			return;
+	case -ENETUNREACH:
+		if (e->error == -EHOSTUNREACH)
+			return;
+	case -EHOSTUNREACH:
+		if (e->error == -EHOSTDOWN)
+			return;
+	case -EHOSTDOWN:
+		if (e->error == -ECONNREFUSED)
+			return;
+	case -ECONNREFUSED:
+		if (e->error == -ECONNRESET)
+			return;
+	case -ECONNRESET: /* Responded, but call expired. */
+		if (e->responded)
+			return;
+		e->error = error;
+		return;
+
+	case -ECONNABORTED:
+		e->responded = true;
+		e->error = afs_abort_to_error(abort_code);
+		return;
+	}
+}
diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c
index 00504254c1c24..c3ae324781f84 100644
--- a/fs/afs/rotate.c
+++ b/fs/afs/rotate.c
@@ -136,7 +136,8 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
 	struct afs_addr_list *alist;
 	struct afs_server *server;
 	struct afs_vnode *vnode = fc->vnode;
-	u32 rtt, abort_code;
+	struct afs_error e;
+	u32 rtt;
 	int error = fc->ac.error, i;
 
 	_enter("%lx[%d],%lx[%d],%d,%d",
@@ -306,8 +307,11 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
 		if (fc->error != -EDESTADDRREQ)
 			goto iterate_address;
 		/* Fall through */
+	case -ERFKILL:
+	case -EADDRNOTAVAIL:
 	case -ENETUNREACH:
 	case -EHOSTUNREACH:
+	case -EHOSTDOWN:
 	case -ECONNREFUSED:
 		_debug("no conn");
 		fc->error = error;
@@ -446,50 +450,15 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc)
 	if (fc->flags & AFS_FS_CURSOR_VBUSY)
 		goto restart_from_beginning;
 
-	abort_code = 0;
-	error = -EDESTADDRREQ;
+	e.error = -EDESTADDRREQ;
+	e.responded = false;
 	for (i = 0; i < fc->server_list->nr_servers; i++) {
 		struct afs_server *s = fc->server_list->servers[i].server;
-		int probe_error = READ_ONCE(s->probe.error);
 
-		switch (probe_error) {
-		case 0:
-			continue;
-		default:
-			if (error == -ETIMEDOUT ||
-			    error == -ETIME)
-				continue;
-		case -ETIMEDOUT:
-		case -ETIME:
-			if (error == -ENOMEM ||
-			    error == -ENONET)
-				continue;
-		case -ENOMEM:
-		case -ENONET:
-			if (error == -ENETUNREACH)
-				continue;
-		case -ENETUNREACH:
-			if (error == -EHOSTUNREACH)
-				continue;
-		case -EHOSTUNREACH:
-			if (error == -ECONNREFUSED)
-				continue;
-		case -ECONNREFUSED:
-			if (error == -ECONNRESET)
-				continue;
-		case -ECONNRESET: /* Responded, but call expired. */
-			if (error == -ECONNABORTED)
-				continue;
-		case -ECONNABORTED:
-			abort_code = s->probe.abort_code;
-			error = probe_error;
-			continue;
-		}
+		afs_prioritise_error(&e, READ_ONCE(s->probe.error),
+				     s->probe.abort_code);
 	}
 
-	if (error == -ECONNABORTED)
-		error = afs_abort_to_error(abort_code);
-
 failed_set_error:
 	fc->error = error;
 failed:
@@ -553,8 +522,11 @@ bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
 		_leave(" = f [abort]");
 		return false;
 
+	case -ERFKILL:
+	case -EADDRNOTAVAIL:
 	case -ENETUNREACH:
 	case -EHOSTUNREACH:
+	case -EHOSTDOWN:
 	case -ECONNREFUSED:
 	case -ETIMEDOUT:
 	case -ETIME:
@@ -633,6 +605,7 @@ int afs_end_vnode_operation(struct afs_fs_cursor *fc)
 	struct afs_net *net = afs_v2net(fc->vnode);
 
 	if (fc->error == -EDESTADDRREQ ||
+	    fc->error == -EADDRNOTAVAIL ||
 	    fc->error == -ENETUNREACH ||
 	    fc->error == -EHOSTUNREACH)
 		afs_dump_edestaddrreq(fc);
diff --git a/fs/afs/vl_probe.c b/fs/afs/vl_probe.c
index c0f616bd70cba..f0b032976487c 100644
--- a/fs/afs/vl_probe.c
+++ b/fs/afs/vl_probe.c
@@ -61,8 +61,11 @@ void afs_vlserver_probe_result(struct afs_call *call)
 		afs_io_error(call, afs_io_error_vl_probe_fail);
 		goto out;
 	case -ECONNRESET: /* Responded, but call expired. */
+	case -ERFKILL:
+	case -EADDRNOTAVAIL:
 	case -ENETUNREACH:
 	case -EHOSTUNREACH:
+	case -EHOSTDOWN:
 	case -ECONNREFUSED:
 	case -ETIMEDOUT:
 	case -ETIME:
@@ -129,15 +132,17 @@ void afs_vlserver_probe_result(struct afs_call *call)
  * Probe all of a vlserver's addresses to find out the best route and to
  * query its capabilities.
  */
-static int afs_do_probe_vlserver(struct afs_net *net,
-				 struct afs_vlserver *server,
-				 struct key *key,
-				 unsigned int server_index)
+static bool afs_do_probe_vlserver(struct afs_net *net,
+				  struct afs_vlserver *server,
+				  struct key *key,
+				  unsigned int server_index,
+				  struct afs_error *_e)
 {
 	struct afs_addr_cursor ac = {
 		.index = 0,
 	};
-	int ret;
+	bool in_progress = false;
+	int err;
 
 	_enter("%s", server->name);
 
@@ -151,15 +156,17 @@ static int afs_do_probe_vlserver(struct afs_net *net,
 	server->probe.rtt = UINT_MAX;
 
 	for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++) {
-		ret = afs_vl_get_capabilities(net, &ac, key, server,
+		err = afs_vl_get_capabilities(net, &ac, key, server,
 					      server_index, true);
-		if (ret != -EINPROGRESS) {
-			afs_vl_probe_done(server);
-			return ret;
-		}
+		if (err == -EINPROGRESS)
+			in_progress = true;
+		else
+			afs_prioritise_error(_e, err, ac.abort_code);
 	}
 
-	return 0;
+	if (!in_progress)
+		afs_vl_probe_done(server);
+	return in_progress;
 }
 
 /*
@@ -169,21 +176,23 @@ int afs_send_vl_probes(struct afs_net *net, struct key *key,
 		       struct afs_vlserver_list *vllist)
 {
 	struct afs_vlserver *server;
-	int i, ret;
+	struct afs_error e;
+	bool in_progress = false;
+	int i;
 
+	e.error = 0;
+	e.responded = false;
 	for (i = 0; i < vllist->nr_servers; i++) {
 		server = vllist->servers[i].server;
 		if (test_bit(AFS_VLSERVER_FL_PROBED, &server->flags))
 			continue;
 
-		if (!test_and_set_bit_lock(AFS_VLSERVER_FL_PROBING, &server->flags)) {
-			ret = afs_do_probe_vlserver(net, server, key, i);
-			if (ret)
-				return ret;
-		}
+		if (!test_and_set_bit_lock(AFS_VLSERVER_FL_PROBING, &server->flags) &&
+		    afs_do_probe_vlserver(net, server, key, i, &e))
+			in_progress = true;
 	}
 
-	return 0;
+	return in_progress ? 0 : e.error;
 }
 
 /*
diff --git a/fs/afs/vl_rotate.c b/fs/afs/vl_rotate.c
index b64a284b99d27..7adde83a06482 100644
--- a/fs/afs/vl_rotate.c
+++ b/fs/afs/vl_rotate.c
@@ -71,8 +71,9 @@ bool afs_select_vlserver(struct afs_vl_cursor *vc)
 {
 	struct afs_addr_list *alist;
 	struct afs_vlserver *vlserver;
+	struct afs_error e;
 	u32 rtt;
-	int error = vc->ac.error, abort_code, i;
+	int error = vc->ac.error, i;
 
 	_enter("%lx[%d],%lx[%d],%d,%d",
 	       vc->untried, vc->index,
@@ -119,8 +120,11 @@ bool afs_select_vlserver(struct afs_vl_cursor *vc)
 			goto failed;
 		}
 
+	case -ERFKILL:
+	case -EADDRNOTAVAIL:
 	case -ENETUNREACH:
 	case -EHOSTUNREACH:
+	case -EHOSTDOWN:
 	case -ECONNREFUSED:
 	case -ETIMEDOUT:
 	case -ETIME:
@@ -235,50 +239,15 @@ bool afs_select_vlserver(struct afs_vl_cursor *vc)
 	if (vc->flags & AFS_VL_CURSOR_RETRY)
 		goto restart_from_beginning;
 
-	abort_code = 0;
-	error = -EDESTADDRREQ;
+	e.error = -EDESTADDRREQ;
+	e.responded = false;
 	for (i = 0; i < vc->server_list->nr_servers; i++) {
 		struct afs_vlserver *s = vc->server_list->servers[i].server;
-		int probe_error = READ_ONCE(s->probe.error);
 
-		switch (probe_error) {
-		case 0:
-			continue;
-		default:
-			if (error == -ETIMEDOUT ||
-			    error == -ETIME)
-				continue;
-		case -ETIMEDOUT:
-		case -ETIME:
-			if (error == -ENOMEM ||
-			    error == -ENONET)
-				continue;
-		case -ENOMEM:
-		case -ENONET:
-			if (error == -ENETUNREACH)
-				continue;
-		case -ENETUNREACH:
-			if (error == -EHOSTUNREACH)
-				continue;
-		case -EHOSTUNREACH:
-			if (error == -ECONNREFUSED)
-				continue;
-		case -ECONNREFUSED:
-			if (error == -ECONNRESET)
-				continue;
-		case -ECONNRESET: /* Responded, but call expired. */
-			if (error == -ECONNABORTED)
-				continue;
-		case -ECONNABORTED:
-			abort_code = s->probe.abort_code;
-			error = probe_error;
-			continue;
-		}
+		afs_prioritise_error(&e, READ_ONCE(s->probe.error),
+				     s->probe.abort_code);
 	}
 
-	if (error == -ECONNABORTED)
-		error = afs_abort_to_error(abort_code);
-
 failed_set_error:
 	vc->error = error;
 failed:
@@ -341,6 +310,7 @@ int afs_end_vlserver_operation(struct afs_vl_cursor *vc)
 	struct afs_net *net = vc->cell->net;
 
 	if (vc->error == -EDESTADDRREQ ||
+	    vc->error == -EADDRNOTAVAIL ||
 	    vc->error == -ENETUNREACH ||
 	    vc->error == -EHOSTUNREACH)
 		afs_vl_dump_edestaddrreq(vc);
-- 
GitLab


From 73116df7bb90435ccb2817f44113295240d15034 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 13 Nov 2018 23:20:35 +0000
Subject: [PATCH 1597/1890] afs: Use d_instantiate() rather than d_add() and
 don't d_drop()

Use d_instantiate() rather than d_add() and don't d_drop() in
afs_vnode_new_inode().  The dentry shouldn't be removed as it's not
changing its name.

Reported-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/afs/dir.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 43dea3b00c29b..8a2562e3a3163 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -1075,8 +1075,6 @@ static void afs_vnode_new_inode(struct afs_fs_cursor *fc,
 	if (fc->ac.error < 0)
 		return;
 
-	d_drop(new_dentry);
-
 	inode = afs_iget(fc->vnode->vfs_inode.i_sb, fc->key,
 			 newfid, newstatus, newcb, fc->cbi);
 	if (IS_ERR(inode)) {
@@ -1090,7 +1088,7 @@ static void afs_vnode_new_inode(struct afs_fs_cursor *fc,
 	vnode = AFS_FS_I(inode);
 	set_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
 	afs_vnode_commit_status(fc, vnode, 0);
-	d_add(new_dentry, inode);
+	d_instantiate(new_dentry, inode);
 }
 
 /*
-- 
GitLab


From 0c7a52e4d4b5c4d35b31f3c3ad32af814f1bf491 Mon Sep 17 00:00:00 2001
From: Zenghui Yu <yuzenghui@huawei.com>
Date: Wed, 28 Nov 2018 03:35:23 +0000
Subject: [PATCH 1598/1890] tracepoint: Use __idx instead of idx in DO_TRACE
 macro to make it unique

After enabling KVM event tracing, almost all of trace_kvm_exit()'s
printk shows

	"kvm_exit: IRQ: ..."

even if the actual exception_type is NOT IRQ.  More specifically,
trace_kvm_exit() is defined in virt/kvm/arm/trace.h by TRACE_EVENT.

This slight problem may have existed after commit e6753f23d961
("tracepoint: Make rcuidle tracepoint callers use SRCU"). There are
two variables in trace_kvm_exit() and __DO_TRACE() which have the
same name, *idx*. Thus the actual value of *idx* will be overwritten
when tracing. Fix it by adding a simple prefix.

Cc: Joel Fernandes <joel@joelfernandes.org>
Cc: Wang Haibin <wanghaibin.wang@huawei.com>
Cc: linux-trace-devel@vger.kernel.org
Cc: stable@vger.kernel.org
Fixes: e6753f23d961 ("tracepoint: Make rcuidle tracepoint callers use SRCU")
Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Zenghui Yu <yuzenghui@huawei.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/tracepoint.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 538ba1a58f5b2..e9de8ad0bad74 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -166,7 +166,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 		struct tracepoint_func *it_func_ptr;			\
 		void *it_func;						\
 		void *__data;						\
-		int __maybe_unused idx = 0;				\
+		int __maybe_unused __idx = 0;				\
 									\
 		if (!(cond))						\
 			return;						\
@@ -182,7 +182,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 		 * doesn't work from the idle path.			\
 		 */							\
 		if (rcuidle) {						\
-			idx = srcu_read_lock_notrace(&tracepoint_srcu);	\
+			__idx = srcu_read_lock_notrace(&tracepoint_srcu);\
 			rcu_irq_enter_irqson();				\
 		}							\
 									\
@@ -198,7 +198,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 									\
 		if (rcuidle) {						\
 			rcu_irq_exit_irqson();				\
-			srcu_read_unlock_notrace(&tracepoint_srcu, idx);\
+			srcu_read_unlock_notrace(&tracepoint_srcu, __idx);\
 		}							\
 									\
 		preempt_enable_notrace();				\
-- 
GitLab


From 5cf99a0f3161bc3ae2391269d134d6bf7e26f00e Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Thu, 29 Nov 2018 08:50:27 -0500
Subject: [PATCH 1599/1890] tracing/fgraph: Fix set_graph_function from showing
 interrupts

The tracefs file set_graph_function is used to only function graph functions
that are listed in that file (or all functions if the file is empty). The
way this is implemented is that the function graph tracer looks at every
function, and if the current depth is zero and the function matches
something in the file then it will trace that function. When other functions
are called, the depth will be greater than zero (because the original
function will be at depth zero), and all functions will be traced where the
depth is greater than zero.

The issue is that when a function is first entered, and the handler that
checks this logic is called, the depth is set to zero. If an interrupt comes
in and a function in the interrupt handler is traced, its depth will be
greater than zero and it will automatically be traced, even if the original
function was not. But because the logic only looks at depth it may trace
interrupts when it should not be.

The recent design change of the function graph tracer to fix other bugs
caused the depth to be zero while the function graph callback handler is
being called for a longer time, widening the race of this happening. This
bug was actually there for a longer time, but because the race window was so
small it seldom happened. The Fixes tag below is for the commit that widen
the race window, because that commit belongs to a series that will also help
fix the original bug.

Cc: stable@kernel.org
Fixes: 39eb456dacb5 ("function_graph: Use new curr_ret_depth to manage depth instead of curr_ret_stack")
Reported-by: Joe Lawrence <joe.lawrence@redhat.com>
Tested-by: Joe Lawrence <joe.lawrence@redhat.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace.h                 | 57 ++++++++++++++++++++++++++--
 kernel/trace/trace_functions_graph.c |  4 ++
 kernel/trace/trace_irqsoff.c         |  2 +
 kernel/trace/trace_sched_wakeup.c    |  2 +
 4 files changed, 62 insertions(+), 3 deletions(-)

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 3b8c0e24ab306..447bd96ee658a 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -512,12 +512,44 @@ enum {
  * can only be modified by current, we can reuse trace_recursion.
  */
 	TRACE_IRQ_BIT,
+
+	/* Set if the function is in the set_graph_function file */
+	TRACE_GRAPH_BIT,
+
+	/*
+	 * In the very unlikely case that an interrupt came in
+	 * at a start of graph tracing, and we want to trace
+	 * the function in that interrupt, the depth can be greater
+	 * than zero, because of the preempted start of a previous
+	 * trace. In an even more unlikely case, depth could be 2
+	 * if a softirq interrupted the start of graph tracing,
+	 * followed by an interrupt preempting a start of graph
+	 * tracing in the softirq, and depth can even be 3
+	 * if an NMI came in at the start of an interrupt function
+	 * that preempted a softirq start of a function that
+	 * preempted normal context!!!! Luckily, it can't be
+	 * greater than 3, so the next two bits are a mask
+	 * of what the depth is when we set TRACE_GRAPH_BIT
+	 */
+
+	TRACE_GRAPH_DEPTH_START_BIT,
+	TRACE_GRAPH_DEPTH_END_BIT,
 };
 
 #define trace_recursion_set(bit)	do { (current)->trace_recursion |= (1<<(bit)); } while (0)
 #define trace_recursion_clear(bit)	do { (current)->trace_recursion &= ~(1<<(bit)); } while (0)
 #define trace_recursion_test(bit)	((current)->trace_recursion & (1<<(bit)))
 
+#define trace_recursion_depth() \
+	(((current)->trace_recursion >> TRACE_GRAPH_DEPTH_START_BIT) & 3)
+#define trace_recursion_set_depth(depth) \
+	do {								\
+		current->trace_recursion &=				\
+			~(3 << TRACE_GRAPH_DEPTH_START_BIT);		\
+		current->trace_recursion |=				\
+			((depth) & 3) << TRACE_GRAPH_DEPTH_START_BIT;	\
+	} while (0)
+
 #define TRACE_CONTEXT_BITS	4
 
 #define TRACE_FTRACE_START	TRACE_FTRACE_BIT
@@ -843,8 +875,9 @@ extern void __trace_graph_return(struct trace_array *tr,
 extern struct ftrace_hash *ftrace_graph_hash;
 extern struct ftrace_hash *ftrace_graph_notrace_hash;
 
-static inline int ftrace_graph_addr(unsigned long addr)
+static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace)
 {
+	unsigned long addr = trace->func;
 	int ret = 0;
 
 	preempt_disable_notrace();
@@ -855,6 +888,14 @@ static inline int ftrace_graph_addr(unsigned long addr)
 	}
 
 	if (ftrace_lookup_ip(ftrace_graph_hash, addr)) {
+
+		/*
+		 * This needs to be cleared on the return functions
+		 * when the depth is zero.
+		 */
+		trace_recursion_set(TRACE_GRAPH_BIT);
+		trace_recursion_set_depth(trace->depth);
+
 		/*
 		 * If no irqs are to be traced, but a set_graph_function
 		 * is set, and called by an interrupt handler, we still
@@ -872,6 +913,13 @@ static inline int ftrace_graph_addr(unsigned long addr)
 	return ret;
 }
 
+static inline void ftrace_graph_addr_finish(struct ftrace_graph_ret *trace)
+{
+	if (trace_recursion_test(TRACE_GRAPH_BIT) &&
+	    trace->depth == trace_recursion_depth())
+		trace_recursion_clear(TRACE_GRAPH_BIT);
+}
+
 static inline int ftrace_graph_notrace_addr(unsigned long addr)
 {
 	int ret = 0;
@@ -885,7 +933,7 @@ static inline int ftrace_graph_notrace_addr(unsigned long addr)
 	return ret;
 }
 #else
-static inline int ftrace_graph_addr(unsigned long addr)
+static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace)
 {
 	return 1;
 }
@@ -894,6 +942,8 @@ static inline int ftrace_graph_notrace_addr(unsigned long addr)
 {
 	return 0;
 }
+static inline void ftrace_graph_addr_finish(struct ftrace_graph_ret *trace)
+{ }
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
 extern unsigned int fgraph_max_depth;
@@ -901,7 +951,8 @@ extern unsigned int fgraph_max_depth;
 static inline bool ftrace_graph_ignore_func(struct ftrace_graph_ent *trace)
 {
 	/* trace it when it is-nested-in or is a function enabled. */
-	return !(trace->depth || ftrace_graph_addr(trace->func)) ||
+	return !(trace_recursion_test(TRACE_GRAPH_BIT) ||
+		 ftrace_graph_addr(trace)) ||
 		(trace->depth < 0) ||
 		(fgraph_max_depth && trace->depth >= fgraph_max_depth);
 }
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 2561460d7baf8..086af4f5c3e84 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -509,6 +509,8 @@ void trace_graph_return(struct ftrace_graph_ret *trace)
 	int cpu;
 	int pc;
 
+	ftrace_graph_addr_finish(trace);
+
 	local_irq_save(flags);
 	cpu = raw_smp_processor_id();
 	data = per_cpu_ptr(tr->trace_buffer.data, cpu);
@@ -532,6 +534,8 @@ void set_graph_array(struct trace_array *tr)
 
 static void trace_graph_thresh_return(struct ftrace_graph_ret *trace)
 {
+	ftrace_graph_addr_finish(trace);
+
 	if (tracing_thresh &&
 	    (trace->rettime - trace->calltime < tracing_thresh))
 		return;
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index b7357f9f82a35..98ea6d28df15d 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -208,6 +208,8 @@ static void irqsoff_graph_return(struct ftrace_graph_ret *trace)
 	unsigned long flags;
 	int pc;
 
+	ftrace_graph_addr_finish(trace);
+
 	if (!func_prolog_dec(tr, &data, &flags))
 		return;
 
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index a86b303e6c67d..7d04b98907551 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -270,6 +270,8 @@ static void wakeup_graph_return(struct ftrace_graph_ret *trace)
 	unsigned long flags;
 	int pc;
 
+	ftrace_graph_addr_finish(trace);
+
 	if (!func_prolog_preempt_disable(tr, &data, &pc))
 		return;
 
-- 
GitLab


From b84a64fad40637b1c9fa4f4dbf847a23e29e672b Mon Sep 17 00:00:00 2001
From: Eric Snowberg <eric.snowberg@oracle.com>
Date: Thu, 29 Nov 2018 18:12:20 +0100
Subject: [PATCH 1600/1890] x86/efi: Allocate e820 buffer before calling
 efi_exit_boot_service

The following commit:

  d64934019f6c ("x86/efi: Use efi_exit_boot_services()")

introduced a regression on systems with large memory maps causing them
to hang on boot. The first "goto get_map" that was removed from
exit_boot() ensured there was enough room for the memory map when
efi_call_early(exit_boot_services) was called. This happens when
(nr_desc > ARRAY_SIZE(params->e820_table).

Chain of events:

  exit_boot()
    efi_exit_boot_services()
      efi_get_memory_map                  <- at this point the mm can't grow over 8 desc
      priv_func()
        exit_boot_func()
          allocate_e820ext()              <- new mm grows over 8 desc from e820 alloc
      efi_call_early(exit_boot_services)  <- mm key doesn't match so retry
      efi_call_early(get_memory_map)      <- not enough room for new mm
      system hangs

This patch allocates the e820 buffer before calling efi_exit_boot_services()
and fixes the regression.

 [ mingo: minor cleanliness edits. ]

Signed-off-by: Eric Snowberg <eric.snowberg@oracle.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: <stable@vger.kernel.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arend van Spriel <arend.vanspriel@broadcom.com>
Cc: Bhupesh Sharma <bhsharma@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Hans de Goede <hdegoede@redhat.com>
Cc: Joe Perches <joe@perches.com>
Cc: Jon Hunter <jonathanh@nvidia.com>
Cc: Julien Thierry <julien.thierry@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Nathan Chancellor <natechancellor@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>
Cc: Sedat Dilek <sedat.dilek@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: YiFei Zhu <zhuyifei1999@gmail.com>
Cc: linux-efi@vger.kernel.org
Link: http://lkml.kernel.org/r/20181129171230.18699-2-ard.biesheuvel@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/boot/compressed/eboot.c | 65 ++++++++++++++++++++------------
 1 file changed, 41 insertions(+), 24 deletions(-)

diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 8b4c5e0011572..544ac4fafd112 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -1,3 +1,4 @@
+
 /* -----------------------------------------------------------------------
  *
  *   Copyright 2011 Intel Corporation; author Matt Fleming
@@ -634,37 +635,54 @@ static efi_status_t alloc_e820ext(u32 nr_desc, struct setup_data **e820ext,
 	return status;
 }
 
+static efi_status_t allocate_e820(struct boot_params *params,
+				  struct setup_data **e820ext,
+				  u32 *e820ext_size)
+{
+	unsigned long map_size, desc_size, buff_size;
+	struct efi_boot_memmap boot_map;
+	efi_memory_desc_t *map;
+	efi_status_t status;
+	__u32 nr_desc;
+
+	boot_map.map		= &map;
+	boot_map.map_size	= &map_size;
+	boot_map.desc_size	= &desc_size;
+	boot_map.desc_ver	= NULL;
+	boot_map.key_ptr	= NULL;
+	boot_map.buff_size	= &buff_size;
+
+	status = efi_get_memory_map(sys_table, &boot_map);
+	if (status != EFI_SUCCESS)
+		return status;
+
+	nr_desc = buff_size / desc_size;
+
+	if (nr_desc > ARRAY_SIZE(params->e820_table)) {
+		u32 nr_e820ext = nr_desc - ARRAY_SIZE(params->e820_table);
+
+		status = alloc_e820ext(nr_e820ext, e820ext, e820ext_size);
+		if (status != EFI_SUCCESS)
+			return status;
+	}
+
+	return EFI_SUCCESS;
+}
+
 struct exit_boot_struct {
 	struct boot_params	*boot_params;
 	struct efi_info		*efi;
-	struct setup_data	*e820ext;
-	__u32			e820ext_size;
 };
 
 static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg,
 				   struct efi_boot_memmap *map,
 				   void *priv)
 {
-	static bool first = true;
 	const char *signature;
 	__u32 nr_desc;
 	efi_status_t status;
 	struct exit_boot_struct *p = priv;
 
-	if (first) {
-		nr_desc = *map->buff_size / *map->desc_size;
-		if (nr_desc > ARRAY_SIZE(p->boot_params->e820_table)) {
-			u32 nr_e820ext = nr_desc -
-					ARRAY_SIZE(p->boot_params->e820_table);
-
-			status = alloc_e820ext(nr_e820ext, &p->e820ext,
-					       &p->e820ext_size);
-			if (status != EFI_SUCCESS)
-				return status;
-		}
-		first = false;
-	}
-
 	signature = efi_is_64bit() ? EFI64_LOADER_SIGNATURE
 				   : EFI32_LOADER_SIGNATURE;
 	memcpy(&p->efi->efi_loader_signature, signature, sizeof(__u32));
@@ -687,8 +705,8 @@ static efi_status_t exit_boot(struct boot_params *boot_params, void *handle)
 {
 	unsigned long map_sz, key, desc_size, buff_size;
 	efi_memory_desc_t *mem_map;
-	struct setup_data *e820ext;
-	__u32 e820ext_size;
+	struct setup_data *e820ext = NULL;
+	__u32 e820ext_size = 0;
 	efi_status_t status;
 	__u32 desc_version;
 	struct efi_boot_memmap map;
@@ -702,8 +720,10 @@ static efi_status_t exit_boot(struct boot_params *boot_params, void *handle)
 	map.buff_size		= &buff_size;
 	priv.boot_params	= boot_params;
 	priv.efi		= &boot_params->efi_info;
-	priv.e820ext		= NULL;
-	priv.e820ext_size	= 0;
+
+	status = allocate_e820(boot_params, &e820ext, &e820ext_size);
+	if (status != EFI_SUCCESS)
+		return status;
 
 	/* Might as well exit boot services now */
 	status = efi_exit_boot_services(sys_table, handle, &map, &priv,
@@ -711,9 +731,6 @@ static efi_status_t exit_boot(struct boot_params *boot_params, void *handle)
 	if (status != EFI_SUCCESS)
 		return status;
 
-	e820ext			= priv.e820ext;
-	e820ext_size		= priv.e820ext_size;
-
 	/* Historic? */
 	boot_params->alt_mem_k	= 32 * 1024;
 
-- 
GitLab


From 79c2206d369b87b19ac29cb47601059b6bf5c291 Mon Sep 17 00:00:00 2001
From: YiFei Zhu <zhuyifei1999@gmail.com>
Date: Thu, 29 Nov 2018 18:12:30 +0100
Subject: [PATCH 1601/1890] x86/earlyprintk/efi: Fix infinite loop on some
 screen widths

An affected screen resolution is 1366 x 768, which width is not
divisible by 8, the default font width. On such screens, when longer
lines are earlyprintk'ed, overflow-to-next-line can never trigger,
due to the left-most x-coordinate of the next character always less
than the screen width. Earlyprintk will infinite loop in trying to
print the rest of the string but unable to, due to the line being
full.

This patch makes the trigger consider the right-most x-coordinate,
instead of left-most, as the value to compare against the screen
width threshold.

Signed-off-by: YiFei Zhu <zhuyifei1999@gmail.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arend van Spriel <arend.vanspriel@broadcom.com>
Cc: Bhupesh Sharma <bhsharma@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Eric Snowberg <eric.snowberg@oracle.com>
Cc: Hans de Goede <hdegoede@redhat.com>
Cc: Joe Perches <joe@perches.com>
Cc: Jon Hunter <jonathanh@nvidia.com>
Cc: Julien Thierry <julien.thierry@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Nathan Chancellor <natechancellor@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>
Cc: Sedat Dilek <sedat.dilek@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-efi@vger.kernel.org
Link: http://lkml.kernel.org/r/20181129171230.18699-12-ard.biesheuvel@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/platform/efi/early_printk.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/platform/efi/early_printk.c b/arch/x86/platform/efi/early_printk.c
index 7476b3b097e1e..7138bc7a265c0 100644
--- a/arch/x86/platform/efi/early_printk.c
+++ b/arch/x86/platform/efi/early_printk.c
@@ -183,7 +183,7 @@ early_efi_write(struct console *con, const char *str, unsigned int num)
 			num--;
 		}
 
-		if (efi_x >= si->lfb_width) {
+		if (efi_x + font->width > si->lfb_width) {
 			efi_x = 0;
 			efi_y += font->height;
 		}
-- 
GitLab


From ce85882860f0e756f7066cbda1c43e8b50b73ab6 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Thu, 29 Nov 2018 10:42:26 +0100
Subject: [PATCH 1602/1890] drm/lease: Send a distinct uevent
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Sending the exact same hotplug event is not great uapi. Luckily the
only already merged implementation of leases (in the -modesetting
driver) doesn't care about what kind of uevent it gets, and
unconditionally processes both hotplug and lease changes. So we can
still adjust the uapi here.

But e.g. weston tries to filter stuff, and I guess others might want
to do that too. Try to make that possible. Cc: stable since it's uapi
adjustement that we want to roll out everywhere.

Michel DÃ¤nzer mentioned on irc that -amdgpu also has lease support. It
has the same code flow as -modesetting though, so we can still go
ahead.

v2: Mention -amdgpu (Michel)

Cc: Keith Packard <keithp@keithp.com>
Cc: Dave Airlie <airlied@redhat.com>
Cc: stable@vger.kernel.org
Reviewed-by: Keith Packard <keithp@keithp.com>
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181129094226.30591-1-daniel.vetter@ffwll.ch
---
 drivers/gpu/drm/drm_internal.h |  2 ++
 drivers/gpu/drm/drm_lease.c    |  2 +-
 drivers/gpu/drm/drm_sysfs.c    | 10 ++++++++++
 3 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index 0c4eb4a9ab31f..51e06defc8d8a 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -104,6 +104,8 @@ struct device *drm_sysfs_minor_alloc(struct drm_minor *minor);
 int drm_sysfs_connector_add(struct drm_connector *connector);
 void drm_sysfs_connector_remove(struct drm_connector *connector);
 
+void drm_sysfs_lease_event(struct drm_device *dev);
+
 /* drm_gem.c */
 int drm_gem_init(struct drm_device *dev);
 void drm_gem_destroy(struct drm_device *dev);
diff --git a/drivers/gpu/drm/drm_lease.c b/drivers/gpu/drm/drm_lease.c
index 24a177ea54176..c61680ad962d9 100644
--- a/drivers/gpu/drm/drm_lease.c
+++ b/drivers/gpu/drm/drm_lease.c
@@ -296,7 +296,7 @@ void drm_lease_destroy(struct drm_master *master)
 
 	if (master->lessor) {
 		/* Tell the master to check the lessee list */
-		drm_sysfs_hotplug_event(dev);
+		drm_sysfs_lease_event(dev);
 		drm_master_put(&master->lessor);
 	}
 
diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c
index b3c1daad1169b..ecb7b33002bb2 100644
--- a/drivers/gpu/drm/drm_sysfs.c
+++ b/drivers/gpu/drm/drm_sysfs.c
@@ -301,6 +301,16 @@ void drm_sysfs_connector_remove(struct drm_connector *connector)
 	connector->kdev = NULL;
 }
 
+void drm_sysfs_lease_event(struct drm_device *dev)
+{
+	char *event_string = "LEASE=1";
+	char *envp[] = { event_string, NULL };
+
+	DRM_DEBUG("generating lease event\n");
+
+	kobject_uevent_env(&dev->primary->kdev->kobj, KOBJ_CHANGE, envp);
+}
+
 /**
  * drm_sysfs_hotplug_event - generate a DRM uevent
  * @dev: DRM device
-- 
GitLab


From e98e38090f7ef4bf519d2b34e0d5128ed5bf8c79 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Mon, 26 Nov 2018 11:52:16 +0100
Subject: [PATCH 1603/1890] HID: asus: Add event handler to catch unmapped Asus
 Vendor UsagePage codes

Various Asus devices generate HID events using the Asus Vendor specific
UsagePage 0xff31 and hid-asus will map these in its input_mapping for all
devices to which it binds (independent of any quirks).

Add an event callback which check for unmapped (because sofar unknown)
usages within the Asus Vendor UsagePage and log a warning for these.

The purpose of this patch is to help debugging / find such unmapped codes
and add them to the asus_input_mapping() function.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
---
 drivers/hid/hid-asus.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c
index a1fa2fc8c9b57..61fb5a43c1cb7 100644
--- a/drivers/hid/hid-asus.c
+++ b/drivers/hid/hid-asus.c
@@ -241,6 +241,18 @@ static int asus_report_input(struct asus_drvdata *drvdat, u8 *data, int size)
 	return 1;
 }
 
+static int asus_event(struct hid_device *hdev, struct hid_field *field,
+		      struct hid_usage *usage, __s32 value)
+{
+	if ((usage->hid & HID_USAGE_PAGE) == 0xff310000 &&
+	    (usage->hid & HID_USAGE) != 0x00 && !usage->type) {
+		hid_warn(hdev, "Unmapped Asus vendor usagepage code 0x%02x\n",
+			 usage->hid & HID_USAGE);
+	}
+
+	return 0;
+}
+
 static int asus_raw_event(struct hid_device *hdev,
 		struct hid_report *report, u8 *data, int size)
 {
@@ -832,6 +844,7 @@ static struct hid_driver asus_driver = {
 #ifdef CONFIG_PM
 	.reset_resume           = asus_reset_resume,
 #endif
+	.event			= asus_event,
 	.raw_event		= asus_raw_event
 };
 module_hid_driver(asus_driver);
-- 
GitLab


From 2340bad5d28e157c386cedf8d8a64ed6514e563f Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Mon, 26 Nov 2018 11:52:17 +0100
Subject: [PATCH 1604/1890] HID: asus: Add support for the ASUS FX503VD laptop

The ASUS FX503VD laptop uses an USB keyboard with several hotkeys
which use the Asus Vendor specific UsagePage.

This uses two usage-codes within the page which have not been seen
before, 0x7c for its mic-mute hotkey and 0x99 for Fn+F5 which has
a "fan" symbol as hotkey symbol on the keyb. we map this to KEY_PROG4
(PROG1-PROG3 are already used).

This commit adds the mappings for the 2 new usage codes and the USB-ids
for this keyboard to the hid-asus driver.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
---
 drivers/hid/hid-asus.c | 7 +++++++
 drivers/hid/hid-ids.h  | 1 +
 2 files changed, 8 insertions(+)

diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c
index 61fb5a43c1cb7..ab8bd40a77ed5 100644
--- a/drivers/hid/hid-asus.c
+++ b/drivers/hid/hid-asus.c
@@ -522,6 +522,7 @@ static int asus_input_mapping(struct hid_device *hdev,
 		case 0x20: asus_map_key_clear(KEY_BRIGHTNESSUP);		break;
 		case 0x35: asus_map_key_clear(KEY_DISPLAY_OFF);		break;
 		case 0x6c: asus_map_key_clear(KEY_SLEEP);		break;
+		case 0x7c: asus_map_key_clear(KEY_MICMUTE);		break;
 		case 0x82: asus_map_key_clear(KEY_CAMERA);		break;
 		case 0x88: asus_map_key_clear(KEY_RFKILL);			break;
 		case 0xb5: asus_map_key_clear(KEY_CALC);			break;
@@ -540,6 +541,9 @@ static int asus_input_mapping(struct hid_device *hdev,
 		/* Fn+Space Power4Gear Hybrid */
 		case 0x5c: asus_map_key_clear(KEY_PROG3);		break;
 
+		/* Fn+F5 "fan" symbol on FX503VD */
+		case 0x99: asus_map_key_clear(KEY_PROG4);		break;
+
 		default:
 			/* ASUS lazily declares 256 usages, ignore the rest,
 			 * as some make the keyboard appear as a pointer device. */
@@ -817,6 +821,9 @@ static const struct hid_device_id asus_devices[] = {
 		USB_DEVICE_ID_ASUSTEK_ROG_KEYBOARD2), QUIRK_USE_KBD_BACKLIGHT },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK,
 		USB_DEVICE_ID_ASUSTEK_ROG_KEYBOARD3), QUIRK_G752_KEYBOARD },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK,
+		USB_DEVICE_ID_ASUSTEK_FX503VD_KEYBOARD),
+	  QUIRK_USE_KBD_BACKLIGHT },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK,
 		USB_DEVICE_ID_ASUSTEK_T100TA_KEYBOARD),
 	  QUIRK_T100_KEYBOARD | QUIRK_NO_CONSUMER_USAGES },
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index ed35c9a9a1109..4206428c0ba28 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -190,6 +190,7 @@
 #define USB_DEVICE_ID_ASUSTEK_ROG_KEYBOARD1 0x1854
 #define USB_DEVICE_ID_ASUSTEK_ROG_KEYBOARD2 0x1837
 #define USB_DEVICE_ID_ASUSTEK_ROG_KEYBOARD3 0x1822
+#define USB_DEVICE_ID_ASUSTEK_FX503VD_KEYBOARD	0x1869
 
 #define USB_VENDOR_ID_ATEN		0x0557
 #define USB_DEVICE_ID_ATEN_UC100KM	0x2004
-- 
GitLab


From 41e817bca3acd3980efe5dd7d28af0e6f4ab9247 Mon Sep 17 00:00:00 2001
From: Maximilian Heyne <mheyne@amazon.de>
Date: Fri, 30 Nov 2018 08:35:14 -0700
Subject: [PATCH 1605/1890] fs: fix lost error code in dio_complete

commit e259221763a40403d5bb232209998e8c45804ab8 ("fs: simplify the
generic_write_sync prototype") reworked callers of generic_write_sync(),
and ended up dropping the error return for the directio path. Prior to
that commit, in dio_complete(), an error would be bubbled up the stack,
but after that commit, errors passed on to dio_complete were eaten up.

This was reported on the list earlier, and a fix was proposed in
https://lore.kernel.org/lkml/20160921141539.GA17898@infradead.org/, but
never followed up with.  We recently hit this bug in our testing where
fencing io errors, which were previously erroring out with EIO, were
being returned as success operations after this commit.

The fix proposed on the list earlier was a little short -- it would have
still called generic_write_sync() in case `ret` already contained an
error. This fix ensures generic_write_sync() is only called when there's
no pending error in the write. Additionally, transferred is replaced
with ret to bring this code in line with other callers.

Fixes: e259221763a4 ("fs: simplify the generic_write_sync prototype")
Reported-by: Ravi Nankani <rnankani@amazon.com>
Signed-off-by: Maximilian Heyne <mheyne@amazon.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
CC: Torsten Mehlan <tomeh@amazon.de>
CC: Uwe Dannowski <uwed@amazon.de>
CC: Amit Shah <aams@amazon.de>
CC: David Woodhouse <dwmw@amazon.co.uk>
CC: stable@vger.kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/direct-io.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/direct-io.c b/fs/direct-io.c
index 722d17c88edb9..41a0e97252aed 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -325,8 +325,8 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags)
 		 */
 		dio->iocb->ki_pos += transferred;
 
-		if (dio->op == REQ_OP_WRITE)
-			ret = generic_write_sync(dio->iocb,  transferred);
+		if (ret > 0 && dio->op == REQ_OP_WRITE)
+			ret = generic_write_sync(dio->iocb, ret);
 		dio->iocb->ki_complete(dio->iocb, ret, 0);
 	}
 
-- 
GitLab


From 4265b0fe453240b5fa6d0986d1f23688497f4078 Mon Sep 17 00:00:00 2001
From: Sandeep Panda <spanda@codeaurora.org>
Date: Fri, 30 Nov 2018 14:57:45 +0530
Subject: [PATCH 1606/1890] drm/bridge: fix AUX_CMD_SEND bit value for ti,
 sn65dsi86 bridge

Fix the AUX_CMD_SEND bit for ti,sn65dsi86 bridge chip. With wrong
value the dpcd aux transactions with eDP panel are failing.

Signed-off-by: Sandeep Panda <spanda@codeaurora.org>
Signed-off-by: Sean Paul <seanpaul@chromium.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20181130092745.4219-1-spanda@codeaurora.org
---
 drivers/gpu/drm/bridge/ti-sn65dsi86.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
index 680566d97adcf..10243965ee7c0 100644
--- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c
+++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
@@ -54,7 +54,7 @@
 #define SN_AUX_ADDR_7_0_REG			0x76
 #define SN_AUX_LENGTH_REG			0x77
 #define SN_AUX_CMD_REG				0x78
-#define  AUX_CMD_SEND				BIT(1)
+#define  AUX_CMD_SEND				BIT(0)
 #define  AUX_CMD_REQ(x)				((x) << 4)
 #define SN_AUX_RDATA_REG(x)			(0x79 + (x))
 #define SN_SSC_CONFIG_REG			0x93
-- 
GitLab


From c5a94f434c82529afda290df3235e4d85873c5b4 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Fri, 26 Oct 2018 17:16:29 +1100
Subject: [PATCH 1607/1890] fscache: fix race between enablement and dropping
 of object

It was observed that a process blocked indefintely in
__fscache_read_or_alloc_page(), waiting for FSCACHE_COOKIE_LOOKING_UP
to be cleared via fscache_wait_for_deferred_lookup().

At this time, ->backing_objects was empty, which would normaly prevent
__fscache_read_or_alloc_page() from getting to the point of waiting.
This implies that ->backing_objects was cleared *after*
__fscache_read_or_alloc_page was was entered.

When an object is "killed" and then "dropped",
FSCACHE_COOKIE_LOOKING_UP is cleared in fscache_lookup_failure(), then
KILL_OBJECT and DROP_OBJECT are "called" and only in DROP_OBJECT is
->backing_objects cleared.  This leaves a window where
something else can set FSCACHE_COOKIE_LOOKING_UP and
__fscache_read_or_alloc_page() can start waiting, before
->backing_objects is cleared

There is some uncertainty in this analysis, but it seems to be fit the
observations.  Adding the wake in this patch will be handled correctly
by __fscache_read_or_alloc_page(), as it checks if ->backing_objects
is empty again, after waiting.

Customer which reported the hang, also report that the hang cannot be
reproduced with this fix.

The backtrace for the blocked process looked like:

PID: 29360  TASK: ffff881ff2ac0f80  CPU: 3   COMMAND: "zsh"
 #0 [ffff881ff43efbf8] schedule at ffffffff815e56f1
 #1 [ffff881ff43efc58] bit_wait at ffffffff815e64ed
 #2 [ffff881ff43efc68] __wait_on_bit at ffffffff815e61b8
 #3 [ffff881ff43efca0] out_of_line_wait_on_bit at ffffffff815e625e
 #4 [ffff881ff43efd08] fscache_wait_for_deferred_lookup at ffffffffa04f2e8f [fscache]
 #5 [ffff881ff43efd18] __fscache_read_or_alloc_page at ffffffffa04f2ffe [fscache]
 #6 [ffff881ff43efd58] __nfs_readpage_from_fscache at ffffffffa0679668 [nfs]
 #7 [ffff881ff43efd78] nfs_readpage at ffffffffa067092b [nfs]
 #8 [ffff881ff43efda0] generic_file_read_iter at ffffffff81187a73
 #9 [ffff881ff43efe50] nfs_file_read at ffffffffa066544b [nfs]
#10 [ffff881ff43efe70] __vfs_read at ffffffff811fc756
#11 [ffff881ff43efee8] vfs_read at ffffffff811fccfa
#12 [ffff881ff43eff18] sys_read at ffffffff811fda62
#13 [ffff881ff43eff50] entry_SYSCALL_64_fastpath at ffffffff815e986e

Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/fscache/object.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/fscache/object.c b/fs/fscache/object.c
index 9edc920f651f3..6d9cb1719de5c 100644
--- a/fs/fscache/object.c
+++ b/fs/fscache/object.c
@@ -730,6 +730,9 @@ static const struct fscache_state *fscache_drop_object(struct fscache_object *ob
 
 	if (awaken)
 		wake_up_bit(&cookie->flags, FSCACHE_COOKIE_INVALIDATING);
+	if (test_and_clear_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags))
+		wake_up_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP);
+
 
 	/* Prevent a race with our last child, which has to signal EV_CLEARED
 	 * before dropping our spinlock.
-- 
GitLab


From b7e768b7e3522695ed36dcb48ecdcd344bd30a9b Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <natechancellor@gmail.com>
Date: Mon, 24 Sep 2018 10:33:44 -0700
Subject: [PATCH 1608/1890] cachefiles: Explicitly cast enumerated type in
 put_object

Clang warns when one enumerated type is implicitly converted to another.

fs/cachefiles/namei.c:247:50: warning: implicit conversion from
enumeration type 'enum cachefiles_obj_ref_trace' to different
enumeration type 'enum fscache_obj_ref_trace' [-Wenum-conversion]
        cache->cache.ops->put_object(&xobject->fscache,
cachefiles_obj_put_wait_retry);

Silence this warning by explicitly casting to fscache_obj_ref_trace,
which is also done in put_object.

Reported-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/cachefiles/namei.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 95983c744164a..5ab411d4bc592 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -244,11 +244,13 @@ static int cachefiles_mark_object_active(struct cachefiles_cache *cache,
 
 	ASSERT(!test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags));
 
-	cache->cache.ops->put_object(&xobject->fscache, cachefiles_obj_put_wait_retry);
+	cache->cache.ops->put_object(&xobject->fscache,
+		(enum fscache_obj_ref_trace)cachefiles_obj_put_wait_retry);
 	goto try_again;
 
 requeue:
-	cache->cache.ops->put_object(&xobject->fscache, cachefiles_obj_put_wait_timeo);
+	cache->cache.ops->put_object(&xobject->fscache,
+		(enum fscache_obj_ref_trace)cachefiles_obj_put_wait_timeo);
 	_leave(" = -ETIMEDOUT");
 	return -ETIMEDOUT;
 }
-- 
GitLab


From 34e06fe4d05bd120556a95d5ebf1bcc97b0a1ca0 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 13 Jul 2018 16:27:44 +0200
Subject: [PATCH 1609/1890] cachefiles: avoid deprecated get_seconds()

get_seconds() returns an unsigned long can overflow on some architectures
and is deprecated because of that. In cachefs, we cast that number to
a a 32-bit integer, which will overflow in year 2106 on all architectures.

As confirmed by David Howells, the overflow probably isn't harmful
in the end, since the timestamps are only used to make the file names
unique, but they don't strictly have to be in monotonically increasing
order since the files only exist in order to be deleted as quickly
as possible.

Moving to ktime_get_real_seconds() avoids the deprecated interface.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/cachefiles/namei.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 5ab411d4bc592..1645fcfd9691c 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -338,7 +338,7 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
 try_again:
 	/* first step is to make up a grave dentry in the graveyard */
 	sprintf(nbuffer, "%08x%08x",
-		(uint32_t) get_seconds(),
+		(uint32_t) ktime_get_real_seconds(),
 		(uint32_t) atomic_inc_return(&cache->gravecounter));
 
 	/* do the multiway lock magic */
-- 
GitLab


From 31ffa563833576bd49a8bf53120568312755e6e2 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 17 Jul 2018 09:53:42 +0100
Subject: [PATCH 1610/1890] fscache, cachefiles: remove redundant variable
 'cache'

Variable 'cache' is being assigned but is never used hence it is
redundant and can be removed.

Cleans up clang warning:
warning: variable 'cache' set but not used [-Wunused-but-set-variable]

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/cachefiles/rdwr.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index db233588a69af..8a577409d030b 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -968,11 +968,8 @@ void cachefiles_uncache_page(struct fscache_object *_object, struct page *page)
 	__releases(&object->fscache.cookie->lock)
 {
 	struct cachefiles_object *object;
-	struct cachefiles_cache *cache;
 
 	object = container_of(_object, struct cachefiles_object, fscache);
-	cache = container_of(object->fscache.cache,
-			     struct cachefiles_cache, cache);
 
 	_enter("%p,{%lu}", object, page->index);
 
-- 
GitLab


From 14a1336e6fff47dd1028b484d6c802105c58e2ee Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 20 Nov 2018 16:57:54 +0100
Subject: [PATCH 1611/1890] nvme: warn when finding multi-port subsystems
 without multipathing enabled

Without CONFIG_NVME_MULTIPATH enabled a multi-port subsystem might
show up as invididual devices and cause problems, warn about it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/nvme.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index cee79cb388aff..081cbdcce8803 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -531,6 +531,9 @@ static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
 static inline int nvme_mpath_init(struct nvme_ctrl *ctrl,
 		struct nvme_id_ctrl *id)
 {
+	if (ctrl->subsys->cmic & (1 << 3))
+		dev_warn(ctrl->device,
+"Please enable CONFIG_NVME_MULTIPATH for full support of multi-port devices.\n");
 	return 0;
 }
 static inline void nvme_mpath_uninit(struct nvme_ctrl *ctrl)
-- 
GitLab


From f6c8e432cb0479255322c5d0335b9f1699a0270c Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Wed, 21 Nov 2018 15:17:37 -0800
Subject: [PATCH 1612/1890] nvme: flush namespace scanning work just before
 removing namespaces

nvme_stop_ctrl can be called also for reset flow and there is no need to
flush the scan_work as namespaces are not being removed. This can cause
deadlock in rdma, fc and loop drivers since nvme_stop_ctrl barriers
before controller teardown (and specifically I/O cancellation of the
scan_work itself) takes place, but the scan_work will be blocked anyways
so there is no need to flush it.

Instead, move scan_work flush to nvme_remove_namespaces() where it really
needs to flush.

Reported-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Reviewed by: James Smart <jsmart2021@gmail.com>
Tested-by: Ewan D. Milne <emilne@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index bb39b91253c2d..3cf1b773158ec 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3314,6 +3314,9 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
 	struct nvme_ns *ns, *next;
 	LIST_HEAD(ns_list);
 
+	/* prevent racing with ns scanning */
+	flush_work(&ctrl->scan_work);
+
 	/*
 	 * The dead states indicates the controller was not gracefully
 	 * disconnected. In that case, we won't be able to flush any data while
@@ -3476,7 +3479,6 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
 	nvme_mpath_stop(ctrl);
 	nvme_stop_keep_alive(ctrl);
 	flush_work(&ctrl->async_event_work);
-	flush_work(&ctrl->scan_work);
 	cancel_work_sync(&ctrl->fw_act_work);
 	if (ctrl->ops->stop_ctrl)
 		ctrl->ops->stop_ctrl(ctrl);
-- 
GitLab


From 6344d02dc8f886b6bbcd922ae1a17e4a41500f2d Mon Sep 17 00:00:00 2001
From: Prabhath Sajeepa <psajeepa@purestorage.com>
Date: Wed, 28 Nov 2018 11:11:29 -0700
Subject: [PATCH 1613/1890] nvme-rdma: fix double freeing of async event data

Some error paths in configuration of admin queue free data buffer
associated with async request SQE without resetting the data buffer
pointer to NULL, This buffer is also freed up again if the controller
is shutdown or reset.

Signed-off-by: Prabhath Sajeepa <psajeepa@purestorage.com>
Reviewed-by: Roland Dreier <roland@purestorage.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/rdma.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index d181cafedc584..ab6ec7295bf90 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -184,6 +184,7 @@ static int nvme_rdma_alloc_qe(struct ib_device *ibdev, struct nvme_rdma_qe *qe,
 	qe->dma = ib_dma_map_single(ibdev, qe->data, capsule_size, dir);
 	if (ib_dma_mapping_error(ibdev, qe->dma)) {
 		kfree(qe->data);
+		qe->data = NULL;
 		return -ENOMEM;
 	}
 
@@ -823,6 +824,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
 out_free_async_qe:
 	nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
 		sizeof(struct nvme_command), DMA_TO_DEVICE);
+	ctrl->async_event_sqe.data = NULL;
 out_free_queue:
 	nvme_rdma_free_queue(&ctrl->queues[0]);
 	return error;
-- 
GitLab


From 4f3a31a8e8bff5fc363ec9f4755e58a15f7f36c7 Mon Sep 17 00:00:00 2001
From: Sharat Masetty <smasetty@codeaurora.org>
Date: Fri, 12 Oct 2018 14:26:55 +0530
Subject: [PATCH 1614/1890] drm/msm: Check if target supports crash dump
 capture

This patch simply checks first to see if the target can support crash dump
capture before proceeding.

Signed-off-by: Sharat Masetty <smasetty@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
Signed-off-by: Sean Paul <seanpaul@chromium.org>
---
 drivers/gpu/drm/msm/msm_gpu.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 11aac83370664..32d04a9b48cff 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -345,6 +345,10 @@ static void msm_gpu_crashstate_capture(struct msm_gpu *gpu,
 {
 	struct msm_gpu_state *state;
 
+	/* Check if the target supports capturing crash state */
+	if (!gpu->funcs->gpu_state_get)
+		return;
+
 	/* Only save one crash state at a time */
 	if (gpu->crashstate)
 		return;
-- 
GitLab


From 482f96324a4e08818db7d75bb12beaaea6c9561d Mon Sep 17 00:00:00 2001
From: Sharat Masetty <smasetty@codeaurora.org>
Date: Fri, 12 Oct 2018 14:26:56 +0530
Subject: [PATCH 1615/1890] drm/msm: Fix task dump in gpu recovery

The current recovery code gets a pointer to the task struct and does a
few things all within the rcu_read_lock. This puts constraints on the
types of gfp flags that can be used within the rcu lock. This patch
instead gets a reference to the task within the rcu lock and releases
the lock immediately, this way the task stays afloat until we need it and
we also get to use the desired gfp flags.

Signed-off-by: Sharat Masetty <smasetty@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
Signed-off-by: Sean Paul <seanpaul@chromium.org>
---
 drivers/gpu/drm/msm/msm_gpu.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 32d04a9b48cff..2b7c8946adba9 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -438,10 +438,9 @@ static void recover_worker(struct work_struct *work)
 	if (submit) {
 		struct task_struct *task;
 
-		rcu_read_lock();
-		task = pid_task(submit->pid, PIDTYPE_PID);
+		task = get_pid_task(submit->pid, PIDTYPE_PID);
 		if (task) {
-			comm = kstrdup(task->comm, GFP_ATOMIC);
+			comm = kstrdup(task->comm, GFP_KERNEL);
 
 			/*
 			 * So slightly annoying, in other paths like
@@ -454,10 +453,10 @@ static void recover_worker(struct work_struct *work)
 			 * about the submit going away.
 			 */
 			mutex_unlock(&dev->struct_mutex);
-			cmd = kstrdup_quotable_cmdline(task, GFP_ATOMIC);
+			cmd = kstrdup_quotable_cmdline(task, GFP_KERNEL);
+			put_task_struct(task);
 			mutex_lock(&dev->struct_mutex);
 		}
-		rcu_read_unlock();
 
 		if (comm && cmd) {
 			dev_err(dev->dev, "%s: offending task: %s (%s)\n",
-- 
GitLab


From 51270de91412b819f654b849db3bf92dac0a0855 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 13 Oct 2018 13:28:06 +0300
Subject: [PATCH 1616/1890] drm/msm/gpu: Fix a couple memory leaks in debugfs

The msm_gpu_open() function should free "show_priv" on error or it
causes static checker warnings.

Fixes: 4f776f4511c7 ("drm/msm/gpu: Convert the GPU show function to use the GPU state")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Rob Clark <robdclark@gmail.com>
Signed-off-by: Sean Paul <seanpaul@chromium.org>
---
 drivers/gpu/drm/msm/msm_debugfs.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_debugfs.c b/drivers/gpu/drm/msm/msm_debugfs.c
index f0da0d3c8a80f..d756436c1fcd3 100644
--- a/drivers/gpu/drm/msm/msm_debugfs.c
+++ b/drivers/gpu/drm/msm/msm_debugfs.c
@@ -84,7 +84,7 @@ static int msm_gpu_open(struct inode *inode, struct file *file)
 
 	ret = mutex_lock_interruptible(&dev->struct_mutex);
 	if (ret)
-		return ret;
+		goto free_priv;
 
 	pm_runtime_get_sync(&gpu->pdev->dev);
 	show_priv->state = gpu->funcs->gpu_state_get(gpu);
@@ -94,13 +94,20 @@ static int msm_gpu_open(struct inode *inode, struct file *file)
 
 	if (IS_ERR(show_priv->state)) {
 		ret = PTR_ERR(show_priv->state);
-		kfree(show_priv);
-		return ret;
+		goto free_priv;
 	}
 
 	show_priv->dev = dev;
 
-	return single_open(file, msm_gpu_show, show_priv);
+	ret = single_open(file, msm_gpu_show, show_priv);
+	if (ret)
+		goto free_priv;
+
+	return 0;
+
+free_priv:
+	kfree(show_priv);
+	return ret;
 }
 
 static const struct file_operations msm_gpu_fops = {
-- 
GitLab


From 47e7f506ee6590ceb2efa1f08aca7f9f2ee5c1d3 Mon Sep 17 00:00:00 2001
From: Rob Clark <robdclark@gmail.com>
Date: Mon, 15 Oct 2018 11:22:57 -0400
Subject: [PATCH 1617/1890] drm/msm: fix handling of cmdstream offset

Userspace hasn't used submit cmds with submit_offset != 0 for a while,
but this starts cropping up again with cmdstream sub-buffer-allocation
in libdrm_freedreno.

Doesn't do much good to increment the buf ptr before assigning it.

Fixes: 78b8e5b847b4 drm/msm: dump a rd GPUADDR header for all buffers in the command
Reviewed-by: Kristian H. Kristensen <hoegsberg@google.com>
Signed-off-by: Rob Clark <robdclark@gmail.com>
Signed-off-by: Sean Paul <seanpaul@chromium.org>
---
 drivers/gpu/drm/msm/msm_rd.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c
index cca9334584391..0c2c8d2c631f3 100644
--- a/drivers/gpu/drm/msm/msm_rd.c
+++ b/drivers/gpu/drm/msm/msm_rd.c
@@ -316,10 +316,11 @@ static void snapshot_buf(struct msm_rd_state *rd,
 		uint64_t iova, uint32_t size)
 {
 	struct msm_gem_object *obj = submit->bos[idx].obj;
+	unsigned offset = 0;
 	const char *buf;
 
 	if (iova) {
-		buf += iova - submit->bos[idx].iova;
+		offset = iova - submit->bos[idx].iova;
 	} else {
 		iova = submit->bos[idx].iova;
 		size = obj->base.size;
@@ -340,6 +341,8 @@ static void snapshot_buf(struct msm_rd_state *rd,
 	if (IS_ERR(buf))
 		return;
 
+	buf += offset;
+
 	rd_write_section(rd, RD_BUFFER_CONTENTS, buf, size);
 
 	msm_gem_put_vaddr(&obj->base);
-- 
GitLab


From 8531f0587f5c9e1a74cd9543a97617349f5e0706 Mon Sep 17 00:00:00 2001
From: Abhinav Kumar <abhinavk@codeaurora.org>
Date: Thu, 14 Jun 2018 21:01:10 -0700
Subject: [PATCH 1618/1890] drm/msm/dsi: configure VCO rate for 10nm PLL driver

Currenty the VCO rate in the 10nm PLL driver relies
on the parent rate which is not configured.

Configure the VCO rate to 19.2 Mhz as required by
the 10nm PLL driver.

Signed-off-by: Abhinav Kumar <abhinavk@codeaurora.org>
Signed-off-by: Sean Paul <seanpaul@chromium.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/dsi/pll/dsi_pll_10nm.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/dsi/pll/dsi_pll_10nm.c b/drivers/gpu/drm/msm/dsi/pll/dsi_pll_10nm.c
index 4c03f0b7343ed..41bec570c5184 100644
--- a/drivers/gpu/drm/msm/dsi/pll/dsi_pll_10nm.c
+++ b/drivers/gpu/drm/msm/dsi/pll/dsi_pll_10nm.c
@@ -39,6 +39,8 @@
 #define DSI_PIXEL_PLL_CLK		1
 #define NUM_PROVIDED_CLKS		2
 
+#define VCO_REF_CLK_RATE		19200000
+
 struct dsi_pll_regs {
 	u32 pll_prop_gain_rate;
 	u32 pll_lockdet_rate;
@@ -316,7 +318,7 @@ static int dsi_pll_10nm_vco_set_rate(struct clk_hw *hw, unsigned long rate,
 	    parent_rate);
 
 	pll_10nm->vco_current_rate = rate;
-	pll_10nm->vco_ref_clk_rate = parent_rate;
+	pll_10nm->vco_ref_clk_rate = VCO_REF_CLK_RATE;
 
 	dsi_pll_setup_config(pll_10nm);
 
-- 
GitLab


From 3b712e43e3876b42b38321ecf790a1f5fe59c834 Mon Sep 17 00:00:00 2001
From: Sean Paul <seanpaul@chromium.org>
Date: Wed, 3 Oct 2018 16:22:31 -0400
Subject: [PATCH 1619/1890] drm/msm: Grab a vblank reference when waiting for
 commit_done

Similar to the atomic helpers, we should enable vblank while we're
waiting for the commit to finish. DPU needs this, MDP5 seems to work
fine without it.

Reviewed-by: Abhinav Kumar <abhinavk@codeaurora.org>
Signed-off-by: Sean Paul <seanpaul@chromium.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/msm_atomic.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/msm/msm_atomic.c b/drivers/gpu/drm/msm/msm_atomic.c
index 4bcdeca7479db..2088a20eb2702 100644
--- a/drivers/gpu/drm/msm/msm_atomic.c
+++ b/drivers/gpu/drm/msm/msm_atomic.c
@@ -34,7 +34,12 @@ static void msm_atomic_wait_for_commit_done(struct drm_device *dev,
 		if (!new_crtc_state->active)
 			continue;
 
+		if (drm_crtc_vblank_get(crtc))
+			continue;
+
 		kms->funcs->wait_for_crtc_commit_done(kms, crtc);
+
+		drm_crtc_vblank_put(crtc);
 	}
 }
 
-- 
GitLab


From ef1a8409348966f0b25ff97a170d6d0367710ea9 Mon Sep 17 00:00:00 2001
From: Alexander Popov <alex.popov@linux.com>
Date: Tue, 13 Nov 2018 00:08:48 +0300
Subject: [PATCH 1620/1890] stackleak: Disable function tracing and kprobes for
 stackleak_erase()

The stackleak_erase() function is called on the trampoline stack at the
end of syscall. This stack is not big enough for ftrace and kprobes
operations, e.g. it can be exhausted if we use kprobe_events for
stackleak_erase().

So let's disable function tracing and kprobes of stackleak_erase().

Reported-by: kernel test robot <lkp@intel.com>
Fixes: 10e9ae9fabaf ("gcc-plugins: Add STACKLEAK plugin for tracking the kernel stack")
Signed-off-by: Alexander Popov <alex.popov@linux.com>
Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 kernel/stackleak.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/kernel/stackleak.c b/kernel/stackleak.c
index e42892926244d..08cb57eed3893 100644
--- a/kernel/stackleak.c
+++ b/kernel/stackleak.c
@@ -11,6 +11,7 @@
  */
 
 #include <linux/stackleak.h>
+#include <linux/kprobes.h>
 
 #ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
 #include <linux/jump_label.h>
@@ -47,7 +48,7 @@ int stack_erasing_sysctl(struct ctl_table *table, int write,
 #define skip_erasing()	false
 #endif /* CONFIG_STACKLEAK_RUNTIME_DISABLE */
 
-asmlinkage void stackleak_erase(void)
+asmlinkage void notrace stackleak_erase(void)
 {
 	/* It would be nice not to have 'kstack_ptr' and 'boundary' on stack */
 	unsigned long kstack_ptr = current->lowest_stack;
@@ -101,6 +102,7 @@ asmlinkage void stackleak_erase(void)
 	/* Reset the 'lowest_stack' value for the next syscall */
 	current->lowest_stack = current_top_of_stack() - THREAD_SIZE/64;
 }
+NOKPROBE_SYMBOL(stackleak_erase);
 
 void __used stackleak_track_stack(void)
 {
-- 
GitLab


From 2a5cf35cd6c56b2924bce103413ad3381bdc31fa Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Sat, 1 Dec 2018 00:38:18 +0800
Subject: [PATCH 1621/1890] block: fix single range discard merge

There are actually two kinds of discard merge:

- one is the normal discard merge, just like normal read/write request,
and call it single-range discard

- another is the multi-range discard, queue_max_discard_segments(rq->q) > 1

For the former case, queue_max_discard_segments(rq->q) is 1, and we
should handle this kind of discard merge like the normal read/write
request.

This patch fixes the following kernel panic issue[1], which is caused by
not removing the single-range discard request from elevator queue.

Guangwu has one raid discard test case, in which this issue is a bit
easier to trigger, and I verified that this patch can fix the kernel
panic issue in Guangwu's test case.

[1] kernel panic log from Jens's report

 BUG: unable to handle kernel NULL pointer dereference at 0000000000000148
 PGD 0 P4D 0.
 Oops: 0000 [#1] SMP PTI
 CPU: 37 PID: 763 Comm: kworker/37:1H Not tainted \
4.20.0-rc3-00649-ge64d9a554a91-dirty #14  Hardware name: Wiwynn \
Leopard-Orv2/Leopard-DDR BW, BIOS LBM08   03/03/2017       Workqueue: kblockd \
blk_mq_run_work_fn                                            RIP: \
0010:blk_mq_get_driver_tag+0x81/0x120                                       Code: 24 \
10 48 89 7c 24 20 74 21 83 fa ff 0f 95 c0 48 8b 4c 24 28 65 48 33 0c 25 28 00 00 00 \
0f 85 96 00 00 00 48 83 c4 30 5b 5d c3 <48> 8b 87 48 01 00 00 8b 40 04 39 43 20 72 37 \
f6 87 b0 00 00 00 02  RSP: 0018:ffffc90004aabd30 EFLAGS: 00010246                     \
  RAX: 0000000000000003 RBX: ffff888465ea1300 RCX: ffffc90004aabde8
 RDX: 00000000ffffffff RSI: ffffc90004aabde8 RDI: 0000000000000000
 RBP: 0000000000000000 R08: ffff888465ea1348 R09: 0000000000000000
 R10: 0000000000001000 R11: 00000000ffffffff R12: ffff888465ea1300
 R13: 0000000000000000 R14: ffff888465ea1348 R15: ffff888465d10000
 FS:  0000000000000000(0000) GS:ffff88846f9c0000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 0000000000000148 CR3: 000000000220a003 CR4: 00000000003606e0
 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
 Call Trace:
  blk_mq_dispatch_rq_list+0xec/0x480
  ? elv_rb_del+0x11/0x30
  blk_mq_do_dispatch_sched+0x6e/0xf0
  blk_mq_sched_dispatch_requests+0xfa/0x170
  __blk_mq_run_hw_queue+0x5f/0xe0
  process_one_work+0x154/0x350
  worker_thread+0x46/0x3c0
  kthread+0xf5/0x130
  ? process_one_work+0x350/0x350
  ? kthread_destroy_worker+0x50/0x50
  ret_from_fork+0x1f/0x30
 Modules linked in: sb_edac x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel \
kvm switchtec irqbypass iTCO_wdt iTCO_vendor_support efivars cdc_ether usbnet mii \
cdc_acm i2c_i801 lpc_ich mfd_core ipmi_si ipmi_devintf ipmi_msghandler acpi_cpufreq \
button sch_fq_codel nfsd nfs_acl lockd grace auth_rpcgss oid_registry sunrpc nvme \
nvme_core fuse sg loop efivarfs autofs4  CR2: 0000000000000148                        \

 ---[ end trace 340a1fb996df1b9b ]---
 RIP: 0010:blk_mq_get_driver_tag+0x81/0x120
 Code: 24 10 48 89 7c 24 20 74 21 83 fa ff 0f 95 c0 48 8b 4c 24 28 65 48 33 0c 25 28 \
00 00 00 0f 85 96 00 00 00 48 83 c4 30 5b 5d c3 <48> 8b 87 48 01 00 00 8b 40 04 39 43 \
20 72 37 f6 87 b0 00 00 00 02

Fixes: 445251d0f4d329a ("blk-mq: fix discard merge with scheduler attached")
Reported-by: Jens Axboe <axboe@kernel.dk>
Cc: Guangwu Zhang <guazhang@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Jianchao Wang <jianchao.w.wang@oracle.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-merge.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/blk-merge.c b/block/blk-merge.c
index e7696c47489ad..7695034f4b87f 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -820,7 +820,7 @@ static struct request *attempt_merge(struct request_queue *q,
 
 	req->__data_len += blk_rq_bytes(next);
 
-	if (req_op(req) != REQ_OP_DISCARD)
+	if (!blk_discard_mergable(req))
 		elv_merge_requests(q, req, next);
 
 	/*
-- 
GitLab


From d6e820fcd4cf08b11d291a1dd7bbd0636914647c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Thu, 29 Nov 2018 16:25:10 +0100
Subject: [PATCH 1622/1890] drm/ttm: fix LRU handling in
 ttm_buffer_object_transfer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We need to set the NO_EVICT flag on the ghost object or otherwise we are
adding it to the LRU.

When it is added to the LRU we can run into a race between destroying
and evicting it again.

Signed-off-by: Christian KÃ¶nig <christian.koenig@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo_util.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index ba80150d10524..895d77d799e4f 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -492,8 +492,10 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
 	if (!fbo)
 		return -ENOMEM;
 
-	ttm_bo_get(bo);
 	fbo->base = *bo;
+	fbo->base.mem.placement |= TTM_PL_FLAG_NO_EVICT;
+
+	ttm_bo_get(bo);
 	fbo->bo = bo;
 
 	/**
-- 
GitLab


From f10d9102de2816050af650da2c8f8b62535bbb72 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Tue, 27 Nov 2018 11:41:27 -0500
Subject: [PATCH 1623/1890] drm/amdgpu: add VCN JPEG support
 amdgpu_ctx_num_entities
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Looks like it was missed when setting support was added.

Reviewed-by: Christian KÃ¶nig <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index f9b54236102d5..95f4c4139fc60 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -39,6 +39,7 @@ const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
 	[AMDGPU_HW_IP_UVD_ENC]	=	1,
 	[AMDGPU_HW_IP_VCN_DEC]	=	1,
 	[AMDGPU_HW_IP_VCN_ENC]	=	1,
+	[AMDGPU_HW_IP_VCN_JPEG]	=	1,
 };
 
 static int amdgput_ctx_total_num_entities(void)
-- 
GitLab


From 3bfa8897e4d08f822d1d58cf6cbbffbccef82e08 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Fri, 26 Oct 2018 15:59:05 +0200
Subject: [PATCH 1624/1890] drm/amdgpu: wait for IB test on first device open
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Instead of delaying that to the first query. Otherwise we could try to use the
SDMA for VM updates before the IB tests are done.

Signed-off-by: Christian KÃ¶nig <christian.koenig@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 81732a84c2ab0..8f3d44e5e7878 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -467,9 +467,6 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 	if (!info->return_size || !info->return_pointer)
 		return -EINVAL;
 
-	/* Ensure IB tests are run on ring */
-	flush_delayed_work(&adev->late_init_work);
-
 	switch (info->query) {
 	case AMDGPU_INFO_ACCEL_WORKING:
 		ui32 = adev->accel_working;
@@ -950,6 +947,9 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 	struct amdgpu_fpriv *fpriv;
 	int r, pasid;
 
+	/* Ensure IB tests are run on ring */
+	flush_delayed_work(&adev->late_init_work);
+
 	file_priv->driver_priv = NULL;
 
 	r = pm_runtime_get_sync(dev->dev);
-- 
GitLab


From ea2412dc21cc790335d319181dddc43682aef164 Mon Sep 17 00:00:00 2001
From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Date: Thu, 29 Nov 2018 09:55:59 +0000
Subject: [PATCH 1625/1890] ACPI/IORT: Fix iort_get_platform_device_domain()
 uninitialized pointer value

Running the Clang static analyzer on IORT code detected the following
error:

Logic error: Branch condition evaluates to a garbage value

in

iort_get_platform_device_domain()

If the named component associated with a given device has no IORT
mappings, iort_get_platform_device_domain() exits its MSI mapping loop
with msi_parent pointer containing garbage, which can lead to erroneous
code path execution.

Initialize the msi_parent pointer, fixing the bug.

Fixes: d4f54a186667 ("ACPI: platform: setup MSI domain for ACPI based
platform device")
Reported-by: Patrick Bellasi <patrick.bellasi@arm.com>
Reviewed-by: Hanjun Guo <hanjun.guo@linaro.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Cc: Sudeep Holla <sudeep.holla@arm.com>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 drivers/acpi/arm64/iort.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index 2a361e22d38d0..70f4e80b9246a 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -700,7 +700,7 @@ static void iort_set_device_domain(struct device *dev,
  */
 static struct irq_domain *iort_get_platform_device_domain(struct device *dev)
 {
-	struct acpi_iort_node *node, *msi_parent;
+	struct acpi_iort_node *node, *msi_parent = NULL;
 	struct fwnode_handle *iort_fwnode;
 	struct acpi_iort_its_group *its;
 	int i;
-- 
GitLab


From b7cc40c32a8bfa6f2581a71747f6a7d491fe43ba Mon Sep 17 00:00:00 2001
From: Kevin Hilman <khilman@baylibre.com>
Date: Fri, 30 Nov 2018 15:51:56 +0300
Subject: [PATCH 1626/1890] ARC: change defconfig defaults to ARCv2

Change the default defconfig (used with 'make defconfig') to the ARCv2
nsim_hs_defconfig, and also switch the default Kconfig ISA selection to
ARCv2.

This allows several default defconfigs (e.g. make defconfig, make
allnoconfig, make tinyconfig) to all work with ARCv2 by default.

Note since we change default architecture from ARCompact to ARCv2
it's required to explicitly mention architecture type in ARCompact
defconfigs otherwise ARCv2 will be implied and binaries will be
generated for ARCv2.

Cc: <stable@vger.kernel.org> # 4.4.x
Signed-off-by: Kevin Hilman <khilman@baylibre.com>
Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/Kconfig                    | 2 +-
 arch/arc/Makefile                   | 2 +-
 arch/arc/configs/axs101_defconfig   | 1 +
 arch/arc/configs/nps_defconfig      | 1 +
 arch/arc/configs/nsim_700_defconfig | 1 +
 arch/arc/configs/nsimosci_defconfig | 1 +
 arch/arc/configs/tb10x_defconfig    | 1 +
 7 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 5fcbda6b37ccf..6dd7835573308 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -109,7 +109,7 @@ endmenu
 
 choice
 	prompt "ARC Instruction Set"
-	default ISA_ARCOMPACT
+	default ISA_ARCV2
 
 config ISA_ARCOMPACT
 	bool "ARCompact ISA"
diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index c64c505d966c7..df00578c279d4 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -6,7 +6,7 @@
 # published by the Free Software Foundation.
 #
 
-KBUILD_DEFCONFIG := nsim_700_defconfig
+KBUILD_DEFCONFIG := nsim_hs_defconfig
 
 cflags-y	+= -fno-common -pipe -fno-builtin -mmedium-calls -D__linux__
 cflags-$(CONFIG_ISA_ARCOMPACT)	+= -mA7
diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig
index 8c23bd086cd07..020d4493edfd0 100644
--- a/arch/arc/configs/axs101_defconfig
+++ b/arch/arc/configs/axs101_defconfig
@@ -14,6 +14,7 @@ CONFIG_PERF_EVENTS=y
 # CONFIG_VM_EVENT_COUNTERS is not set
 # CONFIG_SLUB_DEBUG is not set
 # CONFIG_COMPAT_BRK is not set
+CONFIG_ISA_ARCOMPACT=y
 CONFIG_MODULES=y
 CONFIG_MODULE_FORCE_LOAD=y
 CONFIG_MODULE_UNLOAD=y
diff --git a/arch/arc/configs/nps_defconfig b/arch/arc/configs/nps_defconfig
index ae7a0d8be98d5..6e84060e7c90a 100644
--- a/arch/arc/configs/nps_defconfig
+++ b/arch/arc/configs/nps_defconfig
@@ -15,6 +15,7 @@ CONFIG_SYSCTL_SYSCALL=y
 CONFIG_EMBEDDED=y
 CONFIG_PERF_EVENTS=y
 # CONFIG_COMPAT_BRK is not set
+CONFIG_ISA_ARCOMPACT=y
 CONFIG_KPROBES=y
 CONFIG_MODULES=y
 CONFIG_MODULE_FORCE_LOAD=y
diff --git a/arch/arc/configs/nsim_700_defconfig b/arch/arc/configs/nsim_700_defconfig
index 8e0b8b134cd9e..219c2a65294b8 100644
--- a/arch/arc/configs/nsim_700_defconfig
+++ b/arch/arc/configs/nsim_700_defconfig
@@ -15,6 +15,7 @@ CONFIG_EMBEDDED=y
 CONFIG_PERF_EVENTS=y
 # CONFIG_SLUB_DEBUG is not set
 # CONFIG_COMPAT_BRK is not set
+CONFIG_ISA_ARCOMPACT=y
 CONFIG_KPROBES=y
 CONFIG_MODULES=y
 # CONFIG_LBDAF is not set
diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig
index ad77f20e5aa6e..35dfc6491a094 100644
--- a/arch/arc/configs/nsimosci_defconfig
+++ b/arch/arc/configs/nsimosci_defconfig
@@ -15,6 +15,7 @@ CONFIG_EMBEDDED=y
 CONFIG_PERF_EVENTS=y
 # CONFIG_SLUB_DEBUG is not set
 # CONFIG_COMPAT_BRK is not set
+CONFIG_ISA_ARCOMPACT=y
 CONFIG_KPROBES=y
 CONFIG_MODULES=y
 # CONFIG_LBDAF is not set
diff --git a/arch/arc/configs/tb10x_defconfig b/arch/arc/configs/tb10x_defconfig
index a7f65313f84a5..e71ade3cf9c80 100644
--- a/arch/arc/configs/tb10x_defconfig
+++ b/arch/arc/configs/tb10x_defconfig
@@ -19,6 +19,7 @@ CONFIG_KALLSYMS_ALL=y
 # CONFIG_AIO is not set
 CONFIG_EMBEDDED=y
 # CONFIG_COMPAT_BRK is not set
+CONFIG_ISA_ARCOMPACT=y
 CONFIG_SLAB=y
 CONFIG_MODULES=y
 CONFIG_MODULE_FORCE_LOAD=y
-- 
GitLab


From 10d443431dc2bb733cf7add99b453e3fb9047a2e Mon Sep 17 00:00:00 2001
From: Jose Abreu <joabreu@synopsys.com>
Date: Fri, 30 Nov 2018 09:47:31 +0000
Subject: [PATCH 1627/1890] ARC: io.h: Implement reads{x}()/writes{x}()

Some ARC CPU's do not support unaligned loads/stores. Currently, generic
implementation of reads{b/w/l}()/writes{b/w/l}() is being used with ARC.
This can lead to misfunction of some drivers as generic functions do a
plain dereference of a pointer that can be unaligned.

Let's use {get/put}_unaligned() helpers instead of plain dereference of
pointer in order to fix. The helpers allow to get and store data from an
unaligned address whilst preserving the CPU internal alignment.
According to [1], the use of these helpers are costly in terms of
performance so we added an initial check for a buffer already aligned so
that the usage of the helpers can be avoided, when possible.

[1] Documentation/unaligned-memory-access.txt

Cc: Alexey Brodkin <abrodkin@synopsys.com>
Cc: Joao Pinto <jpinto@synopsys.com>
Cc: David Laight <David.Laight@ACULAB.COM>
Tested-by: Vitor Soares <soares@synopsys.com>
Signed-off-by: Jose Abreu <joabreu@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/include/asm/io.h | 72 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)

diff --git a/arch/arc/include/asm/io.h b/arch/arc/include/asm/io.h
index c22b181e8206f..2f39d9b3886e4 100644
--- a/arch/arc/include/asm/io.h
+++ b/arch/arc/include/asm/io.h
@@ -12,6 +12,7 @@
 #include <linux/types.h>
 #include <asm/byteorder.h>
 #include <asm/page.h>
+#include <asm/unaligned.h>
 
 #ifdef CONFIG_ISA_ARCV2
 #include <asm/barrier.h>
@@ -94,6 +95,42 @@ static inline u32 __raw_readl(const volatile void __iomem *addr)
 	return w;
 }
 
+/*
+ * {read,write}s{b,w,l}() repeatedly access the same IO address in
+ * native endianness in 8-, 16-, 32-bit chunks {into,from} memory,
+ * @count times
+ */
+#define __raw_readsx(t,f) \
+static inline void __raw_reads##f(const volatile void __iomem *addr,	\
+				  void *ptr, unsigned int count)	\
+{									\
+	bool is_aligned = ((unsigned long)ptr % ((t) / 8)) == 0;	\
+	u##t *buf = ptr;						\
+									\
+	if (!count)							\
+		return;							\
+									\
+	/* Some ARC CPU's don't support unaligned accesses */		\
+	if (is_aligned) {						\
+		do {							\
+			u##t x = __raw_read##f(addr);			\
+			*buf++ = x;					\
+		} while (--count);					\
+	} else {							\
+		do {							\
+			u##t x = __raw_read##f(addr);			\
+			put_unaligned(x, buf++);			\
+		} while (--count);					\
+	}								\
+}
+
+#define __raw_readsb __raw_readsb
+__raw_readsx(8, b)
+#define __raw_readsw __raw_readsw
+__raw_readsx(16, w)
+#define __raw_readsl __raw_readsl
+__raw_readsx(32, l)
+
 #define __raw_writeb __raw_writeb
 static inline void __raw_writeb(u8 b, volatile void __iomem *addr)
 {
@@ -126,6 +163,35 @@ static inline void __raw_writel(u32 w, volatile void __iomem *addr)
 
 }
 
+#define __raw_writesx(t,f)						\
+static inline void __raw_writes##f(volatile void __iomem *addr, 	\
+				   const void *ptr, unsigned int count)	\
+{									\
+	bool is_aligned = ((unsigned long)ptr % ((t) / 8)) == 0;	\
+	const u##t *buf = ptr;						\
+									\
+	if (!count)							\
+		return;							\
+									\
+	/* Some ARC CPU's don't support unaligned accesses */		\
+	if (is_aligned) {						\
+		do {							\
+			__raw_write##f(*buf++, addr);			\
+		} while (--count);					\
+	} else {							\
+		do {							\
+			__raw_write##f(get_unaligned(buf++), addr);	\
+		} while (--count);					\
+	}								\
+}
+
+#define __raw_writesb __raw_writesb
+__raw_writesx(8, b)
+#define __raw_writesw __raw_writesw
+__raw_writesx(16, w)
+#define __raw_writesl __raw_writesl
+__raw_writesx(32, l)
+
 /*
  * MMIO can also get buffered/optimized in micro-arch, so barriers needed
  * Based on ARM model for the typical use case
@@ -141,10 +207,16 @@ static inline void __raw_writel(u32 w, volatile void __iomem *addr)
 #define readb(c)		({ u8  __v = readb_relaxed(c); __iormb(); __v; })
 #define readw(c)		({ u16 __v = readw_relaxed(c); __iormb(); __v; })
 #define readl(c)		({ u32 __v = readl_relaxed(c); __iormb(); __v; })
+#define readsb(p,d,l)		({ __raw_readsb(p,d,l); __iormb(); })
+#define readsw(p,d,l)		({ __raw_readsw(p,d,l); __iormb(); })
+#define readsl(p,d,l)		({ __raw_readsl(p,d,l); __iormb(); })
 
 #define writeb(v,c)		({ __iowmb(); writeb_relaxed(v,c); })
 #define writew(v,c)		({ __iowmb(); writew_relaxed(v,c); })
 #define writel(v,c)		({ __iowmb(); writel_relaxed(v,c); })
+#define writesb(p,d,l)		({ __iowmb(); __raw_writesb(p,d,l); })
+#define writesw(p,d,l)		({ __iowmb(); __raw_writesw(p,d,l); })
+#define writesl(p,d,l)		({ __iowmb(); __raw_writesl(p,d,l); })
 
 /*
  * Relaxed API for drivers which can handle barrier ordering themselves
-- 
GitLab


From 7e76e65ce7e9405a9608e1b806be58a6cbf4a737 Mon Sep 17 00:00:00 2001
From: Andy Gross <andy.gross@linaro.org>
Date: Thu, 29 Nov 2018 16:31:06 -0600
Subject: [PATCH 1628/1890] MAINTAINERS: Remove unused Qualcomm SoC mailing
 list

This patch removes the linux-soc mailing list from the Qualcomm SoC
entry.  We use the linux-msm and there is no need to have the second
one and this clears the list for use by others.

Signed-off-by: Andy Gross <andy.gross@linaro.org>
Signed-off-by: Olof Johansson <olof@lixom.net>
---
 MAINTAINERS | 1 -
 1 file changed, 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index f3ea3287195d6..f412f5a2c66d5 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1922,7 +1922,6 @@ ARM/QUALCOMM SUPPORT
 M:	Andy Gross <andy.gross@linaro.org>
 M:	David Brown <david.brown@linaro.org>
 L:	linux-arm-msm@vger.kernel.org
-L:	linux-soc@vger.kernel.org
 S:	Maintained
 F:	Documentation/devicetree/bindings/soc/qcom/
 F:	arch/arm/boot/dts/qcom-*.dts
-- 
GitLab


From b1286ed7158e9b62787508066283ab0b8850b518 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 30 Nov 2018 12:13:15 -0800
Subject: [PATCH 1629/1890] test_hexdump: use memcpy instead of strncpy

New versions of gcc reasonably warn about the odd pattern of

	strncpy(p, q, strlen(q));

which really doesn't make sense: the strncpy() ends up being just a slow
and odd way to write memcpy() in this case.

Apparently there was a patch for this floating around earlier, but it
got lost.

Acked-again-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/test_hexdump.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/test_hexdump.c b/lib/test_hexdump.c
index 626f580b4ff7b..5144899d3c6b8 100644
--- a/lib/test_hexdump.c
+++ b/lib/test_hexdump.c
@@ -99,7 +99,7 @@ static void __init test_hexdump_prepare_test(size_t len, int rowsize,
 		const char *q = *result++;
 		size_t amount = strlen(q);
 
-		strncpy(p, q, amount);
+		memcpy(p, q, amount);
 		p += amount;
 
 		*p++ = ' ';
-- 
GitLab


From 4135cce7fd0a0d755665c02728578c7c5afe4726 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Tue, 27 Nov 2018 19:11:50 +0800
Subject: [PATCH 1630/1890] sctp: update frag_point when stream_interleave is
 set

sctp_assoc_update_frag_point() should be called whenever asoc->pathmtu
changes, but we missed one place in sctp_association_init(). It would
cause frag_point is zero when sending data.

As says in Jakub's reproducer, if sp->pathmtu is set by socketopt, the
new asoc->pathmtu inherits it in sctp_association_init(). Later when
transports are added and their pmtu >= asoc->pathmtu, it will never
call sctp_assoc_update_frag_point() to set frag_point.

This patch is to fix it by updating frag_point after asoc->pathmtu is
set as sp->pathmtu in sctp_association_init(). Note that it moved them
after sctp_stream_init(), as stream->si needs to be set first.

Frag_point's calculation is also related with datachunk's type, so it
needs to update frag_point when stream->si may be changed in
sctp_process_init().

v1->v2:
  - call sctp_assoc_update_frag_point() separately in sctp_process_init
    and sctp_association_init, per Marcelo's suggestion.

Fixes: 2f5e3c9df693 ("sctp: introduce sctp_assoc_update_frag_point")
Reported-by: Jakub Audykowicz <jakub.audykowicz@gmail.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/associola.c     | 7 ++++---
 net/sctp/sm_make_chunk.c | 3 +++
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 6a28b96e779e6..dd77ec3892b6a 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -118,9 +118,6 @@ static struct sctp_association *sctp_association_init(
 	asoc->flowlabel = sp->flowlabel;
 	asoc->dscp = sp->dscp;
 
-	/* Initialize default path MTU. */
-	asoc->pathmtu = sp->pathmtu;
-
 	/* Set association default SACK delay */
 	asoc->sackdelay = msecs_to_jiffies(sp->sackdelay);
 	asoc->sackfreq = sp->sackfreq;
@@ -252,6 +249,10 @@ static struct sctp_association *sctp_association_init(
 			     0, gfp))
 		goto fail_init;
 
+	/* Initialize default path MTU. */
+	asoc->pathmtu = sp->pathmtu;
+	sctp_assoc_update_frag_point(asoc);
+
 	/* Assume that peer would support both address types unless we are
 	 * told otherwise.
 	 */
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 4a4fd19712552..f4ac6c592e139 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -2462,6 +2462,9 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk,
 			     asoc->c.sinit_max_instreams, gfp))
 		goto clean_up;
 
+	/* Update frag_point when stream_interleave may get changed. */
+	sctp_assoc_update_frag_point(asoc);
+
 	if (!asoc->temp && sctp_assoc_set_id(asoc, gfp))
 		goto clean_up;
 
-- 
GitLab


From 5f2b8b62786853341a20d4cd4948f9cbca3db002 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Tue, 27 Nov 2018 14:21:43 +0100
Subject: [PATCH 1631/1890] net: stmmac: Move debugfs init/exit to
 ->probe()/->remove()

Setting up and tearing down debugfs is current unbalanced, as seen by
this error during resume from suspend:

    [  752.134067] dwc-eth-dwmac 2490000.ethernet eth0: ERROR failed to create debugfs directory
    [  752.134347] dwc-eth-dwmac 2490000.ethernet eth0: stmmac_hw_setup: failed debugFS registration

The imbalance happens because the driver creates the debugfs hierarchy
when the device is opened and tears it down when the device is closed.
There's little gain in that, and it could be argued that it is even
surprising because it's not usually done for other devices. Fix the
imbalance by moving the debugfs creation and teardown to the driver's
->probe() and ->remove() implementations instead.

Note that the ring descriptors cannot be read while the interface is
down, so make sure to return an empty file when the descriptors_status
debugfs file is read.

Signed-off-by: Thierry Reding <treding@nvidia.com>
Acked-by: Jose Abreu <joabreu@synopsys.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/stmicro/stmmac/stmmac_main.c | 23 +++++++++++--------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 076a8be18d675..5551fead8f664 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2550,12 +2550,6 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
 			netdev_warn(priv->dev, "PTP init failed\n");
 	}
 
-#ifdef CONFIG_DEBUG_FS
-	ret = stmmac_init_fs(dev);
-	if (ret < 0)
-		netdev_warn(priv->dev, "%s: failed debugFS registration\n",
-			    __func__);
-#endif
 	priv->tx_lpi_timer = STMMAC_DEFAULT_TWT_LS;
 
 	if (priv->use_riwt) {
@@ -2756,10 +2750,6 @@ static int stmmac_release(struct net_device *dev)
 
 	netif_carrier_off(dev);
 
-#ifdef CONFIG_DEBUG_FS
-	stmmac_exit_fs(dev);
-#endif
-
 	stmmac_release_ptp(priv);
 
 	return 0;
@@ -3899,6 +3889,9 @@ static int stmmac_sysfs_ring_read(struct seq_file *seq, void *v)
 	u32 tx_count = priv->plat->tx_queues_to_use;
 	u32 queue;
 
+	if ((dev->flags & IFF_UP) == 0)
+		return 0;
+
 	for (queue = 0; queue < rx_count; queue++) {
 		struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
 
@@ -4397,6 +4390,13 @@ int stmmac_dvr_probe(struct device *device,
 		goto error_netdev_register;
 	}
 
+#ifdef CONFIG_DEBUG_FS
+	ret = stmmac_init_fs(ndev);
+	if (ret < 0)
+		netdev_warn(priv->dev, "%s: failed debugFS registration\n",
+			    __func__);
+#endif
+
 	return ret;
 
 error_netdev_register:
@@ -4432,6 +4432,9 @@ int stmmac_dvr_remove(struct device *dev)
 
 	netdev_info(priv->dev, "%s: removing driver", __func__);
 
+#ifdef CONFIG_DEBUG_FS
+	stmmac_exit_fs(ndev);
+#endif
 	stmmac_stop_all_dma(priv);
 
 	stmmac_mac_set(priv, priv->ioaddr, false);
-- 
GitLab


From 56e0e295091dde5d0346fad08d3d8b6c07084c9d Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 27 Nov 2018 14:00:15 +0000
Subject: [PATCH 1632/1890] liquidio: fix spelling mistake "deferal" ->
 "deferral"

There is a spelling mistake in the oct_stats_strings array, fix it.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/liquidio/lio_ethtool.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
index 4c3925af53bcc..abe5d0dac8510 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
@@ -111,7 +111,7 @@ static const char oct_stats_strings[][ETH_GSTRING_LEN] = {
 	"mac_tx_one_collision",
 	"mac_tx_multi_collision",
 	"mac_tx_max_collision_fail",
-	"mac_tx_max_deferal_fail",
+	"mac_tx_max_deferral_fail",
 	"mac_tx_fifo_err",
 	"mac_tx_runts",
 
-- 
GitLab


From 43d0e96022ae3c66743c01bba6c18a3afec7b578 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 27 Nov 2018 14:37:17 +0000
Subject: [PATCH 1633/1890] openvswitch: fix spelling mistake "execeeds" ->
 "exceeds"

There is a spelling mistake in a net_warn_ratelimited message, fix this.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/conntrack.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index a4660c48ff014..cd94f925495a5 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -1166,7 +1166,7 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
 				&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
 			if (err) {
 				net_warn_ratelimited("openvswitch: zone: %u "
-					"execeeds conntrack limit\n",
+					"exceeds conntrack limit\n",
 					info->zone.id);
 				return err;
 			}
-- 
GitLab


From 37c4b91f955fdd5f4ad771956b97d35f1321098e Mon Sep 17 00:00:00 2001
From: Dmitry Bogdanov <dmitry.bogdanov@aquantia.com>
Date: Tue, 27 Nov 2018 14:51:17 +0000
Subject: [PATCH 1634/1890] net: aquantia: fix rx checksum offload bits

The last set of csum offload fixes had a leak:

Checksum enabled status bits from rx descriptor were incorrectly
interpreted. Consequently all the other valid logic worked on zero bits.
That caused rx checksum offloads never to trigger.

Tested by dumping rx descriptors and validating resulting csum_level.

Reported-by: Igor Russkikh <igor.russkikh@aquantia.com>
Signed-off-by: Dmitry Bogdanov <dmitry.bogdanov@aquantia.com>
Signed-off-by: Igor Russkikh <igor.russkikh@aquantia.com>
Fixes: ad703c2b9127f ("net: aquantia: invalid checksumm offload implementation")
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
index f02592f43fe36..a7e853fa43c24 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
@@ -674,7 +674,7 @@ static int hw_atl_b0_hw_ring_rx_receive(struct aq_hw_s *self,
 
 		rx_stat = (0x0000003CU & rxd_wb->status) >> 2;
 
-		is_rx_check_sum_enabled = (rxd_wb->type) & (0x3U << 19);
+		is_rx_check_sum_enabled = (rxd_wb->type >> 19) & 0x3U;
 
 		pkt_type = 0xFFU & (rxd_wb->type >> 4);
 
-- 
GitLab


From 3b5b3a3331d141e8f2a7aaae3a94dfa1e61ecbe4 Mon Sep 17 00:00:00 2001
From: Toni Peltonen <peltzi@peltzi.fi>
Date: Tue, 27 Nov 2018 16:56:57 +0200
Subject: [PATCH 1635/1890] bonding: fix 802.3ad state sent to partner when
 unbinding slave

Previously when unbinding a slave the 802.3ad implementation only told
partner that the port is not suitable for aggregation by setting the port
aggregation state from aggregatable to individual. This is not enough. If the
physical layer still stays up and we only unbinded this port from the bond there
is nothing in the aggregation status alone to prevent the partner from sending
traffic towards us. To ensure that the partner doesn't consider this
port at all anymore we should also disable collecting and distributing to
signal that this actor is going away. Also clear AD_STATE_SYNCHRONIZATION to
ensure partner exits collecting + distributing state.

I have tested this behaviour againts Arista EOS switches with mlx5 cards
(physical link stays up even when interface is down) and simulated
the same situation virtually Linux <-> Linux with two network namespaces
running two veth device pairs. In both cases setting aggregation to
individual doesn't alone prevent traffic from being to sent towards this
port given that the link stays up in partners end. Partner still keeps
it's end in collecting + distributing state and continues until timeout is
reached. In most cases this means we are losing the traffic partner sends
towards our port while we wait for timeout. This is most visible with slow
periodic time (LACP rate slow).

Other open source implementations like Open VSwitch and libreswitch, and
vendor implementations like Arista EOS, seem to disable collecting +
distributing to when doing similar port disabling/detaching/removing change.
With this patch kernel implementation would behave the same way and ensure
partner doesn't consider our actor viable anymore.

Signed-off-by: Toni Peltonen <peltzi@peltzi.fi>
Signed-off-by: Jay Vosburgh <jay.vosburgh@canonical.com>
Acked-by: Jonathan Toppins <jtoppins@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_3ad.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index f43fb2f958a54..93dfcef8afc4b 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -2086,6 +2086,9 @@ void bond_3ad_unbind_slave(struct slave *slave)
 		   aggregator->aggregator_identifier);
 
 	/* Tell the partner that this port is not suitable for aggregation */
+	port->actor_oper_port_state &= ~AD_STATE_SYNCHRONIZATION;
+	port->actor_oper_port_state &= ~AD_STATE_COLLECTING;
+	port->actor_oper_port_state &= ~AD_STATE_DISTRIBUTING;
 	port->actor_oper_port_state &= ~AD_STATE_AGGREGATION;
 	__update_lacpdu_from_port(port);
 	ad_lacpdu_send(port);
-- 
GitLab


From 1166494891da88af25c444e65cd4f32c3e026b46 Mon Sep 17 00:00:00 2001
From: John Hurley <john.hurley@netronome.com>
Date: Tue, 27 Nov 2018 14:04:11 -0800
Subject: [PATCH 1636/1890] nfp: flower: release metadata on offload failure

Calling nfp_compile_flow_metadata both assigns a stats context and
increments a ref counter on (or allocates) a mask id table entry. These
are released by the nfp_modify_flow_metadata call on flow deletion,
however, if a flow add fails after metadata is set then the flow entry
will be deleted but the metadata assignments leaked.

Add an error path to the flow add offload function to ensure allocated
metadata is released in the event of an offload fail.

Fixes: 81f3ddf2547d ("nfp: add control message passing capabilities to flower offloads")
Signed-off-by: John Hurley <john.hurley@netronome.com>
Reviewed-by: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/netronome/nfp/flower/offload.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index 29c95423ab646..c3ad8d737cf0c 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -479,13 +479,13 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev,
 	err = nfp_flower_xmit_flow(netdev, flow_pay,
 				   NFP_FLOWER_CMSG_TYPE_FLOW_ADD);
 	if (err)
-		goto err_destroy_flow;
+		goto err_release_metadata;
 
 	flow_pay->tc_flower_cookie = flow->cookie;
 	err = rhashtable_insert_fast(&priv->flow_table, &flow_pay->fl_node,
 				     nfp_flower_table_params);
 	if (err)
-		goto err_destroy_flow;
+		goto err_release_metadata;
 
 	port->tc_offload_cnt++;
 
@@ -494,6 +494,8 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev,
 
 	return 0;
 
+err_release_metadata:
+	nfp_modify_flow_metadata(app, flow_pay);
 err_destroy_flow:
 	kfree(flow_pay->action_data);
 	kfree(flow_pay->mask_data);
-- 
GitLab


From b5f0cf08340090d1503dbdbfd797e32264974100 Mon Sep 17 00:00:00 2001
From: John Hurley <john.hurley@netronome.com>
Date: Tue, 27 Nov 2018 14:04:12 -0800
Subject: [PATCH 1637/1890] nfp: flower: prevent offload if rhashtable insert
 fails

For flow offload adds, if the rhash insert code fails, the flow will still
have been offloaded but the reference to it in the driver freed.

Re-order the offload setup calls to ensure that a flow will only be written
to FW if a kernel reference is held and stored in the rhashtable. Remove
this hashtable entry if the offload fails.

Fixes: c01d0efa5136 ("nfp: flower: use rhashtable for flow caching")
Signed-off-by: John Hurley <john.hurley@netronome.com>
Reviewed-by: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/netronome/nfp/flower/offload.c    | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index c3ad8d737cf0c..2f49eb75f3cce 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -476,17 +476,17 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev,
 	if (err)
 		goto err_destroy_flow;
 
-	err = nfp_flower_xmit_flow(netdev, flow_pay,
-				   NFP_FLOWER_CMSG_TYPE_FLOW_ADD);
-	if (err)
-		goto err_release_metadata;
-
 	flow_pay->tc_flower_cookie = flow->cookie;
 	err = rhashtable_insert_fast(&priv->flow_table, &flow_pay->fl_node,
 				     nfp_flower_table_params);
 	if (err)
 		goto err_release_metadata;
 
+	err = nfp_flower_xmit_flow(netdev, flow_pay,
+				   NFP_FLOWER_CMSG_TYPE_FLOW_ADD);
+	if (err)
+		goto err_remove_rhash;
+
 	port->tc_offload_cnt++;
 
 	/* Deallocate flow payload when flower rule has been destroyed. */
@@ -494,6 +494,10 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev,
 
 	return 0;
 
+err_remove_rhash:
+	WARN_ON_ONCE(rhashtable_remove_fast(&priv->flow_table,
+					    &flow_pay->fl_node,
+					    nfp_flower_table_params));
 err_release_metadata:
 	nfp_modify_flow_metadata(app, flow_pay);
 err_destroy_flow:
-- 
GitLab


From c01ac66b38660f2b507ccd0b75d28e3002d56fbb Mon Sep 17 00:00:00 2001
From: David Miller <davem@davemloft.net>
Date: Wed, 28 Nov 2018 22:33:53 -0800
Subject: [PATCH 1638/1890] bpf: Fix verifier log string check for bad
 alignment.

The message got changed a lot time ago.

This was responsible for 36 test case failures on sparc64.

Fixes: f1174f77b50c ("bpf/verifier: rework value tracking")
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_verifier.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 550b7e46bf4a4..5dd4410a716ce 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -14230,7 +14230,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 
 	reject_from_alignment = fd_prog < 0 &&
 				(test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS) &&
-				strstr(bpf_vlog, "Unknown alignment.");
+				strstr(bpf_vlog, "misaligned");
 #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 	if (reject_from_alignment) {
 		printf("FAIL\nFailed due to alignment despite having efficient unaligned access: '%s'!\n",
-- 
GitLab


From 38c7b224ce22c25fed04007839edf974bd13439d Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 30 Nov 2018 14:45:01 -0800
Subject: [PATCH 1639/1890] unifdef: use memcpy instead of strncpy

New versions of gcc reasonably warn about the odd pattern of

	strncpy(p, q, strlen(q));

which really doesn't make sense: the strncpy() ends up being just a slow
and odd way to write memcpy() in this case.

There was a comment about _why_ the code used strncpy - to avoid the
terminating NUL byte, but memcpy does the same and avoids the warning.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/unifdef.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/unifdef.c b/scripts/unifdef.c
index 7493c0ee51cc9..db00e3e30a59d 100644
--- a/scripts/unifdef.c
+++ b/scripts/unifdef.c
@@ -395,7 +395,7 @@ usage(void)
  * When we have processed a group that starts off with a known-false
  * #if/#elif sequence (which has therefore been deleted) followed by a
  * #elif that we don't understand and therefore must keep, we edit the
- * latter into a #if to keep the nesting correct. We use strncpy() to
+ * latter into a #if to keep the nesting correct. We use memcpy() to
  * overwrite the 4 byte token "elif" with "if  " without a '\0' byte.
  *
  * When we find a true #elif in a group, the following block will
@@ -450,7 +450,7 @@ static void Idrop (void) { Fdrop();  ignoreon(); }
 static void Itrue (void) { Ftrue();  ignoreon(); }
 static void Ifalse(void) { Ffalse(); ignoreon(); }
 /* modify this line */
-static void Mpass (void) { strncpy(keyword, "if  ", 4); Pelif(); }
+static void Mpass (void) { memcpy(keyword, "if  ", 4); Pelif(); }
 static void Mtrue (void) { keywordedit("else");  state(IS_TRUE_MIDDLE); }
 static void Melif (void) { keywordedit("endif"); state(IS_FALSE_TRAILER); }
 static void Melse (void) { keywordedit("endif"); state(IS_FALSE_ELSE); }
-- 
GitLab


From 12457e6391e000820bb0b2b775125a569c3e80ed Mon Sep 17 00:00:00 2001
From: Luis Chamberlain <mcgrof@kernel.org>
Date: Fri, 30 Nov 2018 14:08:49 -0800
Subject: [PATCH 1640/1890] MAINTAINERS: name change for Luis

My name has changed, works better than Global Entry I tell ya.

Link: http://lkml.kernel.org/r/20181122003138.7752-1-mcgrof@kernel.org
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 MAINTAINERS | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 5d17f568a3b97..27fe518f60797 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2491,7 +2491,7 @@ F:	drivers/net/wireless/ath/*
 ATHEROS ATH5K WIRELESS DRIVER
 M:	Jiri Slaby <jirislaby@gmail.com>
 M:	Nick Kossifidis <mickflemm@gmail.com>
-M:	"Luis R. Rodriguez" <mcgrof@do-not-panic.com>
+M:	Luis Chamberlain <mcgrof@kernel.org>
 L:	linux-wireless@vger.kernel.org
 W:	http://wireless.kernel.org/en/users/Drivers/ath5k
 S:	Maintained
@@ -5835,7 +5835,7 @@ F:	include/uapi/linux/firewire*.h
 F:	tools/firewire/
 
 FIRMWARE LOADER (request_firmware)
-M:	Luis R. Rodriguez <mcgrof@kernel.org>
+M:	Luis Chamberlain <mcgrof@kernel.org>
 L:	linux-kernel@vger.kernel.org
 S:	Maintained
 F:	Documentation/firmware_class/
@@ -8135,7 +8135,7 @@ F:	tools/testing/selftests/
 F:	Documentation/dev-tools/kselftest*
 
 KERNEL USERMODE HELPER
-M:	"Luis R. Rodriguez" <mcgrof@kernel.org>
+M:	Luis Chamberlain <mcgrof@kernel.org>
 L:	linux-kernel@vger.kernel.org
 S:	Maintained
 F:	kernel/umh.c
@@ -8311,7 +8311,7 @@ F:	mm/kmemleak.c
 F:	mm/kmemleak-test.c
 
 KMOD KERNEL MODULE LOADER - USERMODE HELPER
-M:	"Luis R. Rodriguez" <mcgrof@kernel.org>
+M:	Luis Chamberlain <mcgrof@kernel.org>
 L:	linux-kernel@vger.kernel.org
 S:	Maintained
 F:	kernel/kmod.c
@@ -12061,7 +12061,7 @@ F:	kernel/printk/
 F:	include/linux/printk.h
 
 PRISM54 WIRELESS DRIVER
-M:	"Luis R. Rodriguez" <mcgrof@gmail.com>
+M:	Luis Chamberlain <mcgrof@kernel.org>
 L:	linux-wireless@vger.kernel.org
 W:	http://wireless.kernel.org/en/users/Drivers/p54
 S:	Obsolete
@@ -12077,7 +12077,7 @@ F:	include/linux/proc_fs.h
 F:	tools/testing/selftests/proc/
 
 PROC SYSCTL
-M:	"Luis R. Rodriguez" <mcgrof@kernel.org>
+M:	Luis Chamberlain <mcgrof@kernel.org>
 M:	Kees Cook <keescook@chromium.org>
 L:	linux-kernel@vger.kernel.org
 L:	linux-fsdevel@vger.kernel.org
-- 
GitLab


From 08be37b798921af207e78082fe261a6ca8be5024 Mon Sep 17 00:00:00 2001
From: John Hubbard <jhubbard@nvidia.com>
Date: Fri, 30 Nov 2018 14:08:53 -0800
Subject: [PATCH 1641/1890] mm/gup: finish consolidating error handling

Commit df06b37ffe5a ("mm/gup: cache dev_pagemap while pinning pages")
attempted to operate on each page that get_user_pages had retrieved.  In
order to do that, it created a common exit point from the routine.
However, one case was missed, which this patch fixes up.

Also, there was still an unnecessary shadow declaration (with a
different type) of the "ret" variable, which this patch removes.

Keith's description of the situation is:

  This also fixes a potentially leaked dev_pagemap reference count if a
  failure occurs when an iteration crosses a vma boundary.  I don't think
  it's normal to have different vma's on a users mapped zone device
  memory, but good to fix anyway.

I actually thought that this code:

    /* first iteration or cross vma bound */
    if (!vma || start >= vma->vm_end) {
	        vma = find_extend_vma(mm, start);
	        if (!vma && in_gate_area(mm, start)) {
		            ret = get_gate_page(mm, start & PAGE_MASK,
		                    gup_flags, &vma,
		                    pages ? &pages[i] : NULL);
		            if (ret)
		                goto out;

dealt with the "you're trying to pin the gate page, as part of this
call", rather than the generic case of crossing a vma boundary.  (I
think there's a fine point that I must be overlooking.) But it's still a
valid case, either way.

Link: http://lkml.kernel.org/r/20181121081402.29641-2-jhubbard@nvidia.com
Fixes: df06b37ffe5a4 ("mm/gup: cache dev_pagemap while pinning pages")
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/gup.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/mm/gup.c b/mm/gup.c
index aa43620a3270e..8cb68a50dbdf2 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -702,12 +702,11 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		if (!vma || start >= vma->vm_end) {
 			vma = find_extend_vma(mm, start);
 			if (!vma && in_gate_area(mm, start)) {
-				int ret;
 				ret = get_gate_page(mm, start & PAGE_MASK,
 						gup_flags, &vma,
 						pages ? &pages[i] : NULL);
 				if (ret)
-					return i ? : ret;
+					goto out;
 				ctx.page_mask = 0;
 				goto next_page;
 			}
-- 
GitLab


From e21e57445a64598b29a6f629688f9b9a39e7242a Mon Sep 17 00:00:00 2001
From: Larry Chen <lchen@suse.com>
Date: Fri, 30 Nov 2018 14:08:56 -0800
Subject: [PATCH 1642/1890] ocfs2: fix deadlock caused by ocfs2_defrag_extent()

ocfs2_defrag_extent may fall into deadlock.

ocfs2_ioctl_move_extents
    ocfs2_ioctl_move_extents
      ocfs2_move_extents
        ocfs2_defrag_extent
          ocfs2_lock_allocators_move_extents

            ocfs2_reserve_clusters
              inode_lock GLOBAL_BITMAP_SYSTEM_INODE

	  __ocfs2_flush_truncate_log
              inode_lock GLOBAL_BITMAP_SYSTEM_INODE

As backtrace shows above, ocfs2_reserve_clusters() will call inode_lock
against the global bitmap if local allocator has not sufficient cluters.
Once global bitmap could meet the demand, ocfs2_reserve_cluster will
return success with global bitmap locked.

After ocfs2_reserve_cluster(), if truncate log is full,
__ocfs2_flush_truncate_log() will definitely fall into deadlock because
it needs to inode_lock global bitmap, which has already been locked.

To fix this bug, we could remove from
ocfs2_lock_allocators_move_extents() the code which intends to lock
global allocator, and put the removed code after
__ocfs2_flush_truncate_log().

ocfs2_lock_allocators_move_extents() is referred by 2 places, one is
here, the other does not need the data allocator context, which means
this patch does not affect the caller so far.

Link: http://lkml.kernel.org/r/20181101071422.14470-1-lchen@suse.com
Signed-off-by: Larry Chen <lchen@suse.com>
Reviewed-by: Changwei Ge <ge.changwei@h3c.com>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Joseph Qi <jiangqi903@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ocfs2/move_extents.c | 47 +++++++++++++++++++++++------------------
 1 file changed, 26 insertions(+), 21 deletions(-)

diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 3f1685d7d43bf..1565dd8e8856e 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -157,18 +157,14 @@ static int __ocfs2_move_extent(handle_t *handle,
 }
 
 /*
- * lock allocators, and reserving appropriate number of bits for
- * meta blocks and data clusters.
- *
- * in some cases, we don't need to reserve clusters, just let data_ac
- * be NULL.
+ * lock allocator, and reserve appropriate number of bits for
+ * meta blocks.
  */
-static int ocfs2_lock_allocators_move_extents(struct inode *inode,
+static int ocfs2_lock_meta_allocator_move_extents(struct inode *inode,
 					struct ocfs2_extent_tree *et,
 					u32 clusters_to_move,
 					u32 extents_to_split,
 					struct ocfs2_alloc_context **meta_ac,
-					struct ocfs2_alloc_context **data_ac,
 					int extra_blocks,
 					int *credits)
 {
@@ -193,13 +189,6 @@ static int ocfs2_lock_allocators_move_extents(struct inode *inode,
 		goto out;
 	}
 
-	if (data_ac) {
-		ret = ocfs2_reserve_clusters(osb, clusters_to_move, data_ac);
-		if (ret) {
-			mlog_errno(ret);
-			goto out;
-		}
-	}
 
 	*credits += ocfs2_calc_extend_credits(osb->sb, et->et_root_el);
 
@@ -259,10 +248,10 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
 		}
 	}
 
-	ret = ocfs2_lock_allocators_move_extents(inode, &context->et, *len, 1,
-						 &context->meta_ac,
-						 &context->data_ac,
-						 extra_blocks, &credits);
+	ret = ocfs2_lock_meta_allocator_move_extents(inode, &context->et,
+						*len, 1,
+						&context->meta_ac,
+						extra_blocks, &credits);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
@@ -285,6 +274,21 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
 		}
 	}
 
+	/*
+	 * Make sure ocfs2_reserve_cluster is called after
+	 * __ocfs2_flush_truncate_log, otherwise, dead lock may happen.
+	 *
+	 * If ocfs2_reserve_cluster is called
+	 * before __ocfs2_flush_truncate_log, dead lock on global bitmap
+	 * may happen.
+	 *
+	 */
+	ret = ocfs2_reserve_clusters(osb, *len, &context->data_ac);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_unlock_mutex;
+	}
+
 	handle = ocfs2_start_trans(osb, credits);
 	if (IS_ERR(handle)) {
 		ret = PTR_ERR(handle);
@@ -617,9 +621,10 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
 		}
 	}
 
-	ret = ocfs2_lock_allocators_move_extents(inode, &context->et, len, 1,
-						 &context->meta_ac,
-						 NULL, extra_blocks, &credits);
+	ret = ocfs2_lock_meta_allocator_move_extents(inode, &context->et,
+						len, 1,
+						&context->meta_ac,
+						extra_blocks, &credits);
 	if (ret) {
 		mlog_errno(ret);
 		goto out;
-- 
GitLab


From 6ff38bd40230af35e446239396e5fc8ebd6a5248 Mon Sep 17 00:00:00 2001
From: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
Date: Fri, 30 Nov 2018 14:09:00 -0800
Subject: [PATCH 1643/1890] mm: cleancache: fix corruption on missed inode
 invalidation

If all pages are deleted from the mapping by memory reclaim and also
moved to the cleancache:

__delete_from_page_cache
  (no shadow case)
  unaccount_page_cache_page
    cleancache_put_page
  page_cache_delete
    mapping->nrpages -= nr
    (nrpages becomes 0)

We don't clean the cleancache for an inode after final file truncation
(removal).

truncate_inode_pages_final
  check (nrpages || nrexceptional) is false
    no truncate_inode_pages
      no cleancache_invalidate_inode(mapping)

These way when reading the new file created with same inode we may get
these trash leftover pages from cleancache and see wrong data instead of
the contents of the new file.

Fix it by always doing truncate_inode_pages which is already ready for
nrpages == 0 && nrexceptional == 0 case and just invalidates inode.

[akpm@linux-foundation.org: add comment, per Jan]
Link: http://lkml.kernel.org/r/20181112095734.17979-1-ptikhomirov@virtuozzo.com
Fixes: commit 91b0abe36a7b ("mm + fs: store shadow entries in page cache")
Signed-off-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
Reviewed-by: Vasily Averin <vvs@virtuozzo.com>
Reviewed-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/truncate.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/mm/truncate.c b/mm/truncate.c
index 45d68e90b7037..798e7ccfb030b 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -517,9 +517,13 @@ void truncate_inode_pages_final(struct address_space *mapping)
 		 */
 		xa_lock_irq(&mapping->i_pages);
 		xa_unlock_irq(&mapping->i_pages);
-
-		truncate_inode_pages(mapping, 0);
 	}
+
+	/*
+	 * Cleancache needs notification even if there are no pages or shadow
+	 * entries.
+	 */
+	truncate_inode_pages(mapping, 0);
 }
 EXPORT_SYMBOL(truncate_inode_pages_final);
 
-- 
GitLab


From c1cb20d43728aa9b5393bd8d489bc85c142949b2 Mon Sep 17 00:00:00 2001
From: Yu Zhao <yuzhao@google.com>
Date: Fri, 30 Nov 2018 14:09:03 -0800
Subject: [PATCH 1644/1890] mm: use swp_offset as key in shmem_replace_page()

We changed the key of swap cache tree from swp_entry_t.val to
swp_offset.  We need to do so in shmem_replace_page() as well.

Hugh said:
 "shmem_replace_page() has been wrong since the day I wrote it: good
  enough to work on swap "type" 0, which is all most people ever use
  (especially those few who need shmem_replace_page() at all), but
  broken once there are any non-0 swp_type bits set in the higher order
  bits"

Link: http://lkml.kernel.org/r/20181121215442.138545-1-yuzhao@google.com
Fixes: f6ab1f7f6b2d ("mm, swap: use offset of swap entry as key of swap cache")
Signed-off-by: Yu Zhao <yuzhao@google.com>
Reviewed-by: Matthew Wilcox <willy@infradead.org>
Acked-by: Hugh Dickins <hughd@google.com>
Cc: <stable@vger.kernel.org>	[4.9+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/shmem.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index d44991ea5ed4b..42b70978e8148 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1509,11 +1509,13 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
 {
 	struct page *oldpage, *newpage;
 	struct address_space *swap_mapping;
+	swp_entry_t entry;
 	pgoff_t swap_index;
 	int error;
 
 	oldpage = *pagep;
-	swap_index = page_private(oldpage);
+	entry.val = page_private(oldpage);
+	swap_index = swp_offset(entry);
 	swap_mapping = page_mapping(oldpage);
 
 	/*
@@ -1532,7 +1534,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
 	__SetPageLocked(newpage);
 	__SetPageSwapBacked(newpage);
 	SetPageUptodate(newpage);
-	set_page_private(newpage, swap_index);
+	set_page_private(newpage, entry.val);
 	SetPageSwapCache(newpage);
 
 	/*
-- 
GitLab


From 8f416836c0d50b198cad1225132e5abebf8980dc Mon Sep 17 00:00:00 2001
From: Wei Yang <richard.weiyang@gmail.com>
Date: Fri, 30 Nov 2018 14:09:07 -0800
Subject: [PATCH 1645/1890] mm/page_alloc.c: fix calculation of pgdat->nr_zones

init_currently_empty_zone() will adjust pgdat->nr_zones and set it to
'zone_idx(zone) + 1' unconditionally.  This is correct in the normal
case, while not exact in hot-plug situation.

This function is used in two places:

  * free_area_init_core()
  * move_pfn_range_to_zone()

In the first case, we are sure zone index increase monotonically.  While
in the second one, this is under users control.

One way to reproduce this is:
----------------------------

1. create a virtual machine with empty node1

   -m 4G,slots=32,maxmem=32G \
   -smp 4,maxcpus=8          \
   -numa node,nodeid=0,mem=4G,cpus=0-3 \
   -numa node,nodeid=1,mem=0G,cpus=4-7

2. hot-add cpu 3-7

   cpu-add [3-7]

2. hot-add memory to nod1

   object_add memory-backend-ram,id=ram0,size=1G
   device_add pc-dimm,id=dimm0,memdev=ram0,node=1

3. online memory with following order

   echo online_movable > memory47/state
   echo online > memory40/state

After this, node1 will have its nr_zones equals to (ZONE_NORMAL + 1)
instead of (ZONE_MOVABLE + 1).

Michal said:
 "Having an incorrect nr_zones might result in all sorts of problems
  which would be quite hard to debug (e.g. reclaim not considering the
  movable zone). I do not expect many users would suffer from this it
  but still this is trivial and obviously right thing to do so
  backporting to the stable tree shouldn't be harmful (last famous
  words)"

Link: http://lkml.kernel.org/r/20181117022022.9956-1-richard.weiyang@gmail.com
Fixes: f1dd2cd13c4b ("mm, memory_hotplug: do not associate hotadded memory to zones until online")
Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_alloc.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6847177dc4a1a..2ec9cc4072165 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5813,8 +5813,10 @@ void __meminit init_currently_empty_zone(struct zone *zone,
 					unsigned long size)
 {
 	struct pglist_data *pgdat = zone->zone_pgdat;
+	int zone_idx = zone_idx(zone) + 1;
 
-	pgdat->nr_zones = zone_idx(zone) + 1;
+	if (zone_idx > pgdat->nr_zones)
+		pgdat->nr_zones = zone_idx;
 
 	zone->zone_start_pfn = zone_start_pfn;
 
-- 
GitLab


From 94570a413f01dda8938d980d36814eee295d2a58 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Fri, 30 Nov 2018 14:09:10 -0800
Subject: [PATCH 1646/1890] proc: update MAINTAINERS with proc.txt

Turns out that /proc has official documentation and people even trying
to keep it uptodate.

Link: http://lkml.kernel.org/r/20181116134630.GA8004@avx2
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 27fe518f60797..ba26e68cb2755 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12075,6 +12075,7 @@ S:	Maintained
 F:	fs/proc/
 F:	include/linux/proc_fs.h
 F:	tools/testing/selftests/proc/
+F:	Documentation/filesystems/proc.txt
 
 PROC SYSCTL
 M:	Luis Chamberlain <mcgrof@kernel.org>
-- 
GitLab


From ce96a407adef126870b3f4a1b73529dd8aa80f49 Mon Sep 17 00:00:00 2001
From: Pan Bian <bianpan2016@163.com>
Date: Fri, 30 Nov 2018 14:09:14 -0800
Subject: [PATCH 1647/1890] hfs: do not free node before using

hfs_bmap_free() frees the node via hfs_bnode_put(node).  However, it
then reads node->this when dumping error message on an error path, which
may result in a use-after-free bug.  This patch frees the node only when
it is never again used.

Link: http://lkml.kernel.org/r/1542963889-128825-1-git-send-email-bianpan2016@163.com
Fixes: a1185ffa2fc ("HFS rewrite")
Signed-off-by: Pan Bian <bianpan2016@163.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Joe Perches <joe@perches.com>
Cc: Ernesto A. Fernandez <ernesto.mnd.fernandez@gmail.com>
Cc: Viacheslav Dubeyko <slava@dubeyko.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/hfs/btree.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c
index 98b96ffb95ed3..19017d2961734 100644
--- a/fs/hfs/btree.c
+++ b/fs/hfs/btree.c
@@ -338,13 +338,14 @@ void hfs_bmap_free(struct hfs_bnode *node)
 
 		nidx -= len * 8;
 		i = node->next;
-		hfs_bnode_put(node);
 		if (!i) {
 			/* panic */;
 			pr_crit("unable to free bnode %u. bmap not found!\n",
 				node->this);
+			hfs_bnode_put(node);
 			return;
 		}
+		hfs_bnode_put(node);
 		node = hfs_bnode_find(tree, i);
 		if (IS_ERR(node))
 			return;
-- 
GitLab


From c7d7d620dcbd2a1c595092280ca943f2fced7bbd Mon Sep 17 00:00:00 2001
From: Pan Bian <bianpan2016@163.com>
Date: Fri, 30 Nov 2018 14:09:18 -0800
Subject: [PATCH 1648/1890] hfsplus: do not free node before using

hfs_bmap_free() frees node via hfs_bnode_put(node).  However it then
reads node->this when dumping error message on an error path, which may
result in a use-after-free bug.  This patch frees node only when it is
never used.

Link: http://lkml.kernel.org/r/1543053441-66942-1-git-send-email-bianpan2016@163.com
Signed-off-by: Pan Bian <bianpan2016@163.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Ernesto A. Fernandez <ernesto.mnd.fernandez@gmail.com>
Cc: Joe Perches <joe@perches.com>
Cc: Viacheslav Dubeyko <slava@dubeyko.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/hfsplus/btree.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c
index 236efe51eca67..66774f4cb4fd5 100644
--- a/fs/hfsplus/btree.c
+++ b/fs/hfsplus/btree.c
@@ -466,14 +466,15 @@ void hfs_bmap_free(struct hfs_bnode *node)
 
 		nidx -= len * 8;
 		i = node->next;
-		hfs_bnode_put(node);
 		if (!i) {
 			/* panic */;
 			pr_crit("unable to free bnode %u. "
 					"bmap not found!\n",
 				node->this);
+			hfs_bnode_put(node);
 			return;
 		}
+		hfs_bnode_put(node);
 		node = hfs_bnode_find(tree, i);
 		if (IS_ERR(node))
 			return;
-- 
GitLab


From 5618cf031fecda63847cafd1091e7b8bd626cdb1 Mon Sep 17 00:00:00 2001
From: Luis Chamberlain <mcgrof@kernel.org>
Date: Fri, 30 Nov 2018 14:09:21 -0800
Subject: [PATCH 1649/1890] lib/test_kmod.c: fix rmmod double free

We free the misc device string twice on rmmod; fix this.  Without this
we cannot remove the module without crashing.

Link: http://lkml.kernel.org/r/20181124050500.5257-1-mcgrof@kernel.org
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
Reported-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: <stable@vger.kernel.org>	[4.12+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/test_kmod.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lib/test_kmod.c b/lib/test_kmod.c
index e3ddd836491fa..d82d022111e0e 100644
--- a/lib/test_kmod.c
+++ b/lib/test_kmod.c
@@ -1214,7 +1214,6 @@ void unregister_test_dev_kmod(struct kmod_test_device *test_dev)
 
 	dev_info(test_dev->dev, "removing interface\n");
 	misc_deregister(&test_dev->misc_dev);
-	kfree(&test_dev->misc_dev.name);
 
 	mutex_unlock(&test_dev->config_mutex);
 	mutex_unlock(&test_dev->trigger_mutex);
-- 
GitLab


From 9e368259ad988356c4c95150fafd1a06af095d98 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Fri, 30 Nov 2018 14:09:25 -0800
Subject: [PATCH 1650/1890] userfaultfd: use ENOENT instead of EFAULT if the
 atomic copy user fails

Patch series "userfaultfd shmem updates".

Jann found two bugs in the userfaultfd shmem MAP_SHARED backend: the
lack of the VM_MAYWRITE check and the lack of i_size checks.

Then looking into the above we also fixed the MAP_PRIVATE case.

Hugh by source review also found a data loss source if UFFDIO_COPY is
used on shmem MAP_SHARED PROT_READ mappings (the production usages
incidentally run with PROT_READ|PROT_WRITE, so the data loss couldn't
happen in those production usages like with QEMU).

The whole patchset is marked for stable.

We verified QEMU postcopy live migration with guest running on shmem
MAP_PRIVATE run as well as before after the fix of shmem MAP_PRIVATE.
Regardless if it's shmem or hugetlbfs or MAP_PRIVATE or MAP_SHARED, QEMU
unconditionally invokes a punch hole if the guest mapping is filebacked
and a MADV_DONTNEED too (needed to get rid of the MAP_PRIVATE COWs and
for the anon backend).

This patch (of 5):

We internally used EFAULT to communicate with the caller, switch to
ENOENT, so EFAULT can be used as a non internal retval.

Link: http://lkml.kernel.org/r/20181126173452.26955-2-aarcange@redhat.com
Fixes: 4c27fe4c4c84 ("userfaultfd: shmem: add shmem_mcopy_atomic_pte for userfaultfd support")
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
Reviewed-by: Hugh Dickins <hughd@google.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Jann Horn <jannh@google.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: <stable@vger.kernel.org>
Cc: stable@vger.kernel.org
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/hugetlb.c     | 2 +-
 mm/shmem.c       | 2 +-
 mm/userfaultfd.c | 6 +++---
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 7f2a28ab46d53..705a3e9cc910e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4080,7 +4080,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
 
 		/* fallback to copy_from_user outside mmap_sem */
 		if (unlikely(ret)) {
-			ret = -EFAULT;
+			ret = -ENOENT;
 			*pagep = page;
 			/* don't free the page */
 			goto out;
diff --git a/mm/shmem.c b/mm/shmem.c
index 42b70978e8148..6c54a6874e414 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2238,7 +2238,7 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
 				*pagep = page;
 				shmem_inode_unacct_blocks(inode, 1);
 				/* don't free the page */
-				return -EFAULT;
+				return -ENOENT;
 			}
 		} else {		/* mfill_zeropage_atomic */
 			clear_highpage(page);
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 5029f241908f4..46c8949e5f8f8 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -48,7 +48,7 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,
 
 		/* fallback to copy_from_user outside mmap_sem */
 		if (unlikely(ret)) {
-			ret = -EFAULT;
+			ret = -ENOENT;
 			*pagep = page;
 			/* don't free the page */
 			goto out;
@@ -274,7 +274,7 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
 
 		cond_resched();
 
-		if (unlikely(err == -EFAULT)) {
+		if (unlikely(err == -ENOENT)) {
 			up_read(&dst_mm->mmap_sem);
 			BUG_ON(!page);
 
@@ -530,7 +530,7 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
 				       src_addr, &page, zeropage);
 		cond_resched();
 
-		if (unlikely(err == -EFAULT)) {
+		if (unlikely(err == -ENOENT)) {
 			void *page_kaddr;
 
 			up_read(&dst_mm->mmap_sem);
-- 
GitLab


From 5b51072e97d587186c2f5390c8c9c1fb7e179505 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Fri, 30 Nov 2018 14:09:28 -0800
Subject: [PATCH 1651/1890] userfaultfd: shmem: allocate anonymous memory for
 MAP_PRIVATE shmem

Userfaultfd did not create private memory when UFFDIO_COPY was invoked
on a MAP_PRIVATE shmem mapping.  Instead it wrote to the shmem file,
even when that had not been opened for writing.  Though, fortunately,
that could only happen where there was a hole in the file.

Fix the shmem-backed implementation of UFFDIO_COPY to create private
memory for MAP_PRIVATE mappings.  The hugetlbfs-backed implementation
was already correct.

This change is visible to userland, if userfaultfd has been used in
unintended ways: so it introduces a small risk of incompatibility, but
is necessary in order to respect file permissions.

An app that uses UFFDIO_COPY for anything like postcopy live migration
won't notice the difference, and in fact it'll run faster because there
will be no copy-on-write and memory waste in the tmpfs pagecache
anymore.

Userfaults on MAP_PRIVATE shmem keep triggering only on file holes like
before.

The real zeropage can also be built on a MAP_PRIVATE shmem mapping
through UFFDIO_ZEROPAGE and that's safe because the zeropage pte is
never dirty, in turn even an mprotect upgrading the vma permission from
PROT_READ to PROT_READ|PROT_WRITE won't make the zeropage pte writable.

Link: http://lkml.kernel.org/r/20181126173452.26955-3-aarcange@redhat.com
Fixes: 4c27fe4c4c84 ("userfaultfd: shmem: add shmem_mcopy_atomic_pte for userfaultfd support")
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Reported-by: Mike Rapoport <rppt@linux.ibm.com>
Reviewed-by: Hugh Dickins <hughd@google.com>
Cc: <stable@vger.kernel.org>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Jann Horn <jannh@google.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/userfaultfd.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 46c8949e5f8f8..471b6457f95f1 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -380,7 +380,17 @@ static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm,
 {
 	ssize_t err;
 
-	if (vma_is_anonymous(dst_vma)) {
+	/*
+	 * The normal page fault path for a shmem will invoke the
+	 * fault, fill the hole in the file and COW it right away. The
+	 * result generates plain anonymous memory. So when we are
+	 * asked to fill an hole in a MAP_PRIVATE shmem mapping, we'll
+	 * generate anonymous memory directly without actually filling
+	 * the hole. For the MAP_PRIVATE case the robustness check
+	 * only happens in the pagetable (to verify it's still none)
+	 * and not in the radix tree.
+	 */
+	if (!(dst_vma->vm_flags & VM_SHARED)) {
 		if (!zeropage)
 			err = mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma,
 					       dst_addr, src_addr, page);
@@ -489,7 +499,8 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
 	 * dst_vma.
 	 */
 	err = -ENOMEM;
-	if (vma_is_anonymous(dst_vma) && unlikely(anon_vma_prepare(dst_vma)))
+	if (!(dst_vma->vm_flags & VM_SHARED) &&
+	    unlikely(anon_vma_prepare(dst_vma)))
 		goto out_unlock;
 
 	while (src_addr < src_start + len) {
-- 
GitLab


From 29ec90660d68bbdd69507c1c8b4e33aa299278b1 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Fri, 30 Nov 2018 14:09:32 -0800
Subject: [PATCH 1652/1890] userfaultfd: shmem/hugetlbfs: only allow to
 register VM_MAYWRITE vmas

After the VMA to register the uffd onto is found, check that it has
VM_MAYWRITE set before allowing registration.  This way we inherit all
common code checks before allowing to fill file holes in shmem and
hugetlbfs with UFFDIO_COPY.

The userfaultfd memory model is not applicable for readonly files unless
it's a MAP_PRIVATE.

Link: http://lkml.kernel.org/r/20181126173452.26955-4-aarcange@redhat.com
Fixes: ff62a3421044 ("hugetlb: implement memfd sealing")
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
Reviewed-by: Hugh Dickins <hughd@google.com>
Reported-by: Jann Horn <jannh@google.com>
Fixes: 4c27fe4c4c84 ("userfaultfd: shmem: add shmem_mcopy_atomic_pte for userfaultfd support")
Cc: <stable@vger.kernel.org>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/userfaultfd.c | 15 +++++++++++++++
 mm/userfaultfd.c | 15 ++++++---------
 2 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 356d2b8568c14..cd58939dc977e 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -1361,6 +1361,19 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
 		ret = -EINVAL;
 		if (!vma_can_userfault(cur))
 			goto out_unlock;
+
+		/*
+		 * UFFDIO_COPY will fill file holes even without
+		 * PROT_WRITE. This check enforces that if this is a
+		 * MAP_SHARED, the process has write permission to the backing
+		 * file. If VM_MAYWRITE is set it also enforces that on a
+		 * MAP_SHARED vma: there is no F_WRITE_SEAL and no further
+		 * F_WRITE_SEAL can be taken until the vma is destroyed.
+		 */
+		ret = -EPERM;
+		if (unlikely(!(cur->vm_flags & VM_MAYWRITE)))
+			goto out_unlock;
+
 		/*
 		 * If this vma contains ending address, and huge pages
 		 * check alignment.
@@ -1406,6 +1419,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
 		BUG_ON(!vma_can_userfault(vma));
 		BUG_ON(vma->vm_userfaultfd_ctx.ctx &&
 		       vma->vm_userfaultfd_ctx.ctx != ctx);
+		WARN_ON(!(vma->vm_flags & VM_MAYWRITE));
 
 		/*
 		 * Nothing to do: this vma is already registered into this
@@ -1552,6 +1566,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
 		cond_resched();
 
 		BUG_ON(!vma_can_userfault(vma));
+		WARN_ON(!(vma->vm_flags & VM_MAYWRITE));
 
 		/*
 		 * Nothing to do: this vma is already registered into this
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 471b6457f95f1..43cf314cfddd5 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -205,8 +205,9 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
 		if (!dst_vma || !is_vm_hugetlb_page(dst_vma))
 			goto out_unlock;
 		/*
-		 * Only allow __mcopy_atomic_hugetlb on userfaultfd
-		 * registered ranges.
+		 * Check the vma is registered in uffd, this is
+		 * required to enforce the VM_MAYWRITE check done at
+		 * uffd registration time.
 		 */
 		if (!dst_vma->vm_userfaultfd_ctx.ctx)
 			goto out_unlock;
@@ -459,13 +460,9 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
 	if (!dst_vma)
 		goto out_unlock;
 	/*
-	 * Be strict and only allow __mcopy_atomic on userfaultfd
-	 * registered ranges to prevent userland errors going
-	 * unnoticed. As far as the VM consistency is concerned, it
-	 * would be perfectly safe to remove this check, but there's
-	 * no useful usage for __mcopy_atomic ouside of userfaultfd
-	 * registered ranges. This is after all why these are ioctls
-	 * belonging to the userfaultfd and not syscalls.
+	 * Check the vma is registered in uffd, this is required to
+	 * enforce the VM_MAYWRITE check done at uffd registration
+	 * time.
 	 */
 	if (!dst_vma->vm_userfaultfd_ctx.ctx)
 		goto out_unlock;
-- 
GitLab


From e2a50c1f64145a04959df2442305d57307e5395a Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Fri, 30 Nov 2018 14:09:37 -0800
Subject: [PATCH 1653/1890] userfaultfd: shmem: add i_size checks

With MAP_SHARED: recheck the i_size after taking the PT lock, to
serialize against truncate with the PT lock.  Delete the page from the
pagecache if the i_size_read check fails.

With MAP_PRIVATE: check the i_size after the PT lock before mapping
anonymous memory or zeropages into the MAP_PRIVATE shmem mapping.

A mostly irrelevant cleanup: like we do the delete_from_page_cache()
pagecache removal after dropping the PT lock, the PT lock is a spinlock
so drop it before the sleepable page lock.

Link: http://lkml.kernel.org/r/20181126173452.26955-5-aarcange@redhat.com
Fixes: 4c27fe4c4c84 ("userfaultfd: shmem: add shmem_mcopy_atomic_pte for userfaultfd support")
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
Reviewed-by: Hugh Dickins <hughd@google.com>
Reported-by: Jann Horn <jannh@google.com>
Cc: <stable@vger.kernel.org>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/shmem.c       | 18 ++++++++++++++++--
 mm/userfaultfd.c | 26 ++++++++++++++++++++++++--
 2 files changed, 40 insertions(+), 4 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index 6c54a6874e414..99d5867daadb1 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2216,6 +2216,7 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
 	struct page *page;
 	pte_t _dst_pte, *dst_pte;
 	int ret;
+	pgoff_t offset, max_off;
 
 	ret = -ENOMEM;
 	if (!shmem_inode_acct_block(inode, 1))
@@ -2253,6 +2254,12 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
 	__SetPageSwapBacked(page);
 	__SetPageUptodate(page);
 
+	ret = -EFAULT;
+	offset = linear_page_index(dst_vma, dst_addr);
+	max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
+	if (unlikely(offset >= max_off))
+		goto out_release;
+
 	ret = mem_cgroup_try_charge_delay(page, dst_mm, gfp, &memcg, false);
 	if (ret)
 		goto out_release;
@@ -2268,8 +2275,14 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
 	if (dst_vma->vm_flags & VM_WRITE)
 		_dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte));
 
-	ret = -EEXIST;
 	dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
+
+	ret = -EFAULT;
+	max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
+	if (unlikely(offset >= max_off))
+		goto out_release_uncharge_unlock;
+
+	ret = -EEXIST;
 	if (!pte_none(*dst_pte))
 		goto out_release_uncharge_unlock;
 
@@ -2287,13 +2300,14 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
 
 	/* No need to invalidate - it was non-present before */
 	update_mmu_cache(dst_vma, dst_addr, dst_pte);
-	unlock_page(page);
 	pte_unmap_unlock(dst_pte, ptl);
+	unlock_page(page);
 	ret = 0;
 out:
 	return ret;
 out_release_uncharge_unlock:
 	pte_unmap_unlock(dst_pte, ptl);
+	delete_from_page_cache(page);
 out_release_uncharge:
 	mem_cgroup_cancel_charge(page, memcg, false);
 out_release:
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 43cf314cfddd5..458acda96f207 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -33,6 +33,8 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,
 	void *page_kaddr;
 	int ret;
 	struct page *page;
+	pgoff_t offset, max_off;
+	struct inode *inode;
 
 	if (!*pagep) {
 		ret = -ENOMEM;
@@ -73,8 +75,17 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,
 	if (dst_vma->vm_flags & VM_WRITE)
 		_dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte));
 
-	ret = -EEXIST;
 	dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
+	if (dst_vma->vm_file) {
+		/* the shmem MAP_PRIVATE case requires checking the i_size */
+		inode = dst_vma->vm_file->f_inode;
+		offset = linear_page_index(dst_vma, dst_addr);
+		max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
+		ret = -EFAULT;
+		if (unlikely(offset >= max_off))
+			goto out_release_uncharge_unlock;
+	}
+	ret = -EEXIST;
 	if (!pte_none(*dst_pte))
 		goto out_release_uncharge_unlock;
 
@@ -108,11 +119,22 @@ static int mfill_zeropage_pte(struct mm_struct *dst_mm,
 	pte_t _dst_pte, *dst_pte;
 	spinlock_t *ptl;
 	int ret;
+	pgoff_t offset, max_off;
+	struct inode *inode;
 
 	_dst_pte = pte_mkspecial(pfn_pte(my_zero_pfn(dst_addr),
 					 dst_vma->vm_page_prot));
-	ret = -EEXIST;
 	dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
+	if (dst_vma->vm_file) {
+		/* the shmem MAP_PRIVATE case requires checking the i_size */
+		inode = dst_vma->vm_file->f_inode;
+		offset = linear_page_index(dst_vma, dst_addr);
+		max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
+		ret = -EFAULT;
+		if (unlikely(offset >= max_off))
+			goto out_unlock;
+	}
+	ret = -EEXIST;
 	if (!pte_none(*dst_pte))
 		goto out_unlock;
 	set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
-- 
GitLab


From dcf7fe9d89763a28e0f43975b422ff141fe79e43 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Fri, 30 Nov 2018 14:09:43 -0800
Subject: [PATCH 1654/1890] userfaultfd: shmem: UFFDIO_COPY: set the page dirty
 if VM_WRITE is not set

Set the page dirty if VM_WRITE is not set because in such case the pte
won't be marked dirty and the page would be reclaimed without writepage
(i.e.  swapout in the shmem case).

This was found by source review.  Most apps (certainly including QEMU)
only use UFFDIO_COPY on PROT_READ|PROT_WRITE mappings or the app can't
modify the memory in the first place.  This is for correctness and it
could help the non cooperative use case to avoid unexpected data loss.

Link: http://lkml.kernel.org/r/20181126173452.26955-6-aarcange@redhat.com
Reviewed-by: Hugh Dickins <hughd@google.com>
Cc: stable@vger.kernel.org
Fixes: 4c27fe4c4c84 ("userfaultfd: shmem: add shmem_mcopy_atomic_pte for userfaultfd support")
Reported-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Jann Horn <jannh@google.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Mike Rapoport <rppt@linux.ibm.com>
Cc: Peter Xu <peterx@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/shmem.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/mm/shmem.c b/mm/shmem.c
index 99d5867daadb1..16a3d7044c52b 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2274,6 +2274,16 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
 	_dst_pte = mk_pte(page, dst_vma->vm_page_prot);
 	if (dst_vma->vm_flags & VM_WRITE)
 		_dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte));
+	else {
+		/*
+		 * We don't set the pte dirty if the vma has no
+		 * VM_WRITE permission, so mark the page dirty or it
+		 * could be freed from under us. We could do it
+		 * unconditionally before unlock_page(), but doing it
+		 * only if VM_WRITE is not set is faster.
+		 */
+		set_page_dirty(page);
+	}
 
 	dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
 
@@ -2307,6 +2317,7 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
 	return ret;
 out_release_uncharge_unlock:
 	pte_unmap_unlock(dst_pte, ptl);
+	ClearPageDirty(page);
 	delete_from_page_cache(page);
 out_release_uncharge:
 	mem_cgroup_cancel_charge(page, memcg, false);
-- 
GitLab


From 8de456cf87ba863e028c4dd01bae44255ce3d835 Mon Sep 17 00:00:00 2001
From: Qian Cai <cai@gmx.us>
Date: Fri, 30 Nov 2018 14:09:48 -0800
Subject: [PATCH 1655/1890] debugobjects: avoid recursive calls with kmemleak

CONFIG_DEBUG_OBJECTS_RCU_HEAD does not play well with kmemleak due to
recursive calls.

fill_pool
  kmemleak_ignore
    make_black_object
      put_object
        __call_rcu (kernel/rcu/tree.c)
          debug_rcu_head_queue
            debug_object_activate
              debug_object_init
                fill_pool
                  kmemleak_ignore
                    make_black_object
                      ...

So add SLAB_NOLEAKTRACE to kmem_cache_create() to not register newly
allocated debug objects at all.

Link: http://lkml.kernel.org/r/20181126165343.2339-1-cai@gmx.us
Signed-off-by: Qian Cai <cai@gmx.us>
Suggested-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Waiman Long <longman@redhat.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yang Shi <yang.shi@linux.alibaba.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/debugobjects.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index 70935ed911259..14afeeb7d6ef5 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -135,7 +135,6 @@ static void fill_pool(void)
 		if (!new)
 			return;
 
-		kmemleak_ignore(new);
 		raw_spin_lock_irqsave(&pool_lock, flags);
 		hlist_add_head(&new->node, &obj_pool);
 		debug_objects_allocated++;
@@ -1128,7 +1127,6 @@ static int __init debug_objects_replace_static_objects(void)
 		obj = kmem_cache_zalloc(obj_cache, GFP_KERNEL);
 		if (!obj)
 			goto free;
-		kmemleak_ignore(obj);
 		hlist_add_head(&obj->node, &objects);
 	}
 
@@ -1184,7 +1182,8 @@ void __init debug_objects_mem_init(void)
 
 	obj_cache = kmem_cache_create("debug_objects_cache",
 				      sizeof (struct debug_obj), 0,
-				      SLAB_DEBUG_OBJECTS, NULL);
+				      SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE,
+				      NULL);
 
 	if (!obj_cache || debug_objects_replace_static_objects()) {
 		debug_objects_enabled = 0;
-- 
GitLab


From dbd4af54745fc0c805217693c807a3928b2d408b Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Fri, 30 Nov 2018 14:09:53 -0800
Subject: [PATCH 1656/1890] proc: fixup map_files test on arm

https://bugs.linaro.org/show_bug.cgi?id=3782

Turns out arm doesn't permit mapping address 0, so try minimum virtual
address instead.

Link: http://lkml.kernel.org/r/20181113165446.GA28157@avx2
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Reported-by: Rafael David Tinoco <rafael.tinoco@linaro.org>
Tested-by: Rafael David Tinoco <rafael.tinoco@linaro.org>
Acked-by: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/proc/proc-self-map-files-002.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/proc/proc-self-map-files-002.c b/tools/testing/selftests/proc/proc-self-map-files-002.c
index 6f1f4a6e1ecb1..85744425b08d3 100644
--- a/tools/testing/selftests/proc/proc-self-map-files-002.c
+++ b/tools/testing/selftests/proc/proc-self-map-files-002.c
@@ -13,7 +13,7 @@
  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
-/* Test readlink /proc/self/map_files/... with address 0. */
+/* Test readlink /proc/self/map_files/... with minimum address. */
 #include <errno.h>
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -47,6 +47,11 @@ static void fail(const char *fmt, unsigned long a, unsigned long b)
 int main(void)
 {
 	const unsigned int PAGE_SIZE = sysconf(_SC_PAGESIZE);
+#ifdef __arm__
+	unsigned long va = 2 * PAGE_SIZE;
+#else
+	unsigned long va = 0;
+#endif
 	void *p;
 	int fd;
 	unsigned long a, b;
@@ -55,7 +60,7 @@ int main(void)
 	if (fd == -1)
 		return 1;
 
-	p = mmap(NULL, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_FILE|MAP_FIXED, fd, 0);
+	p = mmap((void *)va, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_FILE|MAP_FIXED, fd, 0);
 	if (p == MAP_FAILED) {
 		if (errno == EPERM)
 			return 2;
-- 
GitLab


From e0c274472d5d27f277af722e017525e0b33784cd Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Fri, 30 Nov 2018 14:09:58 -0800
Subject: [PATCH 1657/1890] psi: make disabling/enabling easier for vendor
 kernels

Mel Gorman reports a hackbench regression with psi that would prohibit
shipping the suse kernel with it default-enabled, but he'd still like
users to be able to opt in at little to no cost to others.

With the current combination of CONFIG_PSI and the psi_disabled bool set
from the commandline, this is a challenge.  Do the following things to
make it easier:

1. Add a config option CONFIG_PSI_DEFAULT_DISABLED that allows distros
   to enable CONFIG_PSI in their kernel but leave the feature disabled
   unless a user requests it at boot-time.

   To avoid double negatives, rename psi_disabled= to psi=.

2. Make psi_disabled a static branch to eliminate any branch costs
   when the feature is disabled.

In terms of numbers before and after this patch, Mel says:

: The following is a comparision using CONFIG_PSI=n as a baseline against
: your patch and a vanilla kernel
:
:                          4.20.0-rc4             4.20.0-rc4             4.20.0-rc4
:                 kconfigdisable-v1r1                vanilla        psidisable-v1r1
: Amean     1       1.3100 (   0.00%)      1.3923 (  -6.28%)      1.3427 (  -2.49%)
: Amean     3       3.8860 (   0.00%)      4.1230 *  -6.10%*      3.8860 (  -0.00%)
: Amean     5       6.8847 (   0.00%)      8.0390 * -16.77%*      6.7727 (   1.63%)
: Amean     7       9.9310 (   0.00%)     10.8367 *  -9.12%*      9.9910 (  -0.60%)
: Amean     12     16.6577 (   0.00%)     18.2363 *  -9.48%*     17.1083 (  -2.71%)
: Amean     18     26.5133 (   0.00%)     27.8833 *  -5.17%*     25.7663 (   2.82%)
: Amean     24     34.3003 (   0.00%)     34.6830 (  -1.12%)     32.0450 (   6.58%)
: Amean     30     40.0063 (   0.00%)     40.5800 (  -1.43%)     41.5087 (  -3.76%)
: Amean     32     40.1407 (   0.00%)     41.2273 (  -2.71%)     39.9417 (   0.50%)
:
: It's showing that the vanilla kernel takes a hit (as the bisection
: indicated it would) and that disabling PSI by default is reasonably
: close in terms of performance for this particular workload on this
: particular machine so;

Link: http://lkml.kernel.org/r/20181127165329.GA29728@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Tested-by: Mel Gorman <mgorman@techsingularity.net>
Reported-by: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 .../admin-guide/kernel-parameters.txt         |  4 +++
 include/linux/psi.h                           |  3 +-
 init/Kconfig                                  |  9 ++++++
 kernel/sched/psi.c                            | 30 +++++++++++++------
 kernel/sched/stats.h                          |  8 ++---
 5 files changed, 40 insertions(+), 14 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 675170c360788..5d6ba930d4f46 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3505,6 +3505,10 @@
 			before loading.
 			See Documentation/blockdev/ramdisk.txt.
 
+	psi=		[KNL] Enable or disable pressure stall information
+			tracking.
+			Format: <bool>
+
 	psmouse.proto=	[HW,MOUSE] Highest PS2 mouse protocol extension to
 			probe for; one of (bare|imps|exps|lifebook|any).
 	psmouse.rate=	[HW,MOUSE] Set desired mouse report rate, in reports
diff --git a/include/linux/psi.h b/include/linux/psi.h
index 8e0725aac0aa8..7006008d5b72f 100644
--- a/include/linux/psi.h
+++ b/include/linux/psi.h
@@ -1,6 +1,7 @@
 #ifndef _LINUX_PSI_H
 #define _LINUX_PSI_H
 
+#include <linux/jump_label.h>
 #include <linux/psi_types.h>
 #include <linux/sched.h>
 
@@ -9,7 +10,7 @@ struct css_set;
 
 #ifdef CONFIG_PSI
 
-extern bool psi_disabled;
+extern struct static_key_false psi_disabled;
 
 void psi_init(void);
 
diff --git a/init/Kconfig b/init/Kconfig
index a4112e95724a0..cf5b5a0dcbc2f 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -509,6 +509,15 @@ config PSI
 
 	  Say N if unsure.
 
+config PSI_DEFAULT_DISABLED
+	bool "Require boot parameter to enable pressure stall information tracking"
+	default n
+	depends on PSI
+	help
+	  If set, pressure stall information tracking will be disabled
+	  per default but can be enabled through passing psi_enable=1
+	  on the kernel commandline during boot.
+
 endmenu # "CPU/Task time and stats accounting"
 
 config CPU_ISOLATION
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index 3d7355d7c3e38..fe24de3fbc938 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -136,8 +136,18 @@
 
 static int psi_bug __read_mostly;
 
-bool psi_disabled __read_mostly;
-core_param(psi_disabled, psi_disabled, bool, 0644);
+DEFINE_STATIC_KEY_FALSE(psi_disabled);
+
+#ifdef CONFIG_PSI_DEFAULT_DISABLED
+bool psi_enable;
+#else
+bool psi_enable = true;
+#endif
+static int __init setup_psi(char *str)
+{
+	return kstrtobool(str, &psi_enable) == 0;
+}
+__setup("psi=", setup_psi);
 
 /* Running averages - we need to be higher-res than loadavg */
 #define PSI_FREQ	(2*HZ+1)	/* 2 sec intervals */
@@ -169,8 +179,10 @@ static void group_init(struct psi_group *group)
 
 void __init psi_init(void)
 {
-	if (psi_disabled)
+	if (!psi_enable) {
+		static_branch_enable(&psi_disabled);
 		return;
+	}
 
 	psi_period = jiffies_to_nsecs(PSI_FREQ);
 	group_init(&psi_system);
@@ -549,7 +561,7 @@ void psi_memstall_enter(unsigned long *flags)
 	struct rq_flags rf;
 	struct rq *rq;
 
-	if (psi_disabled)
+	if (static_branch_likely(&psi_disabled))
 		return;
 
 	*flags = current->flags & PF_MEMSTALL;
@@ -579,7 +591,7 @@ void psi_memstall_leave(unsigned long *flags)
 	struct rq_flags rf;
 	struct rq *rq;
 
-	if (psi_disabled)
+	if (static_branch_likely(&psi_disabled))
 		return;
 
 	if (*flags)
@@ -600,7 +612,7 @@ void psi_memstall_leave(unsigned long *flags)
 #ifdef CONFIG_CGROUPS
 int psi_cgroup_alloc(struct cgroup *cgroup)
 {
-	if (psi_disabled)
+	if (static_branch_likely(&psi_disabled))
 		return 0;
 
 	cgroup->psi.pcpu = alloc_percpu(struct psi_group_cpu);
@@ -612,7 +624,7 @@ int psi_cgroup_alloc(struct cgroup *cgroup)
 
 void psi_cgroup_free(struct cgroup *cgroup)
 {
-	if (psi_disabled)
+	if (static_branch_likely(&psi_disabled))
 		return;
 
 	cancel_delayed_work_sync(&cgroup->psi.clock_work);
@@ -637,7 +649,7 @@ void cgroup_move_task(struct task_struct *task, struct css_set *to)
 	struct rq_flags rf;
 	struct rq *rq;
 
-	if (psi_disabled) {
+	if (static_branch_likely(&psi_disabled)) {
 		/*
 		 * Lame to do this here, but the scheduler cannot be locked
 		 * from the outside, so we move cgroups from inside sched/.
@@ -673,7 +685,7 @@ int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res)
 {
 	int full;
 
-	if (psi_disabled)
+	if (static_branch_likely(&psi_disabled))
 		return -EOPNOTSUPP;
 
 	update_stats(group);
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index 4904c46770007..aa0de240fb419 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -66,7 +66,7 @@ static inline void psi_enqueue(struct task_struct *p, bool wakeup)
 {
 	int clear = 0, set = TSK_RUNNING;
 
-	if (psi_disabled)
+	if (static_branch_likely(&psi_disabled))
 		return;
 
 	if (!wakeup || p->sched_psi_wake_requeue) {
@@ -86,7 +86,7 @@ static inline void psi_dequeue(struct task_struct *p, bool sleep)
 {
 	int clear = TSK_RUNNING, set = 0;
 
-	if (psi_disabled)
+	if (static_branch_likely(&psi_disabled))
 		return;
 
 	if (!sleep) {
@@ -102,7 +102,7 @@ static inline void psi_dequeue(struct task_struct *p, bool sleep)
 
 static inline void psi_ttwu_dequeue(struct task_struct *p)
 {
-	if (psi_disabled)
+	if (static_branch_likely(&psi_disabled))
 		return;
 	/*
 	 * Is the task being migrated during a wakeup? Make sure to
@@ -128,7 +128,7 @@ static inline void psi_ttwu_dequeue(struct task_struct *p)
 
 static inline void psi_task_tick(struct rq *rq)
 {
-	if (psi_disabled)
+	if (static_branch_likely(&psi_disabled))
 		return;
 
 	if (unlikely(rq->curr->flags & PF_MEMSTALL))
-- 
GitLab


From 903e8ff86753e6f327bb92166a0665e4ecb8e2e7 Mon Sep 17 00:00:00 2001
From: Anders Roxell <anders.roxell@linaro.org>
Date: Fri, 30 Nov 2018 14:10:05 -0800
Subject: [PATCH 1658/1890] kernel/kcov.c: mark funcs in
 __sanitizer_cov_trace_pc() as notrace

Since __sanitizer_cov_trace_pc() is marked as notrace, function calls in
__sanitizer_cov_trace_pc() shouldn't be traced either.
ftrace_graph_caller() gets called for each function that isn't marked
'notrace', like canonicalize_ip().  This is the call trace from a run:

[  139.644550]  ftrace_graph_caller+0x1c/0x24
[  139.648352]  canonicalize_ip+0x18/0x28
[  139.652313]  __sanitizer_cov_trace_pc+0x14/0x58
[  139.656184]  sched_clock+0x34/0x1e8
[  139.659759]  trace_clock_local+0x40/0x88
[  139.663722]  ftrace_push_return_trace+0x8c/0x1f0
[  139.667767]  prepare_ftrace_return+0xa8/0x100
[  139.671709]  ftrace_graph_caller+0x1c/0x24

Rework so that check_kcov_mode() and canonicalize_ip() that are called
from __sanitizer_cov_trace_pc() are also marked as notrace.

Link: http://lkml.kernel.org/r/20181128081239.18317-1-anders.roxell@linaro.org
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signen-off-by: Anders Roxell <anders.roxell@linaro.org>
Co-developed-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/kcov.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/kcov.c b/kernel/kcov.c
index 3ebd09efe72a6..97959d7b77e2a 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -56,7 +56,7 @@ struct kcov {
 	struct task_struct	*t;
 };
 
-static bool check_kcov_mode(enum kcov_mode needed_mode, struct task_struct *t)
+static notrace bool check_kcov_mode(enum kcov_mode needed_mode, struct task_struct *t)
 {
 	unsigned int mode;
 
@@ -78,7 +78,7 @@ static bool check_kcov_mode(enum kcov_mode needed_mode, struct task_struct *t)
 	return mode == needed_mode;
 }
 
-static unsigned long canonicalize_ip(unsigned long ip)
+static notrace unsigned long canonicalize_ip(unsigned long ip)
 {
 #ifdef CONFIG_RANDOMIZE_BASE
 	ip -= kaslr_offset();
-- 
GitLab


From 7c0950d455d6ab610d2990a13120f935b75abf2c Mon Sep 17 00:00:00 2001
From: Li Zhijian <lizhijian@cn.fujitsu.com>
Date: Fri, 30 Nov 2018 14:10:09 -0800
Subject: [PATCH 1659/1890] initramfs: clean old path before creating a
 hardlink

sys_link() can fail due to the new path already existing.  This case
ofen occurs when we use a concated initrd, for example:

1) prepare a basic rootfs, it contains a regular files rc.local
lizhijian@:~/yocto-tiny-i386-2016-04-22$ cat etc/rc.local
 #!/bin/sh
 echo "Running /etc/rc.local..."
yocto-tiny-i386-2016-04-22$ find . | sed 's,^\./,,' | cpio -o -H newc | gzip -n -9 >../rootfs.cgz

2) create a extra initrd which also includes a etc/rc.local
lizhijian@:~/lkp-x86_64/etc$ echo "append initrd" >rc.local
lizhijian@:~/lkp/lkp-x86_64/etc$ cat rc.local
append initrd
lizhijian@:~/lkp/lkp-x86_64/etc$ ln rc.local rc.local.hardlink
append initrd
lizhijian@:~/lkp/lkp-x86_64/etc$ stat rc.local rc.local.hardlink
  File: 'rc.local'
  Size: 14        	Blocks: 8          IO Block: 4096   regular file
Device: 801h/2049d	Inode: 11296086    Links: 2
Access: (0664/-rw-rw-r--)  Uid: ( 1002/lizhijian)   Gid: ( 1002/lizhijian)
Access: 2018-11-15 16:08:28.654464815 +0800
Modify: 2018-11-15 16:07:57.514903210 +0800
Change: 2018-11-15 16:08:24.180228872 +0800
 Birth: -
  File: 'rc.local.hardlink'
  Size: 14        	Blocks: 8          IO Block: 4096   regular file
Device: 801h/2049d	Inode: 11296086    Links: 2
Access: (0664/-rw-rw-r--)  Uid: ( 1002/lizhijian)   Gid: ( 1002/lizhijian)
Access: 2018-11-15 16:08:28.654464815 +0800
Modify: 2018-11-15 16:07:57.514903210 +0800
Change: 2018-11-15 16:08:24.180228872 +0800
 Birth: -

lizhijian@:~/lkp/lkp-x86_64$ find . | sed 's,^\./,,' | cpio -o -H newc | gzip -n -9 >../rc-local.cgz
lizhijian@:~/lkp/lkp-x86_64$ gzip -dc ../rc-local.cgz | cpio -t
.
etc
etc/rc.local.hardlink <<< it will be extracted first at this initrd
etc/rc.local

3) concate 2 initrds and boot
lizhijian@:~/lkp$ cat rootfs.cgz rc-local.cgz >concate-initrd.cgz
lizhijian@:~/lkp$ qemu-system-x86_64 -nographic -enable-kvm -cpu host -smp 1 -m 1024 -kernel ~/lkp/linux/arch/x86/boot/bzImage -append "console=ttyS0 earlyprint=ttyS0 ignore_loglevel" -initrd ./concate-initr.cgz -serial stdio -nodefaults

In this case, sys_link(2) will fail and return -EEXIST, so we can only get
the rc.local at rootfs.cgz instead of rc-local.cgz

[akpm@linux-foundation.org: move code to avoid forward declaration]
Link: http://lkml.kernel.org/r/1542352368-13299-1-git-send-email-lizhijian@cn.fujitsu.com
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
Cc: Philip Li <philip.li@intel.com>
Cc: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Li Zhijian <zhijianx.li@intel.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 init/initramfs.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/init/initramfs.c b/init/initramfs.c
index 640557788026d..f6f4a1e4cd547 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -291,16 +291,6 @@ static int __init do_reset(void)
 	return 1;
 }
 
-static int __init maybe_link(void)
-{
-	if (nlink >= 2) {
-		char *old = find_link(major, minor, ino, mode, collected);
-		if (old)
-			return (ksys_link(old, collected) < 0) ? -1 : 1;
-	}
-	return 0;
-}
-
 static void __init clean_path(char *path, umode_t fmode)
 {
 	struct kstat st;
@@ -313,6 +303,18 @@ static void __init clean_path(char *path, umode_t fmode)
 	}
 }
 
+static int __init maybe_link(void)
+{
+	if (nlink >= 2) {
+		char *old = find_link(major, minor, ino, mode, collected);
+		if (old) {
+			clean_path(collected, 0);
+			return (ksys_link(old, collected) < 0) ? -1 : 1;
+		}
+	}
+	return 0;
+}
+
 static __initdata int wfd;
 
 static int __init do_name(void)
-- 
GitLab


From 906f9cdfc2a0800f13683f9e4ebdfd08c12ee81b Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Fri, 30 Nov 2018 14:10:13 -0800
Subject: [PATCH 1660/1890] mm/huge_memory: rename freeze_page() to
 unmap_page()

The term "freeze" is used in several ways in the kernel, and in mm it
has the particular meaning of forcing page refcount temporarily to 0.
freeze_page() is just too confusing a name for a function that unmaps a
page: rename it unmap_page(), and rename unfreeze_page() remap_page().

Went to change the mention of freeze_page() added later in mm/rmap.c,
but found it to be incorrect: ordinary page reclaim reaches there too;
but the substance of the comment still seems correct, so edit it down.

Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261514080.2275@eggly.anvils
Fixes: e9b61f19858a5 ("thp: reintroduce split_huge_page()")
Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: <stable@vger.kernel.org>	[4.8+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/huge_memory.c | 12 ++++++------
 mm/rmap.c        | 13 +++----------
 2 files changed, 9 insertions(+), 16 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 55478ab3c83be..30100fac23419 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2350,7 +2350,7 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
 	}
 }
 
-static void freeze_page(struct page *page)
+static void unmap_page(struct page *page)
 {
 	enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS |
 		TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD;
@@ -2365,7 +2365,7 @@ static void freeze_page(struct page *page)
 	VM_BUG_ON_PAGE(!unmap_success, page);
 }
 
-static void unfreeze_page(struct page *page)
+static void remap_page(struct page *page)
 {
 	int i;
 	if (PageTransHuge(page)) {
@@ -2483,7 +2483,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 
 	spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);
 
-	unfreeze_page(head);
+	remap_page(head);
 
 	for (i = 0; i < HPAGE_PMD_NR; i++) {
 		struct page *subpage = head + i;
@@ -2664,7 +2664,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 	}
 
 	/*
-	 * Racy check if we can split the page, before freeze_page() will
+	 * Racy check if we can split the page, before unmap_page() will
 	 * split PMDs
 	 */
 	if (!can_split_huge_page(head, &extra_pins)) {
@@ -2673,7 +2673,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 	}
 
 	mlocked = PageMlocked(page);
-	freeze_page(head);
+	unmap_page(head);
 	VM_BUG_ON_PAGE(compound_mapcount(head), head);
 
 	/* Make sure the page is not on per-CPU pagevec as it takes pin */
@@ -2727,7 +2727,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 fail:		if (mapping)
 			xa_unlock(&mapping->i_pages);
 		spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);
-		unfreeze_page(head);
+		remap_page(head);
 		ret = -EBUSY;
 	}
 
diff --git a/mm/rmap.c b/mm/rmap.c
index 1e79fac3186b6..85b7f94233526 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1627,16 +1627,9 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 						      address + PAGE_SIZE);
 		} else {
 			/*
-			 * We should not need to notify here as we reach this
-			 * case only from freeze_page() itself only call from
-			 * split_huge_page_to_list() so everything below must
-			 * be true:
-			 *   - page is not anonymous
-			 *   - page is locked
-			 *
-			 * So as it is a locked file back page thus it can not
-			 * be remove from the page cache and replace by a new
-			 * page before mmu_notifier_invalidate_range_end so no
+			 * This is a locked file-backed page, thus it cannot
+			 * be removed from the page cache and replaced by a new
+			 * page before mmu_notifier_invalidate_range_end, so no
 			 * concurrent thread might update its page table to
 			 * point at new page while a device still is using this
 			 * page.
-- 
GitLab


From 173d9d9fd3ddae84c110fea8aedf1f26af6be9ec Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Fri, 30 Nov 2018 14:10:16 -0800
Subject: [PATCH 1661/1890] mm/huge_memory: splitting set mapping+index before
 unfreeze

Huge tmpfs stress testing has occasionally hit shmem_undo_range()'s
VM_BUG_ON_PAGE(page_to_pgoff(page) != index, page).

Move the setting of mapping and index up before the page_ref_unfreeze()
in __split_huge_page_tail() to fix this: so that a page cache lookup
cannot get a reference while the tail's mapping and index are unstable.

In fact, might as well move them up before the smp_wmb(): I don't see an
actual need for that, but if I'm missing something, this way round is
safer than the other, and no less efficient.

You might argue that VM_BUG_ON_PAGE(page_to_pgoff(page) != index, page) is
misplaced, and should be left until after the trylock_page(); but left as
is has not crashed since, and gives more stringent assurance.

Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261516380.2275@eggly.anvils
Fixes: e9b61f19858a5 ("thp: reintroduce split_huge_page()")
Requires: 605ca5ede764 ("mm/huge_memory.c: reorder operations in __split_huge_page_tail()")
Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: <stable@vger.kernel.org>	[4.8+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/huge_memory.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 30100fac23419..cef2c256e7c47 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2402,6 +2402,12 @@ static void __split_huge_page_tail(struct page *head, int tail,
 			 (1L << PG_unevictable) |
 			 (1L << PG_dirty)));
 
+	/* ->mapping in first tail page is compound_mapcount */
+	VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING,
+			page_tail);
+	page_tail->mapping = head->mapping;
+	page_tail->index = head->index + tail;
+
 	/* Page flags must be visible before we make the page non-compound. */
 	smp_wmb();
 
@@ -2422,12 +2428,6 @@ static void __split_huge_page_tail(struct page *head, int tail,
 	if (page_is_idle(head))
 		set_page_idle(page_tail);
 
-	/* ->mapping in first tail page is compound_mapcount */
-	VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING,
-			page_tail);
-	page_tail->mapping = head->mapping;
-
-	page_tail->index = head->index + tail;
 	page_cpupid_xchg_last(page_tail, page_cpupid_last(head));
 
 	/*
-- 
GitLab


From 006d3ff27e884f80bd7d306b041afc415f63598f Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Fri, 30 Nov 2018 14:10:21 -0800
Subject: [PATCH 1662/1890] mm/huge_memory: fix lockdep complaint on 32-bit
 i_size_read()

Huge tmpfs testing, on 32-bit kernel with lockdep enabled, showed that
__split_huge_page() was using i_size_read() while holding the irq-safe
lru_lock and page tree lock, but the 32-bit i_size_read() uses an
irq-unsafe seqlock which should not be nested inside them.

Instead, read the i_size earlier in split_huge_page_to_list(), and pass
the end offset down to __split_huge_page(): all while holding head page
lock, which is enough to prevent truncation of that extent before the
page tree lock has been taken.

Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261520070.2275@eggly.anvils
Fixes: baa355fd33142 ("thp: file pages support for split_huge_page()")
Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: <stable@vger.kernel.org>	[4.8+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/huge_memory.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index cef2c256e7c47..622cced74fd90 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2439,12 +2439,11 @@ static void __split_huge_page_tail(struct page *head, int tail,
 }
 
 static void __split_huge_page(struct page *page, struct list_head *list,
-		unsigned long flags)
+		pgoff_t end, unsigned long flags)
 {
 	struct page *head = compound_head(page);
 	struct zone *zone = page_zone(head);
 	struct lruvec *lruvec;
-	pgoff_t end = -1;
 	int i;
 
 	lruvec = mem_cgroup_page_lruvec(head, zone->zone_pgdat);
@@ -2452,9 +2451,6 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 	/* complete memcg works before add pages to LRU */
 	mem_cgroup_split_huge_fixup(head);
 
-	if (!PageAnon(page))
-		end = DIV_ROUND_UP(i_size_read(head->mapping->host), PAGE_SIZE);
-
 	for (i = HPAGE_PMD_NR - 1; i >= 1; i--) {
 		__split_huge_page_tail(head, i, lruvec, list);
 		/* Some pages can be beyond i_size: drop them from page cache */
@@ -2626,6 +2622,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 	int count, mapcount, extra_pins, ret;
 	bool mlocked;
 	unsigned long flags;
+	pgoff_t end;
 
 	VM_BUG_ON_PAGE(is_huge_zero_page(page), page);
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
@@ -2648,6 +2645,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 			ret = -EBUSY;
 			goto out;
 		}
+		end = -1;
 		mapping = NULL;
 		anon_vma_lock_write(anon_vma);
 	} else {
@@ -2661,6 +2659,15 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 
 		anon_vma = NULL;
 		i_mmap_lock_read(mapping);
+
+		/*
+		 *__split_huge_page() may need to trim off pages beyond EOF:
+		 * but on 32-bit, i_size_read() takes an irq-unsafe seqlock,
+		 * which cannot be nested inside the page tree lock. So note
+		 * end now: i_size itself may be changed at any moment, but
+		 * head page lock is good enough to serialize the trimming.
+		 */
+		end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
 	}
 
 	/*
@@ -2707,7 +2714,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 		if (mapping)
 			__dec_node_page_state(page, NR_SHMEM_THPS);
 		spin_unlock(&pgdata->split_queue_lock);
-		__split_huge_page(page, list, flags);
+		__split_huge_page(page, list, end, flags);
 		if (PageSwapCache(head)) {
 			swp_entry_t entry = { .val = page_private(head) };
 
-- 
GitLab


From 701270fa193aadf00bdcf607738f64997275d4c7 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Fri, 30 Nov 2018 14:10:25 -0800
Subject: [PATCH 1663/1890] mm/khugepaged: collapse_shmem() stop if punched or
 truncated

Huge tmpfs testing showed that although collapse_shmem() recognizes a
concurrently truncated or hole-punched page correctly, its handling of
holes was liable to refill an emptied extent.  Add check to stop that.

Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261522040.2275@eggly.anvils
Fixes: f3f0e1d2150b2 ("khugepaged: add support of collapse for tmpfs/shmem pages")
Signed-off-by: Hugh Dickins <hughd@google.com>
Reviewed-by: Matthew Wilcox <willy@infradead.org>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Cc: <stable@vger.kernel.org>	[4.8+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/khugepaged.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index c13625c1ad5e5..2070c316f06ef 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1359,6 +1359,17 @@ static void collapse_shmem(struct mm_struct *mm,
 
 		VM_BUG_ON(index != xas.xa_index);
 		if (!page) {
+			/*
+			 * Stop if extent has been truncated or hole-punched,
+			 * and is now completely empty.
+			 */
+			if (index == start) {
+				if (!xas_next_entry(&xas, end - 1)) {
+					result = SCAN_TRUNCATED;
+					break;
+				}
+				xas_set(&xas, index);
+			}
 			if (!shmem_charge(mapping->host, 1)) {
 				result = SCAN_FAIL;
 				break;
-- 
GitLab


From aaa52e340073b7f4593b3c4ddafcafa70cf838b5 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Fri, 30 Nov 2018 14:10:29 -0800
Subject: [PATCH 1664/1890] mm/khugepaged: fix crashes due to misaccounted
 holes

Huge tmpfs testing on a shortish file mapped into a pmd-rounded extent
hit shmem_evict_inode()'s WARN_ON(inode->i_blocks) followed by
clear_inode()'s BUG_ON(inode->i_data.nrpages) when the file was later
closed and unlinked.

khugepaged's collapse_shmem() was forgetting to update mapping->nrpages
on the rollback path, after it had added but then needs to undo some
holes.

There is indeed an irritating asymmetry between shmem_charge(), whose
callers want it to increment nrpages after successfully accounting
blocks, and shmem_uncharge(), when __delete_from_page_cache() already
decremented nrpages itself: oh well, just add a comment on that to them
both.

And shmem_recalc_inode() is supposed to be called when the accounting is
expected to be in balance (so it can deduce from imbalance that reclaim
discarded some pages): so change shmem_charge() to update nrpages
earlier (though it's rare for the difference to matter at all).

Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261523450.2275@eggly.anvils
Fixes: 800d8c63b2e98 ("shmem: add huge pages support")
Fixes: f3f0e1d2150b2 ("khugepaged: add support of collapse for tmpfs/shmem pages")
Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: <stable@vger.kernel.org>	[4.8+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/khugepaged.c | 5 ++++-
 mm/shmem.c      | 6 +++++-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 2070c316f06ef..65e82f665c7c2 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1506,9 +1506,12 @@ static void collapse_shmem(struct mm_struct *mm,
 		khugepaged_pages_collapsed++;
 	} else {
 		struct page *page;
+
 		/* Something went wrong: roll back page cache changes */
-		shmem_uncharge(mapping->host, nr_none);
 		xas_lock_irq(&xas);
+		mapping->nrpages -= nr_none;
+		shmem_uncharge(mapping->host, nr_none);
+
 		xas_set(&xas, start);
 		xas_for_each(&xas, page, end - 1) {
 			page = list_first_entry_or_null(&pagelist,
diff --git a/mm/shmem.c b/mm/shmem.c
index 16a3d7044c52b..cddc72ac44d86 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -297,12 +297,14 @@ bool shmem_charge(struct inode *inode, long pages)
 	if (!shmem_inode_acct_block(inode, pages))
 		return false;
 
+	/* nrpages adjustment first, then shmem_recalc_inode() when balanced */
+	inode->i_mapping->nrpages += pages;
+
 	spin_lock_irqsave(&info->lock, flags);
 	info->alloced += pages;
 	inode->i_blocks += pages * BLOCKS_PER_PAGE;
 	shmem_recalc_inode(inode);
 	spin_unlock_irqrestore(&info->lock, flags);
-	inode->i_mapping->nrpages += pages;
 
 	return true;
 }
@@ -312,6 +314,8 @@ void shmem_uncharge(struct inode *inode, long pages)
 	struct shmem_inode_info *info = SHMEM_I(inode);
 	unsigned long flags;
 
+	/* nrpages adjustment done by __delete_from_page_cache() or caller */
+
 	spin_lock_irqsave(&info->lock, flags);
 	info->alloced -= pages;
 	inode->i_blocks -= pages * BLOCKS_PER_PAGE;
-- 
GitLab


From 2af8ff291848cc4b1cce24b6c943394eb2c761e8 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Fri, 30 Nov 2018 14:10:35 -0800
Subject: [PATCH 1665/1890] mm/khugepaged: collapse_shmem() remember to clear
 holes

Huge tmpfs testing reminds us that there is no __GFP_ZERO in the gfp
flags khugepaged uses to allocate a huge page - in all common cases it
would just be a waste of effort - so collapse_shmem() must remember to
clear out any holes that it instantiates.

The obvious place to do so, where they are put into the page cache tree,
is not a good choice: because interrupts are disabled there.  Leave it
until further down, once success is assured, where the other pages are
copied (before setting PageUptodate).

Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261525080.2275@eggly.anvils
Fixes: f3f0e1d2150b2 ("khugepaged: add support of collapse for tmpfs/shmem pages")
Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: <stable@vger.kernel.org>	[4.8+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/khugepaged.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 65e82f665c7c2..1c402d33547e4 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1467,7 +1467,12 @@ static void collapse_shmem(struct mm_struct *mm,
 		 * Replacing old pages with new one has succeeded, now we
 		 * need to copy the content and free the old pages.
 		 */
+		index = start;
 		list_for_each_entry_safe(page, tmp, &pagelist, lru) {
+			while (index < page->index) {
+				clear_highpage(new_page + (index % HPAGE_PMD_NR));
+				index++;
+			}
 			copy_highpage(new_page + (page->index % HPAGE_PMD_NR),
 					page);
 			list_del(&page->lru);
@@ -1477,6 +1482,11 @@ static void collapse_shmem(struct mm_struct *mm,
 			ClearPageActive(page);
 			ClearPageUnevictable(page);
 			put_page(page);
+			index++;
+		}
+		while (index < end) {
+			clear_highpage(new_page + (index % HPAGE_PMD_NR));
+			index++;
 		}
 
 		local_irq_disable();
-- 
GitLab


From 042a30824871fa3149b0127009074b75cc25863c Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Fri, 30 Nov 2018 14:10:39 -0800
Subject: [PATCH 1666/1890] mm/khugepaged: minor reorderings in
 collapse_shmem()

Several cleanups in collapse_shmem(): most of which probably do not
really matter, beyond doing things in a more familiar and reassuring
order.  Simplify the failure gotos in the main loop, and on success
update stats while interrupts still disabled from the last iteration.

Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261526400.2275@eggly.anvils
Fixes: f3f0e1d2150b2 ("khugepaged: add support of collapse for tmpfs/shmem pages")
Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: <stable@vger.kernel.org>	[4.8+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/khugepaged.c | 72 ++++++++++++++++++++++---------------------------
 1 file changed, 32 insertions(+), 40 deletions(-)

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 1c402d33547e4..9d4e9ff1af95e 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1329,10 +1329,10 @@ static void collapse_shmem(struct mm_struct *mm,
 		goto out;
 	}
 
+	__SetPageLocked(new_page);
+	__SetPageSwapBacked(new_page);
 	new_page->index = start;
 	new_page->mapping = mapping;
-	__SetPageSwapBacked(new_page);
-	__SetPageLocked(new_page);
 	BUG_ON(!page_ref_freeze(new_page, 1));
 
 	/*
@@ -1366,13 +1366,13 @@ static void collapse_shmem(struct mm_struct *mm,
 			if (index == start) {
 				if (!xas_next_entry(&xas, end - 1)) {
 					result = SCAN_TRUNCATED;
-					break;
+					goto xa_locked;
 				}
 				xas_set(&xas, index);
 			}
 			if (!shmem_charge(mapping->host, 1)) {
 				result = SCAN_FAIL;
-				break;
+				goto xa_locked;
 			}
 			xas_store(&xas, new_page + (index % HPAGE_PMD_NR));
 			nr_none++;
@@ -1387,13 +1387,12 @@ static void collapse_shmem(struct mm_struct *mm,
 				result = SCAN_FAIL;
 				goto xa_unlocked;
 			}
-			xas_lock_irq(&xas);
-			xas_set(&xas, index);
 		} else if (trylock_page(page)) {
 			get_page(page);
+			xas_unlock_irq(&xas);
 		} else {
 			result = SCAN_PAGE_LOCK;
-			break;
+			goto xa_locked;
 		}
 
 		/*
@@ -1408,11 +1407,10 @@ static void collapse_shmem(struct mm_struct *mm,
 			result = SCAN_TRUNCATED;
 			goto out_unlock;
 		}
-		xas_unlock_irq(&xas);
 
 		if (isolate_lru_page(page)) {
 			result = SCAN_DEL_PAGE_LRU;
-			goto out_isolate_failed;
+			goto out_unlock;
 		}
 
 		if (page_mapped(page))
@@ -1432,7 +1430,9 @@ static void collapse_shmem(struct mm_struct *mm,
 		 */
 		if (!page_ref_freeze(page, 3)) {
 			result = SCAN_PAGE_COUNT;
-			goto out_lru;
+			xas_unlock_irq(&xas);
+			putback_lru_page(page);
+			goto out_unlock;
 		}
 
 		/*
@@ -1444,24 +1444,26 @@ static void collapse_shmem(struct mm_struct *mm,
 		/* Finally, replace with the new page. */
 		xas_store(&xas, new_page + (index % HPAGE_PMD_NR));
 		continue;
-out_lru:
-		xas_unlock_irq(&xas);
-		putback_lru_page(page);
-out_isolate_failed:
-		unlock_page(page);
-		put_page(page);
-		goto xa_unlocked;
 out_unlock:
 		unlock_page(page);
 		put_page(page);
-		break;
+		goto xa_unlocked;
 	}
-	xas_unlock_irq(&xas);
 
+	__inc_node_page_state(new_page, NR_SHMEM_THPS);
+	if (nr_none) {
+		struct zone *zone = page_zone(new_page);
+
+		__mod_node_page_state(zone->zone_pgdat, NR_FILE_PAGES, nr_none);
+		__mod_node_page_state(zone->zone_pgdat, NR_SHMEM, nr_none);
+	}
+
+xa_locked:
+	xas_unlock_irq(&xas);
 xa_unlocked:
+
 	if (result == SCAN_SUCCEED) {
 		struct page *page, *tmp;
-		struct zone *zone = page_zone(new_page);
 
 		/*
 		 * Replacing old pages with new one has succeeded, now we
@@ -1476,11 +1478,11 @@ static void collapse_shmem(struct mm_struct *mm,
 			copy_highpage(new_page + (page->index % HPAGE_PMD_NR),
 					page);
 			list_del(&page->lru);
-			unlock_page(page);
-			page_ref_unfreeze(page, 1);
 			page->mapping = NULL;
+			page_ref_unfreeze(page, 1);
 			ClearPageActive(page);
 			ClearPageUnevictable(page);
+			unlock_page(page);
 			put_page(page);
 			index++;
 		}
@@ -1489,28 +1491,17 @@ static void collapse_shmem(struct mm_struct *mm,
 			index++;
 		}
 
-		local_irq_disable();
-		__inc_node_page_state(new_page, NR_SHMEM_THPS);
-		if (nr_none) {
-			__mod_node_page_state(zone->zone_pgdat, NR_FILE_PAGES, nr_none);
-			__mod_node_page_state(zone->zone_pgdat, NR_SHMEM, nr_none);
-		}
-		local_irq_enable();
-
-		/*
-		 * Remove pte page tables, so we can re-fault
-		 * the page as huge.
-		 */
-		retract_page_tables(mapping, start);
-
 		/* Everything is ready, let's unfreeze the new_page */
-		set_page_dirty(new_page);
 		SetPageUptodate(new_page);
 		page_ref_unfreeze(new_page, HPAGE_PMD_NR);
+		set_page_dirty(new_page);
 		mem_cgroup_commit_charge(new_page, memcg, false, true);
 		lru_cache_add_anon(new_page);
-		unlock_page(new_page);
 
+		/*
+		 * Remove pte page tables, so we can re-fault the page as huge.
+		 */
+		retract_page_tables(mapping, start);
 		*hpage = NULL;
 
 		khugepaged_pages_collapsed++;
@@ -1543,8 +1534,8 @@ static void collapse_shmem(struct mm_struct *mm,
 			xas_store(&xas, page);
 			xas_pause(&xas);
 			xas_unlock_irq(&xas);
-			putback_lru_page(page);
 			unlock_page(page);
+			putback_lru_page(page);
 			xas_lock_irq(&xas);
 		}
 		VM_BUG_ON(nr_none);
@@ -1553,9 +1544,10 @@ static void collapse_shmem(struct mm_struct *mm,
 		/* Unfreeze new_page, caller would take care about freeing it */
 		page_ref_unfreeze(new_page, 1);
 		mem_cgroup_cancel_charge(new_page, memcg, true);
-		unlock_page(new_page);
 		new_page->mapping = NULL;
 	}
+
+	unlock_page(new_page);
 out:
 	VM_BUG_ON(!list_empty(&pagelist));
 	/* TODO: tracepoints */
-- 
GitLab


From 87c460a0bded56195b5eb497d44709777ef7b415 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Fri, 30 Nov 2018 14:10:43 -0800
Subject: [PATCH 1667/1890] mm/khugepaged: collapse_shmem() without freezing
 new_page

khugepaged's collapse_shmem() does almost all of its work, to assemble
the huge new_page from 512 scattered old pages, with the new_page's
refcount frozen to 0 (and refcounts of all old pages so far also frozen
to 0).  Including shmem_getpage() to read in any which were out on swap,
memory reclaim if necessary to allocate their intermediate pages, and
copying over all the data from old to new.

Imagine the frozen refcount as a spinlock held, but without any lock
debugging to highlight the abuse: it's not good, and under serious load
heads into lockups - speculative getters of the page are not expecting
to spin while khugepaged is rescheduled.

One can get a little further under load by hacking around elsewhere; but
fortunately, freezing the new_page turns out to have been entirely
unnecessary, with no hacks needed elsewhere.

The huge new_page lock is already held throughout, and guards all its
subpages as they are brought one by one into the page cache tree; and
anything reading the data in that page, without the lock, before it has
been marked PageUptodate, would already be in the wrong.  So simply
eliminate the freezing of the new_page.

Each of the old pages remains frozen with refcount 0 after it has been
replaced by a new_page subpage in the page cache tree, until they are
all unfrozen on success or failure: just as before.  They could be
unfrozen sooner, but cause no problem once no longer visible to
find_get_entry(), filemap_map_pages() and other speculative lookups.

Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261527570.2275@eggly.anvils
Fixes: f3f0e1d2150b2 ("khugepaged: add support of collapse for tmpfs/shmem pages")
Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: <stable@vger.kernel.org>	[4.8+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/khugepaged.c | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 9d4e9ff1af95e..55930cbed3fd5 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1287,7 +1287,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
  * collapse_shmem - collapse small tmpfs/shmem pages into huge one.
  *
  * Basic scheme is simple, details are more complex:
- *  - allocate and freeze a new huge page;
+ *  - allocate and lock a new huge page;
  *  - scan page cache replacing old pages with the new one
  *    + swap in pages if necessary;
  *    + fill in gaps;
@@ -1295,11 +1295,11 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
  *  - if replacing succeeds:
  *    + copy data over;
  *    + free old pages;
- *    + unfreeze huge page;
+ *    + unlock huge page;
  *  - if replacing failed;
  *    + put all pages back and unfreeze them;
  *    + restore gaps in the page cache;
- *    + free huge page;
+ *    + unlock and free huge page;
  */
 static void collapse_shmem(struct mm_struct *mm,
 		struct address_space *mapping, pgoff_t start,
@@ -1333,13 +1333,11 @@ static void collapse_shmem(struct mm_struct *mm,
 	__SetPageSwapBacked(new_page);
 	new_page->index = start;
 	new_page->mapping = mapping;
-	BUG_ON(!page_ref_freeze(new_page, 1));
 
 	/*
-	 * At this point the new_page is 'frozen' (page_count() is zero),
-	 * locked and not up-to-date. It's safe to insert it into the page
-	 * cache, because nobody would be able to map it or use it in other
-	 * way until we unfreeze it.
+	 * At this point the new_page is locked and not up-to-date.
+	 * It's safe to insert it into the page cache, because nobody would
+	 * be able to map it or use it in another way until we unlock it.
 	 */
 
 	/* This will be less messy when we use multi-index entries */
@@ -1491,9 +1489,8 @@ static void collapse_shmem(struct mm_struct *mm,
 			index++;
 		}
 
-		/* Everything is ready, let's unfreeze the new_page */
 		SetPageUptodate(new_page);
-		page_ref_unfreeze(new_page, HPAGE_PMD_NR);
+		page_ref_add(new_page, HPAGE_PMD_NR - 1);
 		set_page_dirty(new_page);
 		mem_cgroup_commit_charge(new_page, memcg, false, true);
 		lru_cache_add_anon(new_page);
@@ -1541,8 +1538,6 @@ static void collapse_shmem(struct mm_struct *mm,
 		VM_BUG_ON(nr_none);
 		xas_unlock_irq(&xas);
 
-		/* Unfreeze new_page, caller would take care about freeing it */
-		page_ref_unfreeze(new_page, 1);
 		mem_cgroup_cancel_charge(new_page, memcg, true);
 		new_page->mapping = NULL;
 	}
-- 
GitLab


From 06a5e1268a5fb9c2b346a3da6b97e85f2eba0f07 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Fri, 30 Nov 2018 14:10:47 -0800
Subject: [PATCH 1668/1890] mm/khugepaged: collapse_shmem() do not crash on
 Compound

collapse_shmem()'s VM_BUG_ON_PAGE(PageTransCompound) was unsafe: before
it holds page lock of the first page, racing truncation then extension
might conceivably have inserted a hugepage there already.  Fail with the
SCAN_PAGE_COMPOUND result, instead of crashing (CONFIG_DEBUG_VM=y) or
otherwise mishandling the unexpected hugepage - though later we might
code up a more constructive way of handling it, with SCAN_SUCCESS.

Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261529310.2275@eggly.anvils
Fixes: f3f0e1d2150b2 ("khugepaged: add support of collapse for tmpfs/shmem pages")
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: <stable@vger.kernel.org>	[4.8+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/khugepaged.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 55930cbed3fd5..2c5fe4f7a0c6e 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1399,7 +1399,15 @@ static void collapse_shmem(struct mm_struct *mm,
 		 */
 		VM_BUG_ON_PAGE(!PageLocked(page), page);
 		VM_BUG_ON_PAGE(!PageUptodate(page), page);
-		VM_BUG_ON_PAGE(PageTransCompound(page), page);
+
+		/*
+		 * If file was truncated then extended, or hole-punched, before
+		 * we locked the first page, then a THP might be there already.
+		 */
+		if (PageTransCompound(page)) {
+			result = SCAN_PAGE_COMPOUND;
+			goto out_unlock;
+		}
 
 		if (page_mapping(page) != mapping) {
 			result = SCAN_TRUNCATED;
-- 
GitLab


From 95feeabb77149f7d48f05bde61d75621c57db67e Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Fri, 30 Nov 2018 14:10:50 -0800
Subject: [PATCH 1669/1890] mm/khugepaged: fix the xas_create_range() error
 path

collapse_shmem()'s xas_nomem() is very unlikely to fail, but it is
rightly given a failure path, so move the whole xas_create_range() block
up before __SetPageLocked(new_page): so that it does not need to
remember to unlock_page(new_page).

Add the missing mem_cgroup_cancel_charge(), and set (currently unused)
result to SCAN_FAIL rather than SCAN_SUCCEED.

Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1811261531200.2275@eggly.anvils
Fixes: 77da9389b9d5 ("mm: Convert collapse_shmem to XArray")
Signed-off-by: Hugh Dickins <hughd@kernel.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/khugepaged.c | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 2c5fe4f7a0c6e..8e2ff195ecb30 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1329,6 +1329,20 @@ static void collapse_shmem(struct mm_struct *mm,
 		goto out;
 	}
 
+	/* This will be less messy when we use multi-index entries */
+	do {
+		xas_lock_irq(&xas);
+		xas_create_range(&xas);
+		if (!xas_error(&xas))
+			break;
+		xas_unlock_irq(&xas);
+		if (!xas_nomem(&xas, GFP_KERNEL)) {
+			mem_cgroup_cancel_charge(new_page, memcg, true);
+			result = SCAN_FAIL;
+			goto out;
+		}
+	} while (1);
+
 	__SetPageLocked(new_page);
 	__SetPageSwapBacked(new_page);
 	new_page->index = start;
@@ -1340,17 +1354,6 @@ static void collapse_shmem(struct mm_struct *mm,
 	 * be able to map it or use it in another way until we unlock it.
 	 */
 
-	/* This will be less messy when we use multi-index entries */
-	do {
-		xas_lock_irq(&xas);
-		xas_create_range(&xas);
-		if (!xas_error(&xas))
-			break;
-		xas_unlock_irq(&xas);
-		if (!xas_nomem(&xas, GFP_KERNEL))
-			goto out;
-	} while (1);
-
 	xas_set(&xas, start);
 	for (index = start; index < end; index++) {
 		struct page *page = xas_next(&xas);
-- 
GitLab


From 164f7e586739d07eb56af6f6d66acebb11f315c8 Mon Sep 17 00:00:00 2001
From: Pan Bian <bianpan2016@163.com>
Date: Fri, 30 Nov 2018 14:10:54 -0800
Subject: [PATCH 1670/1890] ocfs2: fix potential use after free

ocfs2_get_dentry() calls iput(inode) to drop the reference count of
inode, and if the reference count hits 0, inode is freed.  However, in
this function, it then reads inode->i_generation, which may result in a
use after free bug.  Move the put operation later.

Link: http://lkml.kernel.org/r/1543109237-110227-1-git-send-email-bianpan2016@163.com
Fixes: 781f200cb7a("ocfs2: Remove masklog ML_EXPORT.")
Signed-off-by: Pan Bian <bianpan2016@163.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Joseph Qi <jiangqi903@gmail.com>
Cc: Changwei Ge <ge.changwei@h3c.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ocfs2/export.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 9f88188060db9..4bf8d5854b271 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -125,10 +125,10 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb,
 
 check_gen:
 	if (handle->ih_generation != inode->i_generation) {
-		iput(inode);
 		trace_ocfs2_get_dentry_generation((unsigned long long)blkno,
 						  handle->ih_generation,
 						  inode->i_generation);
+		iput(inode);
 		result = ERR_PTR(-ESTALE);
 		goto bail;
 	}
-- 
GitLab


From 6584297b78b66acb80917b664084f303317fcff1 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@mips.com>
Date: Fri, 30 Nov 2018 11:57:22 -0800
Subject: [PATCH 1671/1890] MAINTAINERS: Update linux-mips mailing list address

The linux-mips.org infrastructure has been unreliable recently & nobody
with sufficient access to fix it is around to do so. As a result we're
moving away from it, and part of this is migrating our mailing list to
kernel.org.

Replace all instances of linux-mips@linux-mips.org in MAINTAINERS with
the shiny new linux-mips@vger.kernel.org address.

The new list is now being archived on kernel.org at
https://lore.kernel.org/linux-mips/ which also holds the history of the
old linux-mips.org list.

Signed-off-by: Paul Burton <paul.burton@mips.com>
Cc: linux-mips@vger.kernel.org
Cc: linux-mips@linux-mips.org
---
 MAINTAINERS | 46 +++++++++++++++++++++++-----------------------
 1 file changed, 23 insertions(+), 23 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 0abecc528daca..c05c773b14c84 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2860,7 +2860,7 @@ F:	drivers/staging/vc04_services
 BROADCOM BCM47XX MIPS ARCHITECTURE
 M:	Hauke Mehrtens <hauke@hauke-m.de>
 M:	RafaÅ‚ MiÅ‚ecki <zajec5@gmail.com>
-L:	linux-mips@linux-mips.org
+L:	linux-mips@vger.kernel.org
 S:	Maintained
 F:	Documentation/devicetree/bindings/mips/brcm/
 F:	arch/mips/bcm47xx/*
@@ -2924,7 +2924,7 @@ F:	drivers/cpufreq/bmips-cpufreq.c
 BROADCOM BMIPS MIPS ARCHITECTURE
 M:	Kevin Cernekee <cernekee@gmail.com>
 M:	Florian Fainelli <f.fainelli@gmail.com>
-L:	linux-mips@linux-mips.org
+L:	linux-mips@vger.kernel.org
 T:	git git://github.com/broadcom/stblinux.git
 S:	Maintained
 F:	arch/mips/bmips/*
@@ -3062,7 +3062,7 @@ F:	include/uapi/rdma/bnxt_re-abi.h
 
 BROADCOM NVRAM DRIVER
 M:	RafaÅ‚ MiÅ‚ecki <zajec5@gmail.com>
-L:	linux-mips@linux-mips.org
+L:	linux-mips@vger.kernel.org
 S:	Maintained
 F:	drivers/firmware/broadcom/*
 
@@ -4158,7 +4158,7 @@ F:	net/decnet/
 
 DECSTATION PLATFORM SUPPORT
 M:	"Maciej W. Rozycki" <macro@linux-mips.org>
-L:	linux-mips@linux-mips.org
+L:	linux-mips@vger.kernel.org
 W:	http://www.linux-mips.org/wiki/DECstation
 S:	Maintained
 F:	arch/mips/dec/
@@ -5249,7 +5249,7 @@ EDAC-CAVIUM OCTEON
 M:	Ralf Baechle <ralf@linux-mips.org>
 M:	David Daney <david.daney@cavium.com>
 L:	linux-edac@vger.kernel.org
-L:	linux-mips@linux-mips.org
+L:	linux-mips@vger.kernel.org
 S:	Supported
 F:	drivers/edac/octeon_edac*
 
@@ -7685,7 +7685,7 @@ F:	Documentation/devicetree/bindings/iio/gyroscope/invensense,mpu3050.txt
 
 IOC3 ETHERNET DRIVER
 M:	Ralf Baechle <ralf@linux-mips.org>
-L:	linux-mips@linux-mips.org
+L:	linux-mips@vger.kernel.org
 S:	Maintained
 F:	drivers/net/ethernet/sgi/ioc3-eth.c
 
@@ -8113,7 +8113,7 @@ F:	arch/arm64/kvm/
 
 KERNEL VIRTUAL MACHINE FOR MIPS (KVM/mips)
 M:	James Hogan <jhogan@kernel.org>
-L:	linux-mips@linux-mips.org
+L:	linux-mips@vger.kernel.org
 S:	Supported
 F:	arch/mips/include/uapi/asm/kvm*
 F:	arch/mips/include/asm/kvm*
@@ -8286,7 +8286,7 @@ F:	drivers/net/dsa/lantiq_gswip.c
 
 LANTIQ MIPS ARCHITECTURE
 M:	John Crispin <john@phrozen.org>
-L:	linux-mips@linux-mips.org
+L:	linux-mips@vger.kernel.org
 S:	Maintained
 F:	arch/mips/lantiq
 F:	drivers/soc/lantiq
@@ -8849,7 +8849,7 @@ S:	Maintained
 
 MARDUK (CREATOR CI40) DEVICE TREE SUPPORT
 M:	Rahul Bedarkar <rahulbedarkar89@gmail.com>
-L:	linux-mips@linux-mips.org
+L:	linux-mips@vger.kernel.org
 S:	Maintained
 F:	arch/mips/boot/dts/img/pistachio_marduk.dts
 
@@ -9808,7 +9808,7 @@ F:	drivers/dma/at_xdmac.c
 
 MICROSEMI MIPS SOCS
 M:	Alexandre Belloni <alexandre.belloni@bootlin.com>
-L:	linux-mips@linux-mips.org
+L:	linux-mips@vger.kernel.org
 S:	Maintained
 F:	arch/mips/generic/board-ocelot.c
 F:	arch/mips/configs/generic/board-ocelot.config
@@ -9848,7 +9848,7 @@ MIPS
 M:	Ralf Baechle <ralf@linux-mips.org>
 M:	Paul Burton <paul.burton@mips.com>
 M:	James Hogan <jhogan@kernel.org>
-L:	linux-mips@linux-mips.org
+L:	linux-mips@vger.kernel.org
 W:	http://www.linux-mips.org/
 T:	git git://git.linux-mips.org/pub/scm/ralf/linux.git
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mips/linux.git
@@ -9861,7 +9861,7 @@ F:	drivers/platform/mips/
 
 MIPS BOSTON DEVELOPMENT BOARD
 M:	Paul Burton <paul.burton@mips.com>
-L:	linux-mips@linux-mips.org
+L:	linux-mips@vger.kernel.org
 S:	Maintained
 F:	Documentation/devicetree/bindings/clock/img,boston-clock.txt
 F:	arch/mips/boot/dts/img/boston.dts
@@ -9871,7 +9871,7 @@ F:	include/dt-bindings/clock/boston-clock.h
 
 MIPS GENERIC PLATFORM
 M:	Paul Burton <paul.burton@mips.com>
-L:	linux-mips@linux-mips.org
+L:	linux-mips@vger.kernel.org
 S:	Supported
 F:	Documentation/devicetree/bindings/power/mti,mips-cpc.txt
 F:	arch/mips/generic/
@@ -9879,7 +9879,7 @@ F:	arch/mips/tools/generic-board-config.sh
 
 MIPS/LOONGSON1 ARCHITECTURE
 M:	Keguang Zhang <keguang.zhang@gmail.com>
-L:	linux-mips@linux-mips.org
+L:	linux-mips@vger.kernel.org
 S:	Maintained
 F:	arch/mips/loongson32/
 F:	arch/mips/include/asm/mach-loongson32/
@@ -9888,7 +9888,7 @@ F:	drivers/*/*/*loongson1*
 
 MIPS/LOONGSON2 ARCHITECTURE
 M:	Jiaxun Yang <jiaxun.yang@flygoat.com>
-L:	linux-mips@linux-mips.org
+L:	linux-mips@vger.kernel.org
 S:	Maintained
 F:	arch/mips/loongson64/fuloong-2e/
 F:	arch/mips/loongson64/lemote-2f/
@@ -9898,7 +9898,7 @@ F:	drivers/*/*/*loongson2*
 
 MIPS/LOONGSON3 ARCHITECTURE
 M:	Huacai Chen <chenhc@lemote.com>
-L:	linux-mips@linux-mips.org
+L:	linux-mips@vger.kernel.org
 S:	Maintained
 F:	arch/mips/loongson64/
 F:	arch/mips/include/asm/mach-loongson64/
@@ -9908,7 +9908,7 @@ F:	drivers/*/*/*loongson3*
 
 MIPS RINT INSTRUCTION EMULATION
 M:	Aleksandar Markovic <aleksandar.markovic@mips.com>
-L:	linux-mips@linux-mips.org
+L:	linux-mips@vger.kernel.org
 S:	Supported
 F:	arch/mips/math-emu/sp_rint.c
 F:	arch/mips/math-emu/dp_rint.c
@@ -10893,7 +10893,7 @@ F:	include/linux/platform_data/i2c-omap.h
 
 ONION OMEGA2+ BOARD
 M:	Harvey Hunt <harveyhuntnexus@gmail.com>
-L:	linux-mips@linux-mips.org
+L:	linux-mips@vger.kernel.org
 S:	Maintained
 F:	arch/mips/boot/dts/ralink/omega2p.dts
 
@@ -11801,7 +11801,7 @@ F:	drivers/pinctrl/spear/
 
 PISTACHIO SOC SUPPORT
 M:	James Hartley <james.hartley@sondrel.com>
-L:	linux-mips@linux-mips.org
+L:	linux-mips@vger.kernel.org
 S:	Odd Fixes
 F:	arch/mips/pistachio/
 F:	arch/mips/include/asm/mach-pistachio/
@@ -12460,7 +12460,7 @@ F:	drivers/media/usb/rainshadow-cec/*
 
 RALINK MIPS ARCHITECTURE
 M:	John Crispin <john@phrozen.org>
-L:	linux-mips@linux-mips.org
+L:	linux-mips@vger.kernel.org
 S:	Maintained
 F:	arch/mips/ralink
 
@@ -12480,7 +12480,7 @@ F:	drivers/block/brd.c
 
 RANCHU VIRTUAL BOARD FOR MIPS
 M:	Miodrag Dinic <miodrag.dinic@mips.com>
-L:	linux-mips@linux-mips.org
+L:	linux-mips@vger.kernel.org
 S:	Supported
 F:	arch/mips/generic/board-ranchu.c
 F:	arch/mips/configs/generic/board-ranchu.config
@@ -15215,7 +15215,7 @@ F:	arch/um/os-Linux/drivers/
 TURBOCHANNEL SUBSYSTEM
 M:	"Maciej W. Rozycki" <macro@linux-mips.org>
 M:	Ralf Baechle <ralf@linux-mips.org>
-L:	linux-mips@linux-mips.org
+L:	linux-mips@vger.kernel.org
 Q:	http://patchwork.linux-mips.org/project/linux-mips/list/
 S:	Maintained
 F:	drivers/tc/
@@ -16036,7 +16036,7 @@ F:	drivers/net/vmxnet3/
 
 VOCORE VOCORE2 BOARD
 M:	Harvey Hunt <harveyhuntnexus@gmail.com>
-L:	linux-mips@linux-mips.org
+L:	linux-mips@vger.kernel.org
 S:	Maintained
 F:	arch/mips/boot/dts/ralink/vocore2.dts
 
-- 
GitLab


From b7df9ada9a7700dbcca1ba53d217c01e3d48179c Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sat, 1 Dec 2018 01:18:53 +0100
Subject: [PATCH 1672/1890] bpf: fix pointer offsets in context for 32 bit

Currently, pointer offsets in three BPF context structures are
broken in two scenarios: i) 32 bit compiled applications running
on 64 bit kernels, and ii) LLVM compiled BPF programs running
on 32 bit kernels. The latter is due to BPF target machine being
strictly 64 bit. So in each of the cases the offsets will mismatch
in verifier when checking / rewriting context access. Fix this by
providing a helper macro __bpf_md_ptr() that will enforce padding
up to 64 bit and proper alignment, and for context access a macro
bpf_ctx_range_ptr() which will cover full 64 bit member range on
32 bit archs. For flow_keys, we additionally need to force the
size check to sizeof(__u64) as with other pointer types.

Fixes: d58e468b1112 ("flow_dissector: implements flow dissector BPF hook")
Fixes: 4f738adba30a ("bpf: create tcp_bpf_ulp allowing BPF to monitor socket TX/RX data")
Fixes: 2dbb9b9e6df6 ("bpf: Introduce BPF_PROG_TYPE_SK_REUSEPORT")
Reported-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: David S. Miller <davem@davemloft.net>
Tested-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/filter.h         |  7 +++++++
 include/uapi/linux/bpf.h       | 17 ++++++++++++-----
 net/core/filter.c              | 16 ++++++++--------
 tools/include/uapi/linux/bpf.h | 17 ++++++++++++-----
 4 files changed, 39 insertions(+), 18 deletions(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 448dcc448f1fe..795ff0b869bbf 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -449,6 +449,13 @@ struct sock_reuseport;
 	offsetof(TYPE, MEMBER) ... offsetofend(TYPE, MEMBER) - 1
 #define bpf_ctx_range_till(TYPE, MEMBER1, MEMBER2)				\
 	offsetof(TYPE, MEMBER1) ... offsetofend(TYPE, MEMBER2) - 1
+#if BITS_PER_LONG == 64
+# define bpf_ctx_range_ptr(TYPE, MEMBER)					\
+	offsetof(TYPE, MEMBER) ... offsetofend(TYPE, MEMBER) - 1
+#else
+# define bpf_ctx_range_ptr(TYPE, MEMBER)					\
+	offsetof(TYPE, MEMBER) ... offsetof(TYPE, MEMBER) + 8 - 1
+#endif /* BITS_PER_LONG == 64 */
 
 #define bpf_target_off(TYPE, MEMBER, SIZE, PTR_SIZE)				\
 	({									\
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 852dc17ab47a0..426b5c8a245ba 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2422,6 +2422,12 @@ enum bpf_lwt_encap_mode {
 	BPF_LWT_ENCAP_SEG6_INLINE
 };
 
+#define __bpf_md_ptr(type, name)	\
+union {					\
+	type name;			\
+	__u64 :64;			\
+} __attribute__((aligned(8)))
+
 /* user accessible mirror of in-kernel sk_buff.
  * new fields can only be added to the end of this structure
  */
@@ -2456,7 +2462,7 @@ struct __sk_buff {
 	/* ... here. */
 
 	__u32 data_meta;
-	struct bpf_flow_keys *flow_keys;
+	__bpf_md_ptr(struct bpf_flow_keys *, flow_keys);
 };
 
 struct bpf_tunnel_key {
@@ -2572,8 +2578,8 @@ enum sk_action {
  * be added to the end of this structure
  */
 struct sk_msg_md {
-	void *data;
-	void *data_end;
+	__bpf_md_ptr(void *, data);
+	__bpf_md_ptr(void *, data_end);
 
 	__u32 family;
 	__u32 remote_ip4;	/* Stored in network byte order */
@@ -2589,8 +2595,9 @@ struct sk_reuseport_md {
 	 * Start of directly accessible data. It begins from
 	 * the tcp/udp header.
 	 */
-	void *data;
-	void *data_end;		/* End of directly accessible data */
+	__bpf_md_ptr(void *, data);
+	/* End of directly accessible data */
+	__bpf_md_ptr(void *, data_end);
 	/*
 	 * Total length of packet (starting from the tcp/udp header).
 	 * Note that the directly accessible bytes (data_end - data)
diff --git a/net/core/filter.c b/net/core/filter.c
index 9a1327eb25faf..6ee605da990fc 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5435,8 +5435,8 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
 		if (size != size_default)
 			return false;
 		break;
-	case bpf_ctx_range(struct __sk_buff, flow_keys):
-		if (size != sizeof(struct bpf_flow_keys *))
+	case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
+		if (size != sizeof(__u64))
 			return false;
 		break;
 	default:
@@ -5464,7 +5464,7 @@ static bool sk_filter_is_valid_access(int off, int size,
 	case bpf_ctx_range(struct __sk_buff, data):
 	case bpf_ctx_range(struct __sk_buff, data_meta):
 	case bpf_ctx_range(struct __sk_buff, data_end):
-	case bpf_ctx_range(struct __sk_buff, flow_keys):
+	case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
 	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
 		return false;
 	}
@@ -5489,7 +5489,7 @@ static bool cg_skb_is_valid_access(int off, int size,
 	switch (off) {
 	case bpf_ctx_range(struct __sk_buff, tc_classid):
 	case bpf_ctx_range(struct __sk_buff, data_meta):
-	case bpf_ctx_range(struct __sk_buff, flow_keys):
+	case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
 		return false;
 	case bpf_ctx_range(struct __sk_buff, data):
 	case bpf_ctx_range(struct __sk_buff, data_end):
@@ -5530,7 +5530,7 @@ static bool lwt_is_valid_access(int off, int size,
 	case bpf_ctx_range(struct __sk_buff, tc_classid):
 	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
 	case bpf_ctx_range(struct __sk_buff, data_meta):
-	case bpf_ctx_range(struct __sk_buff, flow_keys):
+	case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
 		return false;
 	}
 
@@ -5756,7 +5756,7 @@ static bool tc_cls_act_is_valid_access(int off, int size,
 	case bpf_ctx_range(struct __sk_buff, data_end):
 		info->reg_type = PTR_TO_PACKET_END;
 		break;
-	case bpf_ctx_range(struct __sk_buff, flow_keys):
+	case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
 	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
 		return false;
 	}
@@ -5958,7 +5958,7 @@ static bool sk_skb_is_valid_access(int off, int size,
 	switch (off) {
 	case bpf_ctx_range(struct __sk_buff, tc_classid):
 	case bpf_ctx_range(struct __sk_buff, data_meta):
-	case bpf_ctx_range(struct __sk_buff, flow_keys):
+	case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
 		return false;
 	}
 
@@ -6039,7 +6039,7 @@ static bool flow_dissector_is_valid_access(int off, int size,
 	case bpf_ctx_range(struct __sk_buff, data_end):
 		info->reg_type = PTR_TO_PACKET_END;
 		break;
-	case bpf_ctx_range(struct __sk_buff, flow_keys):
+	case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
 		info->reg_type = PTR_TO_FLOW_KEYS;
 		break;
 	case bpf_ctx_range(struct __sk_buff, tc_classid):
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 852dc17ab47a0..426b5c8a245ba 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2422,6 +2422,12 @@ enum bpf_lwt_encap_mode {
 	BPF_LWT_ENCAP_SEG6_INLINE
 };
 
+#define __bpf_md_ptr(type, name)	\
+union {					\
+	type name;			\
+	__u64 :64;			\
+} __attribute__((aligned(8)))
+
 /* user accessible mirror of in-kernel sk_buff.
  * new fields can only be added to the end of this structure
  */
@@ -2456,7 +2462,7 @@ struct __sk_buff {
 	/* ... here. */
 
 	__u32 data_meta;
-	struct bpf_flow_keys *flow_keys;
+	__bpf_md_ptr(struct bpf_flow_keys *, flow_keys);
 };
 
 struct bpf_tunnel_key {
@@ -2572,8 +2578,8 @@ enum sk_action {
  * be added to the end of this structure
  */
 struct sk_msg_md {
-	void *data;
-	void *data_end;
+	__bpf_md_ptr(void *, data);
+	__bpf_md_ptr(void *, data_end);
 
 	__u32 family;
 	__u32 remote_ip4;	/* Stored in network byte order */
@@ -2589,8 +2595,9 @@ struct sk_reuseport_md {
 	 * Start of directly accessible data. It begins from
 	 * the tcp/udp header.
 	 */
-	void *data;
-	void *data_end;		/* End of directly accessible data */
+	__bpf_md_ptr(void *, data);
+	/* End of directly accessible data */
+	__bpf_md_ptr(void *, data_end);
 	/*
 	 * Total length of packet (starting from the tcp/udp header).
 	 * Note that the directly accessible bytes (data_end - data)
-- 
GitLab


From fd6d433865a2ad1f7e018ef80408cb3dc3be1ab3 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Wed, 28 Nov 2018 18:43:42 +0100
Subject: [PATCH 1673/1890] net/sched: act_police: fix memory leak in case of
 invalid control action

when users set an invalid control action, kmemleak complains as follows:

 # echo clear >/sys/kernel/debug/kmemleak
 # ./tdc.py -e b48b
 Test b48b: Add police action with exceed goto chain control action
 All test results:

 1..1
 ok 1 - b48b # Add police action with exceed goto chain control action
 about to flush the tap output if tests need to be skipped
 done flushing skipped test tap output
 # echo scan >/sys/kernel/debug/kmemleak
 # cat /sys/kernel/debug/kmemleak
 unreferenced object 0xffffa0fafbc3dde0 (size 96):
  comm "tc", pid 2358, jiffies 4294922738 (age 17.022s)
  hex dump (first 32 bytes):
    2a 00 00 20 00 00 00 00 00 00 7d 00 00 00 00 00  *.. ......}.....
    f8 07 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<00000000648803d2>] tcf_action_init_1+0x384/0x4c0
    [<00000000cb69382e>] tcf_action_init+0x12b/0x1a0
    [<00000000847ef0d4>] tcf_action_add+0x73/0x170
    [<0000000093656e14>] tc_ctl_action+0x122/0x160
    [<0000000023c98e32>] rtnetlink_rcv_msg+0x263/0x2d0
    [<000000003493ae9c>] netlink_rcv_skb+0x4d/0x130
    [<00000000de63f8ba>] netlink_unicast+0x209/0x2d0
    [<00000000c3da0ebe>] netlink_sendmsg+0x2c1/0x3c0
    [<000000007a9e0753>] sock_sendmsg+0x33/0x40
    [<00000000457c6d2e>] ___sys_sendmsg+0x2a0/0x2f0
    [<00000000c5c6a086>] __sys_sendmsg+0x5e/0xa0
    [<00000000446eafce>] do_syscall_64+0x5b/0x180
    [<000000004aa871f2>] entry_SYSCALL_64_after_hwframe+0x44/0xa9
    [<00000000450c38ef>] 0xffffffffffffffff

change tcf_police_init() to avoid leaking 'new' in case TCA_POLICE_RESULT
contains TC_ACT_GOTO_CHAIN extended action.

Fixes: c08f5ed5d625 ("net/sched: act_police: disallow 'goto chain' on fallback control action")
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_police.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 37c9b8f0e10f0..ec8ec55e0fe87 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -85,7 +85,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
 			       int ovr, int bind, bool rtnl_held,
 			       struct netlink_ext_ack *extack)
 {
-	int ret = 0, err;
+	int ret = 0, tcfp_result = TC_ACT_OK, err, size;
 	struct nlattr *tb[TCA_POLICE_MAX + 1];
 	struct tc_police *parm;
 	struct tcf_police *police;
@@ -93,7 +93,6 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
 	struct tc_action_net *tn = net_generic(net, police_net_id);
 	struct tcf_police_params *new;
 	bool exists = false;
-	int size;
 
 	if (nla == NULL)
 		return -EINVAL;
@@ -160,6 +159,16 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
 		goto failure;
 	}
 
+	if (tb[TCA_POLICE_RESULT]) {
+		tcfp_result = nla_get_u32(tb[TCA_POLICE_RESULT]);
+		if (TC_ACT_EXT_CMP(tcfp_result, TC_ACT_GOTO_CHAIN)) {
+			NL_SET_ERR_MSG(extack,
+				       "goto chain not allowed on fallback");
+			err = -EINVAL;
+			goto failure;
+		}
+	}
+
 	new = kzalloc(sizeof(*new), GFP_KERNEL);
 	if (unlikely(!new)) {
 		err = -ENOMEM;
@@ -167,6 +176,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
 	}
 
 	/* No failure allowed after this point */
+	new->tcfp_result = tcfp_result;
 	new->tcfp_mtu = parm->mtu;
 	if (!new->tcfp_mtu) {
 		new->tcfp_mtu = ~0;
@@ -196,16 +206,6 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
 	if (tb[TCA_POLICE_AVRATE])
 		new->tcfp_ewma_rate = nla_get_u32(tb[TCA_POLICE_AVRATE]);
 
-	if (tb[TCA_POLICE_RESULT]) {
-		new->tcfp_result = nla_get_u32(tb[TCA_POLICE_RESULT]);
-		if (TC_ACT_EXT_CMP(new->tcfp_result, TC_ACT_GOTO_CHAIN)) {
-			NL_SET_ERR_MSG(extack,
-				       "goto chain not allowed on fallback");
-			err = -EINVAL;
-			goto failure;
-		}
-	}
-
 	spin_lock_bh(&police->tcf_lock);
 	spin_lock_bh(&police->tcfp_lock);
 	police->tcfp_t_c = ktime_get_ns();
-- 
GitLab


From f71c6143c2038df1cb43a4b9c90740d14f77467c Mon Sep 17 00:00:00 2001
From: Joe Stringer <joe@wand.net.nz>
Date: Fri, 30 Nov 2018 15:32:20 -0800
Subject: [PATCH 1674/1890] bpf: Support sk lookup in netns with id 0

David Ahern and Nicolas Dichtel report that the handling of the netns id
0 is incorrect for the BPF socket lookup helpers: rather than finding
the netns with id 0, it is resolving to the current netns. This renders
the netns_id 0 inaccessible.

To fix this, adjust the API for the netns to treat all negative s32
values as a lookup in the current netns (including u64 values which when
truncated to s32 become negative), while any values with a positive
value in the signed 32-bit integer space would result in a lookup for a
socket in the netns corresponding to that id. As before, if the netns
with that ID does not exist, no socket will be found. Any netns outside
of these ranges will fail to find a corresponding socket, as those
values are reserved for future usage.

Signed-off-by: Joe Stringer <joe@wand.net.nz>
Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Acked-by: Joey Pabalinas <joeypabalinas@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/uapi/linux/bpf.h                      | 35 ++++++++++-------
 net/core/filter.c                             | 11 +++---
 tools/include/uapi/linux/bpf.h                | 39 ++++++++++++-------
 tools/testing/selftests/bpf/bpf_helpers.h     |  4 +-
 .../selftests/bpf/test_sk_lookup_kern.c       | 18 ++++-----
 5 files changed, 63 insertions(+), 44 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 426b5c8a245ba..cba518c572293 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2170,7 +2170,7 @@ union bpf_attr {
  *	Return
  *		0 on success, or a negative error in case of failure.
  *
- * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags)
+ * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
  *	Description
  *		Look for TCP socket matching *tuple*, optionally in a child
  *		network namespace *netns*. The return value must be checked,
@@ -2187,12 +2187,14 @@ union bpf_attr {
  *		**sizeof**\ (*tuple*\ **->ipv6**)
  *			Look for an IPv6 socket.
  *
- *		If the *netns* is zero, then the socket lookup table in the
- *		netns associated with the *ctx* will be used. For the TC hooks,
- *		this in the netns of the device in the skb. For socket hooks,
- *		this in the netns of the socket. If *netns* is non-zero, then
- *		it specifies the ID of the netns relative to the netns
- *		associated with the *ctx*.
+ *		If the *netns* is a negative signed 32-bit integer, then the
+ *		socket lookup table in the netns associated with the *ctx* will
+ *		will be used. For the TC hooks, this is the netns of the device
+ *		in the skb. For socket hooks, this is the netns of the socket.
+ *		If *netns* is any other signed 32-bit value greater than or
+ *		equal to zero then it specifies the ID of the netns relative to
+ *		the netns associated with the *ctx*. *netns* values beyond the
+ *		range of 32-bit integers are reserved for future use.
  *
  *		All values for *flags* are reserved for future usage, and must
  *		be left at zero.
@@ -2202,7 +2204,7 @@ union bpf_attr {
  *	Return
  *		Pointer to *struct bpf_sock*, or NULL in case of failure.
  *
- * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags)
+ * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
  *	Description
  *		Look for UDP socket matching *tuple*, optionally in a child
  *		network namespace *netns*. The return value must be checked,
@@ -2219,12 +2221,14 @@ union bpf_attr {
  *		**sizeof**\ (*tuple*\ **->ipv6**)
  *			Look for an IPv6 socket.
  *
- *		If the *netns* is zero, then the socket lookup table in the
- *		netns associated with the *ctx* will be used. For the TC hooks,
- *		this in the netns of the device in the skb. For socket hooks,
- *		this in the netns of the socket. If *netns* is non-zero, then
- *		it specifies the ID of the netns relative to the netns
- *		associated with the *ctx*.
+ *		If the *netns* is a negative signed 32-bit integer, then the
+ *		socket lookup table in the netns associated with the *ctx* will
+ *		will be used. For the TC hooks, this is the netns of the device
+ *		in the skb. For socket hooks, this is the netns of the socket.
+ *		If *netns* is any other signed 32-bit value greater than or
+ *		equal to zero then it specifies the ID of the netns relative to
+ *		the netns associated with the *ctx*. *netns* values beyond the
+ *		range of 32-bit integers are reserved for future use.
  *
  *		All values for *flags* are reserved for future usage, and must
  *		be left at zero.
@@ -2405,6 +2409,9 @@ enum bpf_func_id {
 /* BPF_FUNC_perf_event_output for sk_buff input context. */
 #define BPF_F_CTXLEN_MASK		(0xfffffULL << 32)
 
+/* Current network namespace */
+#define BPF_F_CURRENT_NETNS		(-1L)
+
 /* Mode for BPF_FUNC_skb_adjust_room helper. */
 enum bpf_adj_room_mode {
 	BPF_ADJ_ROOM_NET,
diff --git a/net/core/filter.c b/net/core/filter.c
index 6ee605da990fc..8d2c629501e2d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4890,22 +4890,23 @@ bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
 	struct net *net;
 
 	family = len == sizeof(tuple->ipv4) ? AF_INET : AF_INET6;
-	if (unlikely(family == AF_UNSPEC || netns_id > U32_MAX || flags))
+	if (unlikely(family == AF_UNSPEC || flags ||
+		     !((s32)netns_id < 0 || netns_id <= S32_MAX)))
 		goto out;
 
 	if (skb->dev)
 		caller_net = dev_net(skb->dev);
 	else
 		caller_net = sock_net(skb->sk);
-	if (netns_id) {
+	if ((s32)netns_id < 0) {
+		net = caller_net;
+		sk = sk_lookup(net, tuple, skb, family, proto);
+	} else {
 		net = get_net_ns_by_id(caller_net, netns_id);
 		if (unlikely(!net))
 			goto out;
 		sk = sk_lookup(net, tuple, skb, family, proto);
 		put_net(net);
-	} else {
-		net = caller_net;
-		sk = sk_lookup(net, tuple, skb, family, proto);
 	}
 
 	if (sk)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 426b5c8a245ba..76b265c7d93e9 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2170,7 +2170,7 @@ union bpf_attr {
  *	Return
  *		0 on success, or a negative error in case of failure.
  *
- * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags)
+ * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
  *	Description
  *		Look for TCP socket matching *tuple*, optionally in a child
  *		network namespace *netns*. The return value must be checked,
@@ -2187,12 +2187,14 @@ union bpf_attr {
  *		**sizeof**\ (*tuple*\ **->ipv6**)
  *			Look for an IPv6 socket.
  *
- *		If the *netns* is zero, then the socket lookup table in the
- *		netns associated with the *ctx* will be used. For the TC hooks,
- *		this in the netns of the device in the skb. For socket hooks,
- *		this in the netns of the socket. If *netns* is non-zero, then
- *		it specifies the ID of the netns relative to the netns
- *		associated with the *ctx*.
+ *		If the *netns* is a negative signed 32-bit integer, then the
+ *		socket lookup table in the netns associated with the *ctx* will
+ *		will be used. For the TC hooks, this is the netns of the device
+ *		in the skb. For socket hooks, this is the netns of the socket.
+ *		If *netns* is any other signed 32-bit value greater than or
+ *		equal to zero then it specifies the ID of the netns relative to
+ *		the netns associated with the *ctx*. *netns* values beyond the
+ *		range of 32-bit integers are reserved for future use.
  *
  *		All values for *flags* are reserved for future usage, and must
  *		be left at zero.
@@ -2201,8 +2203,10 @@ union bpf_attr {
  *		**CONFIG_NET** configuration option.
  *	Return
  *		Pointer to *struct bpf_sock*, or NULL in case of failure.
+ *		For sockets with reuseport option, *struct bpf_sock*
+ *		return is from reuse->socks[] using hash of the packet.
  *
- * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u32 netns, u64 flags)
+ * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
  *	Description
  *		Look for UDP socket matching *tuple*, optionally in a child
  *		network namespace *netns*. The return value must be checked,
@@ -2219,12 +2223,14 @@ union bpf_attr {
  *		**sizeof**\ (*tuple*\ **->ipv6**)
  *			Look for an IPv6 socket.
  *
- *		If the *netns* is zero, then the socket lookup table in the
- *		netns associated with the *ctx* will be used. For the TC hooks,
- *		this in the netns of the device in the skb. For socket hooks,
- *		this in the netns of the socket. If *netns* is non-zero, then
- *		it specifies the ID of the netns relative to the netns
- *		associated with the *ctx*.
+ *		If the *netns* is a negative signed 32-bit integer, then the
+ *		socket lookup table in the netns associated with the *ctx* will
+ *		will be used. For the TC hooks, this is the netns of the device
+ *		in the skb. For socket hooks, this is the netns of the socket.
+ *		If *netns* is any other signed 32-bit value greater than or
+ *		equal to zero then it specifies the ID of the netns relative to
+ *		the netns associated with the *ctx*. *netns* values beyond the
+ *		range of 32-bit integers are reserved for future use.
  *
  *		All values for *flags* are reserved for future usage, and must
  *		be left at zero.
@@ -2233,6 +2239,8 @@ union bpf_attr {
  *		**CONFIG_NET** configuration option.
  *	Return
  *		Pointer to *struct bpf_sock*, or NULL in case of failure.
+ *		For sockets with reuseport option, *struct bpf_sock*
+ *		return is from reuse->socks[] using hash of the packet.
  *
  * int bpf_sk_release(struct bpf_sock *sk)
  *	Description
@@ -2405,6 +2413,9 @@ enum bpf_func_id {
 /* BPF_FUNC_perf_event_output for sk_buff input context. */
 #define BPF_F_CTXLEN_MASK		(0xfffffULL << 32)
 
+/* Current network namespace */
+#define BPF_F_CURRENT_NETNS		(-1L)
+
 /* Mode for BPF_FUNC_skb_adjust_room helper. */
 enum bpf_adj_room_mode {
 	BPF_ADJ_ROOM_NET,
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index 686e57ce40f43..efb6c13ab0deb 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -154,12 +154,12 @@ static unsigned long long (*bpf_skb_ancestor_cgroup_id)(void *ctx, int level) =
 	(void *) BPF_FUNC_skb_ancestor_cgroup_id;
 static struct bpf_sock *(*bpf_sk_lookup_tcp)(void *ctx,
 					     struct bpf_sock_tuple *tuple,
-					     int size, unsigned int netns_id,
+					     int size, unsigned long long netns_id,
 					     unsigned long long flags) =
 	(void *) BPF_FUNC_sk_lookup_tcp;
 static struct bpf_sock *(*bpf_sk_lookup_udp)(void *ctx,
 					     struct bpf_sock_tuple *tuple,
-					     int size, unsigned int netns_id,
+					     int size, unsigned long long netns_id,
 					     unsigned long long flags) =
 	(void *) BPF_FUNC_sk_lookup_udp;
 static int (*bpf_sk_release)(struct bpf_sock *sk) =
diff --git a/tools/testing/selftests/bpf/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/test_sk_lookup_kern.c
index b745bdc08c2bd..e21cd736c196e 100644
--- a/tools/testing/selftests/bpf/test_sk_lookup_kern.c
+++ b/tools/testing/selftests/bpf/test_sk_lookup_kern.c
@@ -72,7 +72,7 @@ int bpf_sk_lookup_test0(struct __sk_buff *skb)
 		return TC_ACT_SHOT;
 
 	tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6);
-	sk = bpf_sk_lookup_tcp(skb, tuple, tuple_len, 0, 0);
+	sk = bpf_sk_lookup_tcp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0);
 	if (sk)
 		bpf_sk_release(sk);
 	return sk ? TC_ACT_OK : TC_ACT_UNSPEC;
@@ -84,7 +84,7 @@ int bpf_sk_lookup_test1(struct __sk_buff *skb)
 	struct bpf_sock_tuple tuple = {};
 	struct bpf_sock *sk;
 
-	sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0);
+	sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
 	if (sk)
 		bpf_sk_release(sk);
 	return 0;
@@ -97,7 +97,7 @@ int bpf_sk_lookup_uaf(struct __sk_buff *skb)
 	struct bpf_sock *sk;
 	__u32 family = 0;
 
-	sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0);
+	sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
 	if (sk) {
 		bpf_sk_release(sk);
 		family = sk->family;
@@ -112,7 +112,7 @@ int bpf_sk_lookup_modptr(struct __sk_buff *skb)
 	struct bpf_sock *sk;
 	__u32 family;
 
-	sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0);
+	sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
 	if (sk) {
 		sk += 1;
 		bpf_sk_release(sk);
@@ -127,7 +127,7 @@ int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb)
 	struct bpf_sock *sk;
 	__u32 family;
 
-	sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0);
+	sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
 	sk += 1;
 	if (sk)
 		bpf_sk_release(sk);
@@ -139,7 +139,7 @@ int bpf_sk_lookup_test2(struct __sk_buff *skb)
 {
 	struct bpf_sock_tuple tuple = {};
 
-	bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0);
+	bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
 	return 0;
 }
 
@@ -149,7 +149,7 @@ int bpf_sk_lookup_test3(struct __sk_buff *skb)
 	struct bpf_sock_tuple tuple = {};
 	struct bpf_sock *sk;
 
-	sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0);
+	sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
 	bpf_sk_release(sk);
 	bpf_sk_release(sk);
 	return 0;
@@ -161,7 +161,7 @@ int bpf_sk_lookup_test4(struct __sk_buff *skb)
 	struct bpf_sock_tuple tuple = {};
 	struct bpf_sock *sk;
 
-	sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0);
+	sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
 	bpf_sk_release(sk);
 	return 0;
 }
@@ -169,7 +169,7 @@ int bpf_sk_lookup_test4(struct __sk_buff *skb)
 void lookup_no_release(struct __sk_buff *skb)
 {
 	struct bpf_sock_tuple tuple = {};
-	bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), 0, 0);
+	bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
 }
 
 SEC("fail_no_release_subcall")
-- 
GitLab


From d74286d2c25ad29dbf9e342955dd8dc31f21653b Mon Sep 17 00:00:00 2001
From: Joe Stringer <joe@wand.net.nz>
Date: Fri, 30 Nov 2018 15:32:21 -0800
Subject: [PATCH 1675/1890] bpf: Improve socket lookup reuseport documentation

Improve the wording around socket lookup for reuseport sockets, and
ensure that both bpf.h headers are in sync.

Signed-off-by: Joe Stringer <joe@wand.net.nz>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/uapi/linux/bpf.h       | 4 ++++
 tools/include/uapi/linux/bpf.h | 8 ++++----
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index cba518c572293..72c453a8bf50e 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2203,6 +2203,8 @@ union bpf_attr {
  *		**CONFIG_NET** configuration option.
  *	Return
  *		Pointer to *struct bpf_sock*, or NULL in case of failure.
+ *		For sockets with reuseport option, the *struct bpf_sock*
+ *		result is from reuse->socks[] using the hash of the tuple.
  *
  * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
  *	Description
@@ -2237,6 +2239,8 @@ union bpf_attr {
  *		**CONFIG_NET** configuration option.
  *	Return
  *		Pointer to *struct bpf_sock*, or NULL in case of failure.
+ *		For sockets with reuseport option, the *struct bpf_sock*
+ *		result is from reuse->socks[] using the hash of the tuple.
  *
  * int bpf_sk_release(struct bpf_sock *sk)
  *	Description
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 76b265c7d93e9..72c453a8bf50e 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2203,8 +2203,8 @@ union bpf_attr {
  *		**CONFIG_NET** configuration option.
  *	Return
  *		Pointer to *struct bpf_sock*, or NULL in case of failure.
- *		For sockets with reuseport option, *struct bpf_sock*
- *		return is from reuse->socks[] using hash of the packet.
+ *		For sockets with reuseport option, the *struct bpf_sock*
+ *		result is from reuse->socks[] using the hash of the tuple.
  *
  * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
  *	Description
@@ -2239,8 +2239,8 @@ union bpf_attr {
  *		**CONFIG_NET** configuration option.
  *	Return
  *		Pointer to *struct bpf_sock*, or NULL in case of failure.
- *		For sockets with reuseport option, *struct bpf_sock*
- *		return is from reuse->socks[] using hash of the packet.
+ *		For sockets with reuseport option, the *struct bpf_sock*
+ *		result is from reuse->socks[] using the hash of the tuple.
  *
  * int bpf_sk_release(struct bpf_sock *sk)
  *	Description
-- 
GitLab


From a3d7e01da06013dc580641a1da57c3b482d58157 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 28 Nov 2018 13:40:04 -0800
Subject: [PATCH 1676/1890] net: dsa: Fix tagging attribute location

While introducing the DSA tagging protocol attribute, it was added to the DSA
slave network devices, but those actually see untagged traffic (that is their
whole purpose). Correct this mistake by putting the tagging sysfs attribute
under the DSA master network device where this is the information that we need.

While at it, also correct the sysfs documentation mistake that missed the
"dsa/" directory component of the attribute.

Fixes: 98cdb4807123 ("net: dsa: Expose tagging protocol to user-space")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/ABI/testing/sysfs-class-net-dsa |  2 +-
 net/dsa/master.c                              | 34 ++++++++++++++++++-
 net/dsa/slave.c                               | 28 ---------------
 3 files changed, 34 insertions(+), 30 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-class-net-dsa b/Documentation/ABI/testing/sysfs-class-net-dsa
index f240221e071ef..985d84c585c66 100644
--- a/Documentation/ABI/testing/sysfs-class-net-dsa
+++ b/Documentation/ABI/testing/sysfs-class-net-dsa
@@ -1,4 +1,4 @@
-What:		/sys/class/net/<iface>/tagging
+What:		/sys/class/net/<iface>/dsa/tagging
 Date:		August 2018
 KernelVersion:	4.20
 Contact:	netdev@vger.kernel.org
diff --git a/net/dsa/master.c b/net/dsa/master.c
index c90ee3227deab..5e8c9bef78bd2 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -158,8 +158,31 @@ static void dsa_master_ethtool_teardown(struct net_device *dev)
 	cpu_dp->orig_ethtool_ops = NULL;
 }
 
+static ssize_t tagging_show(struct device *d, struct device_attribute *attr,
+			    char *buf)
+{
+	struct net_device *dev = to_net_dev(d);
+	struct dsa_port *cpu_dp = dev->dsa_ptr;
+
+	return sprintf(buf, "%s\n",
+		       dsa_tag_protocol_to_str(cpu_dp->tag_ops));
+}
+static DEVICE_ATTR_RO(tagging);
+
+static struct attribute *dsa_slave_attrs[] = {
+	&dev_attr_tagging.attr,
+	NULL
+};
+
+static const struct attribute_group dsa_group = {
+	.name	= "dsa",
+	.attrs	= dsa_slave_attrs,
+};
+
 int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
 {
+	int ret;
+
 	/* If we use a tagging format that doesn't have an ethertype
 	 * field, make sure that all packets from this point on get
 	 * sent to the tag format's receive function.
@@ -168,11 +191,20 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
 
 	dev->dsa_ptr = cpu_dp;
 
-	return dsa_master_ethtool_setup(dev);
+	ret = dsa_master_ethtool_setup(dev);
+	if (ret)
+		return ret;
+
+	ret = sysfs_create_group(&dev->dev.kobj, &dsa_group);
+	if (ret)
+		dsa_master_ethtool_teardown(dev);
+
+	return ret;
 }
 
 void dsa_master_teardown(struct net_device *dev)
 {
+	sysfs_remove_group(&dev->dev.kobj, &dsa_group);
 	dsa_master_ethtool_teardown(dev);
 
 	dev->dsa_ptr = NULL;
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 7d0c19e7edcf0..aec78f5aca72d 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1058,27 +1058,6 @@ static struct device_type dsa_type = {
 	.name	= "dsa",
 };
 
-static ssize_t tagging_show(struct device *d, struct device_attribute *attr,
-			    char *buf)
-{
-	struct net_device *dev = to_net_dev(d);
-	struct dsa_port *dp = dsa_slave_to_port(dev);
-
-	return sprintf(buf, "%s\n",
-		       dsa_tag_protocol_to_str(dp->cpu_dp->tag_ops));
-}
-static DEVICE_ATTR_RO(tagging);
-
-static struct attribute *dsa_slave_attrs[] = {
-	&dev_attr_tagging.attr,
-	NULL
-};
-
-static const struct attribute_group dsa_group = {
-	.name	= "dsa",
-	.attrs	= dsa_slave_attrs,
-};
-
 static void dsa_slave_phylink_validate(struct net_device *dev,
 				       unsigned long *supported,
 				       struct phylink_link_state *state)
@@ -1374,14 +1353,8 @@ int dsa_slave_create(struct dsa_port *port)
 		goto out_phy;
 	}
 
-	ret = sysfs_create_group(&slave_dev->dev.kobj, &dsa_group);
-	if (ret)
-		goto out_unreg;
-
 	return 0;
 
-out_unreg:
-	unregister_netdev(slave_dev);
 out_phy:
 	rtnl_lock();
 	phylink_disconnect_phy(p->dp->pl);
@@ -1405,7 +1378,6 @@ void dsa_slave_destroy(struct net_device *slave_dev)
 	rtnl_unlock();
 
 	dsa_slave_notify(slave_dev, DSA_PORT_UNREGISTER);
-	sysfs_remove_group(&slave_dev->dev.kobj, &dsa_group);
 	unregister_netdev(slave_dev);
 	phylink_destroy(dp->pl);
 	free_percpu(p->stats64);
-- 
GitLab


From ef6fcd455278c2be3032a346cc66d9dd9866b787 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Wed, 28 Nov 2018 15:04:05 -0800
Subject: [PATCH 1677/1890] mlx5: fix get_ip_proto()

IP header is not necessarily located right after struct ethhdr,
there could be multiple 802.1Q headers in between, this is why
we call __vlan_get_protocol().

Fixes: fe1dc069990c ("net/mlx5e: don't set CHECKSUM_COMPLETE on SCTP packets")
Cc: Alaa Hleihel <alaa@mellanox.com>
Cc: Or Gerlitz <ogerlitz@mellanox.com>
Cc: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Acked-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 16985ca3248d7..624eed345b5d2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -724,9 +724,9 @@ static u32 mlx5e_get_fcs(const struct sk_buff *skb)
 	return __get_unaligned_cpu32(fcs_bytes);
 }
 
-static u8 get_ip_proto(struct sk_buff *skb, __be16 proto)
+static u8 get_ip_proto(struct sk_buff *skb, int network_depth, __be16 proto)
 {
-	void *ip_p = skb->data + sizeof(struct ethhdr);
+	void *ip_p = skb->data + network_depth;
 
 	return (proto == htons(ETH_P_IP)) ? ((struct iphdr *)ip_p)->protocol :
 					    ((struct ipv6hdr *)ip_p)->nexthdr;
@@ -755,7 +755,7 @@ static inline void mlx5e_handle_csum(struct net_device *netdev,
 		goto csum_unnecessary;
 
 	if (likely(is_last_ethertype_ip(skb, &network_depth, &proto))) {
-		if (unlikely(get_ip_proto(skb, proto) == IPPROTO_SCTP))
+		if (unlikely(get_ip_proto(skb, network_depth, proto) == IPPROTO_SCTP))
 			goto csum_unnecessary;
 
 		skb->ip_summed = CHECKSUM_COMPLETE;
-- 
GitLab


From c0f53771ba45745e5870daf880127925c93f232f Mon Sep 17 00:00:00 2001
From: Pan Bian <bianpan2016@163.com>
Date: Thu, 29 Nov 2018 07:54:22 +0800
Subject: [PATCH 1678/1890] liquidio: read sc->iq_no before release sc

The function lio_vf_rep_packet_sent_callback releases the occupation of
sc via octeon_free_soft_command. sc should not be used after that.
Unfortunately, sc->iq_no is read. To fix this, the patch stores sc->iq_no
into a local variable before releasing sc and then uses the local variable
instead of sc->iq_no.

Signed-off-by: Pan Bian <bianpan2016@163.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c
index ea9859e028d48..de61060721c4a 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c
@@ -349,13 +349,15 @@ lio_vf_rep_packet_sent_callback(struct octeon_device *oct,
 	struct octeon_soft_command *sc = (struct octeon_soft_command *)buf;
 	struct sk_buff *skb = sc->ctxptr;
 	struct net_device *ndev = skb->dev;
+	u32 iq_no;
 
 	dma_unmap_single(&oct->pci_dev->dev, sc->dmadptr,
 			 sc->datasize, DMA_TO_DEVICE);
 	dev_kfree_skb_any(skb);
+	iq_no = sc->iq_no;
 	octeon_free_soft_command(oct, sc);
 
-	if (octnet_iq_is_full(oct, sc->iq_no))
+	if (octnet_iq_is_full(oct, iq_no))
 		return;
 
 	if (netif_queue_stopped(ndev))
-- 
GitLab


From 3976535af0cb9fe34a55f2ffb8d7e6b39a2f8188 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Wed, 28 Nov 2018 16:06:43 -0800
Subject: [PATCH 1679/1890] tcp: fix off-by-one bug on aborting window-probing
 socket

Previously there is an off-by-one bug on determining when to abort
a stalled window-probing socket. This patch fixes that so it is
consistent with tcp_write_timeout().

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_timer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 091c53925e4da..25efdae4368a3 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -378,7 +378,7 @@ static void tcp_probe_timer(struct sock *sk)
 			return;
 	}
 
-	if (icsk->icsk_probes_out > max_probes) {
+	if (icsk->icsk_probes_out >= max_probes) {
 abort:		tcp_write_err(sk);
 	} else {
 		/* Only send another probe if we didn't close things up. */
-- 
GitLab


From ec641b39457e17774313b66697a8a1dc070257bd Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Wed, 28 Nov 2018 16:06:44 -0800
Subject: [PATCH 1680/1890] tcp: fix SNMP under-estimation on failed
 retransmission

Previously the SNMP counter LINUX_MIB_TCPRETRANSFAIL is not counting
the TSO/GSO properly on failed retransmission. This patch fixes that.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 3f510cad0b3ec..68b5326f73212 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2920,7 +2920,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
 		TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
 		trace_tcp_retransmit_skb(sk, skb);
 	} else if (err != -EBUSY) {
-		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
+		NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL, segs);
 	}
 	return err;
 }
-- 
GitLab


From e1561fe2dd69dc5dddd69bd73aa65355bdfb048b Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Wed, 28 Nov 2018 16:06:45 -0800
Subject: [PATCH 1681/1890] tcp: fix SNMP TCP timeout under-estimation

Previously the SNMP TCPTIMEOUTS counter has inconsistent accounting:
1. It counts all SYN and SYN-ACK timeouts
2. It counts timeouts in other states except recurring timeouts and
   timeouts after fast recovery or disorder state.

Such selective accounting makes analysis difficult and complicated. For
example the monitoring system needs to collect many other SNMP counters
to infer the total amount of timeout events. This patch makes TCPTIMEOUTS
counter simply counts all the retransmit timeout (SYN or data or FIN).

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_timer.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 25efdae4368a3..f87dbc78b6bcb 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -484,11 +484,12 @@ void tcp_retransmit_timer(struct sock *sk)
 		goto out_reset_timer;
 	}
 
+	__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEOUTS);
 	if (tcp_write_timeout(sk))
 		goto out;
 
 	if (icsk->icsk_retransmits == 0) {
-		int mib_idx;
+		int mib_idx = 0;
 
 		if (icsk->icsk_ca_state == TCP_CA_Recovery) {
 			if (tcp_is_sack(tp))
@@ -503,10 +504,9 @@ void tcp_retransmit_timer(struct sock *sk)
 				mib_idx = LINUX_MIB_TCPSACKFAILURES;
 			else
 				mib_idx = LINUX_MIB_TCPRENOFAILURES;
-		} else {
-			mib_idx = LINUX_MIB_TCPTIMEOUTS;
 		}
-		__NET_INC_STATS(sock_net(sk), mib_idx);
+		if (mib_idx)
+			__NET_INC_STATS(sock_net(sk), mib_idx);
 	}
 
 	tcp_enter_loss(sk);
-- 
GitLab


From 436c9453a1ac0944b82870ef2e0d9be956b396d9 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Thu, 29 Nov 2018 13:53:16 +0800
Subject: [PATCH 1682/1890] virtio-net: keep vnet header zeroed after
 processing XDP

We copy vnet header unconditionally in page_to_skb() this is wrong
since XDP may modify the packet data. So let's keep a zeroed vnet
header for not confusing the conversion between vnet header and skb
metadata.

In the future, we should able to detect whether or not the packet was
modified and keep using the vnet header when packet was not touched.

Fixes: f600b6905015 ("virtio_net: Add XDP support")
Reported-by: Pavel Popa <pashinho1990@gmail.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/virtio_net.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index cecfd77c9f3ca..ea672145f6a66 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -365,7 +365,8 @@ static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx)
 static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 				   struct receive_queue *rq,
 				   struct page *page, unsigned int offset,
-				   unsigned int len, unsigned int truesize)
+				   unsigned int len, unsigned int truesize,
+				   bool hdr_valid)
 {
 	struct sk_buff *skb;
 	struct virtio_net_hdr_mrg_rxbuf *hdr;
@@ -387,7 +388,8 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 	else
 		hdr_padded_len = sizeof(struct padded_vnet_hdr);
 
-	memcpy(hdr, p, hdr_len);
+	if (hdr_valid)
+		memcpy(hdr, p, hdr_len);
 
 	len -= hdr_len;
 	offset += hdr_padded_len;
@@ -739,7 +741,8 @@ static struct sk_buff *receive_big(struct net_device *dev,
 				   struct virtnet_rq_stats *stats)
 {
 	struct page *page = buf;
-	struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE);
+	struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len,
+					  PAGE_SIZE, true);
 
 	stats->bytes += len - vi->hdr_len;
 	if (unlikely(!skb))
@@ -842,7 +845,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 				rcu_read_unlock();
 				put_page(page);
 				head_skb = page_to_skb(vi, rq, xdp_page,
-						       offset, len, PAGE_SIZE);
+						       offset, len,
+						       PAGE_SIZE, false);
 				return head_skb;
 			}
 			break;
@@ -898,7 +902,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 		goto err_skb;
 	}
 
-	head_skb = page_to_skb(vi, rq, page, offset, len, truesize);
+	head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog);
 	curr_skb = head_skb;
 
 	if (unlikely(!curr_skb))
-- 
GitLab


From 35b827b6d06199841a83839e8bb69c0cd13a28be Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Thu, 29 Nov 2018 14:45:39 +0100
Subject: [PATCH 1683/1890] tun: forbid iface creation with rtnl ops

It's not supported right now (the goal of the initial patch was to support
'ip link del' only).

Before the patch:
$ ip link add foo type tun
[  239.632660] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
[snip]
[  239.636410] RIP: 0010:register_netdevice+0x8e/0x3a0

This panic occurs because dev->netdev_ops is not set by tun_setup(). But to
have something usable, it will require more than just setting
netdev_ops.

Fixes: f019a7a594d9 ("tun: Implement ip link del tunXXX")
CC: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tun.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index e244f5d7512a6..cf349e65a66b3 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -2293,9 +2293,9 @@ static void tun_setup(struct net_device *dev)
 static int tun_validate(struct nlattr *tb[], struct nlattr *data[],
 			struct netlink_ext_ack *extack)
 {
-	if (!data)
-		return 0;
-	return -EINVAL;
+	NL_SET_ERR_MSG(extack,
+		       "tun/tap creation via rtnetlink is not supported.");
+	return -EOPNOTSUPP;
 }
 
 static size_t tun_get_size(const struct net_device *dev)
-- 
GitLab


From f1f90e254e46e0a14220e4090041f68256fbe297 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Mon, 26 Nov 2018 10:37:13 -0600
Subject: [PATCH 1684/1890] PCI: Fix incorrect value returned from
 pcie_get_speed_cap()

The macros PCI_EXP_LNKCAP_SLS_*GB are values, not bit masks.  We must mask
the register and compare it against them.

This fixes errors like this:

  amdgpu: [powerplay] failed to send message 261 ret is 0

when a PCIe-v3 card is plugged into a PCIe-v1 slot, because the slot is
being incorrectly reported as PCIe-v3 capable.

6cf57be0f78e, which appeared in v4.17, added pcie_get_speed_cap() with the
incorrect test of PCI_EXP_LNKCAP_SLS as a bitmask.  5d9a63304032, which
appeared in v4.19, changed amdgpu to use pcie_get_speed_cap(), so the
amdgpu bug reports below are regressions in v4.19.

Fixes: 6cf57be0f78e ("PCI: Add pcie_get_speed_cap() to find max supported link speed")
Fixes: 5d9a63304032 ("drm/amdgpu: use pcie functions for link width and speed")
Link: https://bugs.freedesktop.org/show_bug.cgi?id=108704
Link: https://bugs.freedesktop.org/show_bug.cgi?id=108778
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
[bhelgaas: update comment, remove use of PCI_EXP_LNKCAP_SLS_8_0GB and
PCI_EXP_LNKCAP_SLS_16_0GB since those should be covered by PCI_EXP_LNKCAP2,
remove test of PCI_EXP_LNKCAP for zero, since that register is required]
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org	# v4.17+
---
 drivers/pci/pci.c | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index d068f11d08a70..c9d8e3c837de7 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -5556,9 +5556,13 @@ enum pci_bus_speed pcie_get_speed_cap(struct pci_dev *dev)
 	u32 lnkcap2, lnkcap;
 
 	/*
-	 * PCIe r4.0 sec 7.5.3.18 recommends using the Supported Link
-	 * Speeds Vector in Link Capabilities 2 when supported, falling
-	 * back to Max Link Speed in Link Capabilities otherwise.
+	 * Link Capabilities 2 was added in PCIe r3.0, sec 7.8.18.  The
+	 * implementation note there recommends using the Supported Link
+	 * Speeds Vector in Link Capabilities 2 when supported.
+	 *
+	 * Without Link Capabilities 2, i.e., prior to PCIe r3.0, software
+	 * should use the Supported Link Speeds field in Link Capabilities,
+	 * where only 2.5 GT/s and 5.0 GT/s speeds were defined.
 	 */
 	pcie_capability_read_dword(dev, PCI_EXP_LNKCAP2, &lnkcap2);
 	if (lnkcap2) { /* PCIe r3.0-compliant */
@@ -5574,16 +5578,10 @@ enum pci_bus_speed pcie_get_speed_cap(struct pci_dev *dev)
 	}
 
 	pcie_capability_read_dword(dev, PCI_EXP_LNKCAP, &lnkcap);
-	if (lnkcap) {
-		if (lnkcap & PCI_EXP_LNKCAP_SLS_16_0GB)
-			return PCIE_SPEED_16_0GT;
-		else if (lnkcap & PCI_EXP_LNKCAP_SLS_8_0GB)
-			return PCIE_SPEED_8_0GT;
-		else if (lnkcap & PCI_EXP_LNKCAP_SLS_5_0GB)
-			return PCIE_SPEED_5_0GT;
-		else if (lnkcap & PCI_EXP_LNKCAP_SLS_2_5GB)
-			return PCIE_SPEED_2_5GT;
-	}
+	if ((lnkcap & PCI_EXP_LNKCAP_SLS) == PCI_EXP_LNKCAP_SLS_5_0GB)
+		return PCIE_SPEED_5_0GT;
+	else if ((lnkcap & PCI_EXP_LNKCAP_SLS) == PCI_EXP_LNKCAP_SLS_2_5GB)
+		return PCIE_SPEED_2_5GT;
 
 	return PCI_SPEED_UNKNOWN;
 }
-- 
GitLab


From dcb40590e69e306030e944a39d0e4bf54247fb68 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Sat, 1 Dec 2018 10:39:44 -0800
Subject: [PATCH 1685/1890] bpf: refactor bpf_test_run() to separate own
 failures and test program result

After commit f42ee093be29 ("bpf/test_run: support cgroup local
storage") the bpf_test_run() function may fail with -ENOMEM, if
it's not possible to allocate memory for a cgroup local storage.

This error shouldn't be mixed with the return value of the testing
program. Let's add an additional argument with a pointer where to
store the testing program's result; and make bpf_test_run()
return either 0 or -ENOMEM.

Fixes: f42ee093be29 ("bpf/test_run: support cgroup local storage")
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Suggested-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 net/bpf/test_run.c | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index c89c22c49015f..25001913d03b5 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -28,12 +28,13 @@ static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx,
 	return ret;
 }
 
-static u32 bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *time)
+static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *ret,
+			u32 *time)
 {
 	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { 0 };
 	enum bpf_cgroup_storage_type stype;
 	u64 time_start, time_spent = 0;
-	u32 ret = 0, i;
+	u32 i;
 
 	for_each_cgroup_storage_type(stype) {
 		storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
@@ -49,7 +50,7 @@ static u32 bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *time)
 		repeat = 1;
 	time_start = ktime_get_ns();
 	for (i = 0; i < repeat; i++) {
-		ret = bpf_test_run_one(prog, ctx, storage);
+		*ret = bpf_test_run_one(prog, ctx, storage);
 		if (need_resched()) {
 			if (signal_pending(current))
 				break;
@@ -65,7 +66,7 @@ static u32 bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *time)
 	for_each_cgroup_storage_type(stype)
 		bpf_cgroup_storage_free(storage[stype]);
 
-	return ret;
+	return 0;
 }
 
 static int bpf_test_finish(const union bpf_attr *kattr,
@@ -165,7 +166,12 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
 		__skb_push(skb, hh_len);
 	if (is_direct_pkt_access)
 		bpf_compute_data_pointers(skb);
-	retval = bpf_test_run(prog, skb, repeat, &duration);
+	ret = bpf_test_run(prog, skb, repeat, &retval, &duration);
+	if (ret) {
+		kfree_skb(skb);
+		kfree(sk);
+		return ret;
+	}
 	if (!is_l2) {
 		if (skb_headroom(skb) < hh_len) {
 			int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb));
@@ -212,11 +218,14 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
 	rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0);
 	xdp.rxq = &rxqueue->xdp_rxq;
 
-	retval = bpf_test_run(prog, &xdp, repeat, &duration);
+	ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration);
+	if (ret)
+		goto out;
 	if (xdp.data != data + XDP_PACKET_HEADROOM + NET_IP_ALIGN ||
 	    xdp.data_end != xdp.data + size)
 		size = xdp.data_end - xdp.data;
 	ret = bpf_test_finish(kattr, uattr, xdp.data, size, retval, duration);
+out:
 	kfree(data);
 	return ret;
 }
-- 
GitLab


From 320f35b7bf8cccf1997ca3126843535e1b95e9c4 Mon Sep 17 00:00:00 2001
From: Tigran Mkrtchyan <tigran.mkrtchyan@desy.de>
Date: Mon, 26 Nov 2018 18:35:14 +0100
Subject: [PATCH 1686/1890] flexfiles: enforce per-mirror stateid only for v4
 DSes

Since commit bb21ce0ad227 we always enforce per-mirror stateid.
However, this makes sense only for v4+ servers.

Signed-off-by: Tigran Mkrtchyan <tigran.mkrtchyan@desy.de>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/flexfilelayout/flexfilelayout.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 74b36ed883caa..310d7500f6652 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1733,7 +1733,8 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
 	if (fh)
 		hdr->args.fh = fh;
 
-	if (!nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid))
+	if (vers == 4 &&
+		!nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid))
 		goto out_failed;
 
 	/*
@@ -1798,7 +1799,8 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
 	if (fh)
 		hdr->args.fh = fh;
 
-	if (!nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid))
+	if (vers == 4 &&
+		!nfs4_ff_layout_select_ds_stateid(lseg, idx, &hdr->args.stateid))
 		goto out_failed;
 
 	/*
-- 
GitLab


From ad3cba223ac02dc769c3bbe88efe277bbb457566 Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <dave.kleikamp@oracle.com>
Date: Tue, 27 Nov 2018 19:31:30 +0000
Subject: [PATCH 1687/1890] nfs: don't dirty kernel pages read by direct-io

When we use direct_IO with an NFS backing store, we can trigger a
WARNING in __set_page_dirty(), as below, since we're dirtying the page
unnecessarily in nfs_direct_read_completion().

To fix, replicate the logic in commit 53cbf3b157a0 ("fs: direct-io:
don't dirtying pages for ITER_BVEC/ITER_KVEC direct read").

Other filesystems that implement direct_IO handle this; most use
blockdev_direct_IO(). ceph and cifs have similar logic.

mount 127.0.0.1:/export /nfs
dd if=/dev/zero of=/nfs/image bs=1M count=200
losetup --direct-io=on -f /nfs/image
mkfs.btrfs /dev/loop0
mount -t btrfs /dev/loop0 /mnt/

kernel: WARNING: CPU: 0 PID: 8067 at fs/buffer.c:580 __set_page_dirty+0xaf/0xd0
kernel: Modules linked in: loop(E) nfsv3(E) rpcsec_gss_krb5(E) nfsv4(E) dns_resolver(E) nfs(E) fscache(E) nfsd(E) auth_rpcgss(E) nfs_acl(E) lockd(E) grace(E) fuse(E) tun(E) ip6t_rpfilter(E) ipt_REJECT(E) nf_
kernel:  snd_seq(E) snd_seq_device(E) snd_pcm(E) video(E) snd_timer(E) snd(E) soundcore(E) ip_tables(E) xfs(E) libcrc32c(E) sd_mod(E) sr_mod(E) cdrom(E) ata_generic(E) pata_acpi(E) crc32c_intel(E) ahci(E) li
kernel: CPU: 0 PID: 8067 Comm: kworker/0:2 Tainted: G            E     4.20.0-rc1.master.20181111.ol7.x86_64 #1
kernel: Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
kernel: Workqueue: nfsiod rpc_async_release [sunrpc]
kernel: RIP: 0010:__set_page_dirty+0xaf/0xd0
kernel: Code: c3 48 8b 02 f6 c4 04 74 d4 48 89 df e8 ba 05 f7 ff 48 89 c6 eb cb 48 8b 43 08 a8 01 75 1f 48 89 d8 48 8b 00 a8 04 74 02 eb 87 <0f> 0b eb 83 48 83 e8 01 eb 9f 48 83 ea 01 0f 1f 00 eb 8b 48 83 e8
kernel: RSP: 0000:ffffc1c8825b7d78 EFLAGS: 00013046
kernel: RAX: 000fffffc0020089 RBX: fffff2b603308b80 RCX: 0000000000000001
kernel: RDX: 0000000000000001 RSI: ffff9d11478115c8 RDI: ffff9d11478115d0
kernel: RBP: ffffc1c8825b7da0 R08: 0000646f6973666e R09: 8080808080808080
kernel: R10: 0000000000000001 R11: 0000000000000000 R12: ffff9d11478115d0
kernel: R13: ffff9d11478115c8 R14: 0000000000003246 R15: 0000000000000001
kernel: FS:  0000000000000000(0000) GS:ffff9d115ba00000(0000) knlGS:0000000000000000
kernel: CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
kernel: CR2: 00007f408686f640 CR3: 0000000104d8e004 CR4: 00000000000606f0
kernel: DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
kernel: DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
kernel: Call Trace:
kernel:  __set_page_dirty_buffers+0xb6/0x110
kernel:  set_page_dirty+0x52/0xb0
kernel:  nfs_direct_read_completion+0xc4/0x120 [nfs]
kernel:  nfs_pgio_release+0x10/0x20 [nfs]
kernel:  rpc_free_task+0x30/0x70 [sunrpc]
kernel:  rpc_async_release+0x12/0x20 [sunrpc]
kernel:  process_one_work+0x174/0x390
kernel:  worker_thread+0x4f/0x3e0
kernel:  kthread+0x102/0x140
kernel:  ? drain_workqueue+0x130/0x130
kernel:  ? kthread_stop+0x110/0x110
kernel:  ret_from_fork+0x35/0x40
kernel: ---[ end trace 01341980905412c9 ]---

Signed-off-by: Dave Kleikamp <dave.kleikamp@oracle.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>

[forward-ported to v4.20]
Signed-off-by: Calum Mackay <calum.mackay@oracle.com>
Reviewed-by: Dave Kleikamp <dave.kleikamp@oracle.com>
Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/direct.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index aa12c3063baec..33824a0a57bfe 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -98,8 +98,11 @@ struct nfs_direct_req {
 	struct pnfs_ds_commit_info ds_cinfo;	/* Storage for cinfo */
 	struct work_struct	work;
 	int			flags;
+	/* for write */
 #define NFS_ODIRECT_DO_COMMIT		(1)	/* an unstable reply was received */
 #define NFS_ODIRECT_RESCHED_WRITES	(2)	/* write verification failed */
+	/* for read */
+#define NFS_ODIRECT_SHOULD_DIRTY	(3)	/* dirty user-space page after read */
 	struct nfs_writeverf	verf;		/* unstable write verifier */
 };
 
@@ -412,7 +415,8 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
 		struct nfs_page *req = nfs_list_entry(hdr->pages.next);
 		struct page *page = req->wb_page;
 
-		if (!PageCompound(page) && bytes < hdr->good_bytes)
+		if (!PageCompound(page) && bytes < hdr->good_bytes &&
+		    (dreq->flags == NFS_ODIRECT_SHOULD_DIRTY))
 			set_page_dirty(page);
 		bytes += req->wb_bytes;
 		nfs_list_remove_request(req);
@@ -587,6 +591,9 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter)
 	if (!is_sync_kiocb(iocb))
 		dreq->iocb = iocb;
 
+	if (iter_is_iovec(iter))
+		dreq->flags = NFS_ODIRECT_SHOULD_DIRTY;
+
 	nfs_start_io_direct(inode);
 
 	NFS_I(inode)->read_io += count;
-- 
GitLab


From 9bd11523dc1b9293d1eee2c0b8cfc986a312bbce Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Fri, 30 Nov 2018 12:48:47 -0500
Subject: [PATCH 1688/1890] SUNRPC: call_connect_status() must handle tasks
 that got transmitted

If a task failed to get the write lock in the call to xprt_connect(), then
it will be queued on xprt->sending. In that case, it is possible for it
to get transmitted before the call to call_connect_status(), in which
case it needs to be handled by call_transmit_status() instead.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 net/sunrpc/clnt.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index ae3b8145da35a..e35d642558e7a 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1915,6 +1915,13 @@ call_connect_status(struct rpc_task *task)
 	struct rpc_clnt *clnt = task->tk_client;
 	int status = task->tk_status;
 
+	/* Check if the task was already transmitted */
+	if (!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) {
+		xprt_end_transmit(task);
+		task->tk_action = call_transmit_status;
+		return;
+	}
+
 	dprint_status(task);
 
 	trace_rpc_connect_status(task);
-- 
GitLab


From 8dae5398ab1ac107b1517e8195ed043d5f422bd0 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 30 Nov 2018 15:39:57 -0500
Subject: [PATCH 1689/1890] SUNRPC: Fix leak of krb5p encode pages

call_encode can be invoked more than once per RPC call. Ensure that
each call to gss_wrap_req_priv does not overwrite pointers to
previously allocated memory.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Cc: stable@kernel.org
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 net/sunrpc/auth_gss/auth_gss.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 5d3f252659f19..ba765473d1f06 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1791,6 +1791,7 @@ priv_release_snd_buf(struct rpc_rqst *rqstp)
 	for (i=0; i < rqstp->rq_enc_pages_num; i++)
 		__free_page(rqstp->rq_enc_pages[i]);
 	kfree(rqstp->rq_enc_pages);
+	rqstp->rq_release_snd_buf = NULL;
 }
 
 static int
@@ -1799,6 +1800,9 @@ alloc_enc_pages(struct rpc_rqst *rqstp)
 	struct xdr_buf *snd_buf = &rqstp->rq_snd_buf;
 	int first, last, i;
 
+	if (rqstp->rq_release_snd_buf)
+		rqstp->rq_release_snd_buf(rqstp);
+
 	if (snd_buf->page_len == 0) {
 		rqstp->rq_enc_pages_num = 0;
 		return 0;
-- 
GitLab


From 71700bb96047f68a0aae3932466fc7c9ad5ce6c0 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Fri, 30 Nov 2018 16:11:15 -0500
Subject: [PATCH 1690/1890] SUNRPC: Fix a memory leak in call_encode()

If we retransmit an RPC request, we currently end up clobbering the
value of req->rq_rcv_buf.bvec that was allocated by the initial call to
xprt_request_prepare(req).

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/xdr.h | 1 -
 net/sunrpc/clnt.c          | 1 +
 net/sunrpc/xprt.c          | 2 ++
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 43106ffa6788a..2ec1280602390 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -72,7 +72,6 @@ xdr_buf_init(struct xdr_buf *buf, void *start, size_t len)
 	buf->head[0].iov_base = start;
 	buf->head[0].iov_len = len;
 	buf->tail[0].iov_len = 0;
-	buf->bvec = NULL;
 	buf->pages = NULL;
 	buf->page_len = 0;
 	buf->flags = 0;
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index e35d642558e7a..c6782aa475257 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -2309,6 +2309,7 @@ call_decode(struct rpc_task *task)
 	task->tk_status = 0;
 	/* Note: rpc_verify_header() may have freed the RPC slot */
 	if (task->tk_rqstp == req) {
+		xdr_free_bvec(&req->rq_rcv_buf);
 		req->rq_reply_bytes_recvd = req->rq_rcv_buf.len = 0;
 		if (task->tk_client->cl_discrtry)
 			xprt_conditional_disconnect(req->rq_xprt,
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 86bea4520c4d1..122c91c28e7c1 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1623,6 +1623,8 @@ xprt_request_init(struct rpc_task *task)
 	req->rq_snd_buf.buflen = 0;
 	req->rq_rcv_buf.len = 0;
 	req->rq_rcv_buf.buflen = 0;
+	req->rq_snd_buf.bvec = NULL;
+	req->rq_rcv_buf.bvec = NULL;
 	req->rq_release_snd_buf = NULL;
 	xprt_reset_majortimeo(req);
 	dprintk("RPC: %5u reserved req %p xid %08x\n", task->tk_pid,
-- 
GitLab


From 0a9a4304f3614e25d9de9b63502ca633c01c0d70 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Sat, 1 Dec 2018 23:18:00 -0500
Subject: [PATCH 1691/1890] SUNRPC: Fix a potential race in xprt_connect()

If an asynchronous connection attempt completes while another task is
in xprt_connect(), then the call to rpc_sleep_on() could end up
racing with the call to xprt_wake_pending_tasks().
So add a second test of the connection state after we've put the
task to sleep and set the XPRT_CONNECTING flag, when we know that there
can be no asynchronous connection attempts still in progress.

Fixes: 0b9e79431377d ("SUNRPC: Move the test for XPRT_CONNECTING into...")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 net/sunrpc/xprt.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 122c91c28e7c1..ce927002862a6 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -826,8 +826,15 @@ void xprt_connect(struct rpc_task *task)
 			return;
 		if (xprt_test_and_set_connecting(xprt))
 			return;
-		xprt->stat.connect_start = jiffies;
-		xprt->ops->connect(xprt, task);
+		/* Race breaker */
+		if (!xprt_connected(xprt)) {
+			xprt->stat.connect_start = jiffies;
+			xprt->ops->connect(xprt, task);
+		} else {
+			xprt_clear_connecting(xprt);
+			task->tk_status = 0;
+			rpc_wake_up_queued_task(&xprt->pending, task);
+		}
 	}
 	xprt_release_write(xprt, task);
 }
-- 
GitLab


From 1e8249b8a4e960018e4baca6b523b8a4500af600 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Thu, 29 Nov 2018 17:05:47 +0100
Subject: [PATCH 1692/1890] parisc: Enable -ffunction-sections for modules on
 32-bit kernel

Frank Schreiner reported, that since kernel 4.18 he faces sysfs-warnings
when loading modules on a 32-bit kernel. Here is one such example:

 sysfs: cannot create duplicate filename '/module/nfs/sections/.text'
 CPU: 0 PID: 98 Comm: modprobe Not tainted 4.18.0-2-parisc #1 Debian 4.18.10-2
 Backtrace:
  [<1017ce2c>] show_stack+0x3c/0x50
  [<107a7210>] dump_stack+0x28/0x38
  [<103f900c>] sysfs_warn_dup+0x88/0xac
  [<103f8b1c>] sysfs_add_file_mode_ns+0x164/0x1d0
  [<103f9e70>] internal_create_group+0x11c/0x304
  [<103fa0a0>] sysfs_create_group+0x48/0x60
  [<1022abe8>] load_module.constprop.35+0x1f9c/0x23b8
  [<1022b278>] sys_finit_module+0xd0/0x11c
  [<101831dc>] syscall_exit+0x0/0x14

This warning gets triggered by the fact, that due to commit 24b6c22504a2
("parisc: Build kernel without -ffunction-sections") we now get multiple .text
sections in the kernel modules for which sysfs_create_group() can't create
multiple virtual files.

This patch works around the problem by re-enabling the -ffunction-sections
compiler option for modules, while keeping it disabled for the non-module
kernel code.

Reported-by: Frank Scheiner <frank.scheiner@web.de>
Fixes: 24b6c22504a2 ("parisc: Build kernel without -ffunction-sections")
Cc: <stable@vger.kernel.org> # v4.18+
Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/Makefile | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile
index d047a09d660f0..1085385e1f06a 100644
--- a/arch/parisc/Makefile
+++ b/arch/parisc/Makefile
@@ -71,6 +71,13 @@ ifdef CONFIG_MLONGCALLS
 KBUILD_CFLAGS_KERNEL += -mlong-calls
 endif
 
+# Without this, "ld -r" results in .text sections that are too big (> 0x40000)
+# for branches to reach stubs. And multiple .text sections trigger a warning
+# when creating the sysfs module information section.
+ifndef CONFIG_64BIT
+KBUILD_CFLAGS_MODULE += -ffunction-sections
+endif
+
 # select which processor to optimise for
 cflags-$(CONFIG_PA7000)		+= -march=1.1 -mschedule=7100
 cflags-$(CONFIG_PA7200)		+= -march=1.1 -mschedule=7200
-- 
GitLab


From 2595646791c319cadfdbf271563aac97d0843dc7 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 2 Dec 2018 15:07:55 -0800
Subject: [PATCH 1693/1890] Linux 4.20-rc5

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 0ce4e29ee342f..e9fd22c8445ec 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 4
 PATCHLEVEL = 20
 SUBLEVEL = 0
-EXTRAVERSION = -rc4
+EXTRAVERSION = -rc5
 NAME = Shy Crocodile
 
 # *DOCUMENTATION*
-- 
GitLab


From b4b84da36403bf1142d17fe7b06c2d639c9c9b8b Mon Sep 17 00:00:00 2001
From: Guo Ren <ren_guo@c-sky.com>
Date: Tue, 6 Nov 2018 15:21:14 +0800
Subject: [PATCH 1694/1890] MAINTAINERS: add maintainer for C-SKY drivers

There are two intc drivers and two clocksource drivers, also include
related dt-bindings' documentations.

Change ren_guo@c-sky.com to guoren@kernel.org

Signed-off-by: Guo Ren <ren_guo@c-sky.com>
Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 MAINTAINERS | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index f4855974f3250..9a44cef597df9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3212,11 +3212,16 @@ S:	Maintained
 F:	sound/pci/oxygen/
 
 C-SKY ARCHITECTURE
-M:	Guo Ren <ren_guo@c-sky.com>
+M:	Guo Ren <guoren@kernel.org>
 T:	git https://github.com/c-sky/csky-linux.git
 S:	Supported
 F:	arch/csky/
 F:	Documentation/devicetree/bindings/csky/
+F:	drivers/irqchip/irq-csky-*
+F:	Documentation/devicetree/bindings/interrupt-controller/csky,*
+F:	drivers/clocksource/timer-gx6605s.c
+F:	drivers/clocksource/timer-mp-csky.c
+F:	Documentation/devicetree/bindings/timer/csky,*
 K:	csky
 N:	csky
 
-- 
GitLab


From 63e19c8216bb03a1b4d10f6637d1324ae7a2b612 Mon Sep 17 00:00:00 2001
From: Guo Ren <ren_guo@c-sky.com>
Date: Tue, 20 Nov 2018 16:06:57 +0800
Subject: [PATCH 1695/1890] csky: bugfix tlb_get_pgd error.

It's wrong to mask/unmask highest bit in addr to translate the vaddr
to paddr. We should use PAGE_OFFSET and PHYS_OFFSET.

Wrong implement:
  return ((get_pgd()|(1<<31)) - PHYS_OFFSET) & ~1;

When PHYS_OFFSET=0xc0000000 and get_pgd() return 0xe0000000, it'll
return 0x60000000. It's wrong and should be 0xa0000000.

Now correct it to:
  return ((get_pgd() - PHYS_OFFSET) & ~1) + PAGE_OFFSET;

Signed-off-by: Guo Ren <ren_guo@c-sky.com>
---
 arch/csky/include/asm/mmu_context.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/csky/include/asm/mmu_context.h b/arch/csky/include/asm/mmu_context.h
index c410aa4fff1a1..b2905c0485a72 100644
--- a/arch/csky/include/asm/mmu_context.h
+++ b/arch/csky/include/asm/mmu_context.h
@@ -16,7 +16,7 @@
 
 static inline void tlbmiss_handler_setup_pgd(unsigned long pgd, bool kernel)
 {
-	pgd &= ~(1<<31);
+	pgd -= PAGE_OFFSET;
 	pgd += PHYS_OFFSET;
 	pgd |= 1;
 	setup_pgd(pgd, kernel);
@@ -29,7 +29,7 @@ static inline void tlbmiss_handler_setup_pgd(unsigned long pgd, bool kernel)
 
 static inline unsigned long tlb_get_pgd(void)
 {
-	return ((get_pgd()|(1<<31)) - PHYS_OFFSET) & ~1;
+	return ((get_pgd() - PHYS_OFFSET) & ~1) + PAGE_OFFSET;
 }
 
 #define cpu_context(cpu, mm)	((mm)->context.asid[cpu])
-- 
GitLab


From 87d81a23e24f24ebe014891e8bdf3ff8785031e8 Mon Sep 17 00:00:00 2001
From: Yangtao Li <tiny.windzz@gmail.com>
Date: Tue, 20 Nov 2018 08:30:40 -0500
Subject: [PATCH 1696/1890] sbus: char: add of_node_put()

use of_node_put() to release the refcount.

Signed-off-by: Yangtao Li <tiny.windzz@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/sbus/char/display7seg.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/sbus/char/display7seg.c b/drivers/sbus/char/display7seg.c
index 5c8ed7350a04a..a36e4cf1841d9 100644
--- a/drivers/sbus/char/display7seg.c
+++ b/drivers/sbus/char/display7seg.c
@@ -220,6 +220,7 @@ static int d7s_probe(struct platform_device *op)
 	dev_set_drvdata(&op->dev, p);
 	d7s_device = p;
 	err = 0;
+	of_node_put(opts);
 
 out:
 	return err;
-- 
GitLab


From 6bd520ab7cf69486ea81fd3cdfd2d5a390ad1100 Mon Sep 17 00:00:00 2001
From: Yangtao Li <tiny.windzz@gmail.com>
Date: Tue, 20 Nov 2018 08:38:26 -0500
Subject: [PATCH 1697/1890] drivers/sbus/char: add of_node_put()

use of_node_put() to release the refcount.

Signed-off-by: Yangtao Li <tiny.windzz@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/sbus/char/envctrl.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/sbus/char/envctrl.c b/drivers/sbus/char/envctrl.c
index 56e962a014939..b8481927bfe40 100644
--- a/drivers/sbus/char/envctrl.c
+++ b/drivers/sbus/char/envctrl.c
@@ -910,8 +910,10 @@ static void envctrl_init_i2c_child(struct device_node *dp,
 			for (len = 0; len < PCF8584_MAX_CHANNELS; ++len) {
 				pchild->mon_type[len] = ENVCTRL_NOMON;
 			}
+			of_node_put(root_node);
 			return;
 		}
+		of_node_put(root_node);
 	}
 
 	/* Get the monitor channels. */
-- 
GitLab


From dac097c4546e4c5b16dd303a1e97c1d319c8ab3e Mon Sep 17 00:00:00 2001
From: Yangtao Li <tiny.windzz@gmail.com>
Date: Wed, 21 Nov 2018 10:22:54 -0500
Subject: [PATCH 1698/1890] drivers/tty: add missing of_node_put()

of_find_node_by_path() acquires a reference to the node
returned by it and that reference needs to be dropped by its caller.
This place is not doing this, so fix it.

Signed-off-by: Yangtao Li <tiny.windzz@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/tty/serial/suncore.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/tty/serial/suncore.c b/drivers/tty/serial/suncore.c
index 70a4ea4eaa6e7..990376576970a 100644
--- a/drivers/tty/serial/suncore.c
+++ b/drivers/tty/serial/suncore.c
@@ -112,6 +112,7 @@ void sunserial_console_termios(struct console *con, struct device_node *uart_dp)
 		mode = of_get_property(dp, mode_prop, NULL);
 		if (!mode)
 			mode = "9600,8,n,1,-";
+		of_node_put(dp);
 	}
 
 	cflag = CREAD | HUPCL | CLOCAL;
-- 
GitLab


From a51921c0db3fd26c4ed83dc0ec5d32988fa02aa5 Mon Sep 17 00:00:00 2001
From: Yangtao Li <tiny.windzz@gmail.com>
Date: Tue, 20 Nov 2018 08:02:49 -0500
Subject: [PATCH 1699/1890] ide: pmac: add of_node_put()

use of_node_put() to release the refcount.

Signed-off-by: Yangtao Li <tiny.windzz@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ide/pmac.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/ide/pmac.c b/drivers/ide/pmac.c
index c5b902b86b444..203ed4adc04ae 100644
--- a/drivers/ide/pmac.c
+++ b/drivers/ide/pmac.c
@@ -920,6 +920,7 @@ static u8 pmac_ide_cable_detect(ide_hwif_t *hwif)
 	struct device_node *root = of_find_node_by_path("/");
 	const char *model = of_get_property(root, "model", NULL);
 
+	of_node_put(root);
 	/* Get cable type from device-tree. */
 	if (cable && !strncmp(cable, "80-", 3)) {
 		/* Some drives fail to detect 80c cable in PowerBook */
-- 
GitLab


From 94d0fb159da94cb4522e14d6004bb7acf2ff0387 Mon Sep 17 00:00:00 2001
From: Yangtao Li <tiny.windzz@gmail.com>
Date: Fri, 30 Nov 2018 21:20:48 -0500
Subject: [PATCH 1700/1890] ide: Change to use DEFINE_SHOW_ATTRIBUTE macro

Use DEFINE_SHOW_ATTRIBUTE macro to simplify the code.

Signed-off-by: Yangtao Li <tiny.windzz@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ide/ide-proc.c | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c
index 45c9974303328..4c8c7a620d08d 100644
--- a/drivers/ide/ide-proc.c
+++ b/drivers/ide/ide-proc.c
@@ -614,18 +614,7 @@ static int ide_drivers_show(struct seq_file *s, void *p)
 	return 0;
 }
 
-static int ide_drivers_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, &ide_drivers_show, NULL);
-}
-
-static const struct file_operations ide_drivers_operations = {
-	.owner		= THIS_MODULE,
-	.open		= ide_drivers_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(ide_drivers);
 
 void proc_ide_create(void)
 {
@@ -634,7 +623,7 @@ void proc_ide_create(void)
 	if (!proc_ide_root)
 		return;
 
-	proc_create("drivers", 0, proc_ide_root, &ide_drivers_operations);
+	proc_create("drivers", 0, proc_ide_root, &ide_drivers_fops);
 }
 
 void proc_ide_destroy(void)
-- 
GitLab


From 37c2578c0c40e286bc0d30bdc05290b2058cf66e Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Mon, 3 Dec 2018 00:54:35 +0000
Subject: [PATCH 1701/1890] Drivers: hv: vmbus: Offload the handling of
 channels to two workqueues

vmbus_process_offer() mustn't call channel->sc_creation_callback()
directly for sub-channels, because sc_creation_callback() ->
vmbus_open() may never get the host's response to the
OPEN_CHANNEL message (the host may rescind a channel at any time,
e.g. in the case of hot removing a NIC), and vmbus_onoffer_rescind()
may not wake up the vmbus_open() as it's blocked due to a non-zero
vmbus_connection.offer_in_progress, and finally we have a deadlock.

The above is also true for primary channels, if the related device
drivers use sync probing mode by default.

And, usually the handling of primary channels and sub-channels can
depend on each other, so we should offload them to different
workqueues to avoid possible deadlock, e.g. in sync-probing mode,
NIC1's netvsc_subchan_work() can race with NIC2's netvsc_probe() ->
rtnl_lock(), and causes deadlock: the former gets the rtnl_lock
and waits for all the sub-channels to appear, but the latter
can't get the rtnl_lock and this blocks the handling of sub-channels.

The patch can fix the multiple-NIC deadlock described above for
v3.x kernels (e.g. RHEL 7.x) which don't support async-probing
of devices, and v4.4, v4.9, v4.14 and v4.18 which support async-probing
but don't enable async-probing for Hyper-V drivers (yet).

The patch can also fix the hang issue in sub-channel's handling described
above for all versions of kernels, including v4.19 and v4.20-rc4.

So actually the patch should be applied to all the existing kernels,
not only the kernels that have 8195b1396ec8.

Fixes: 8195b1396ec8 ("hv_netvsc: fix deadlock on hotplug")
Cc: stable@vger.kernel.org
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Cc: K. Y. Srinivasan <kys@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/channel_mgmt.c | 189 +++++++++++++++++++++++++-------------
 drivers/hv/connection.c   |  24 ++++-
 drivers/hv/hyperv_vmbus.h |   7 ++
 include/linux/hyperv.h    |   7 ++
 4 files changed, 161 insertions(+), 66 deletions(-)

diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 6277597d3d581..edd34c167a9bd 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -435,61 +435,16 @@ void vmbus_free_channels(void)
 	}
 }
 
-/*
- * vmbus_process_offer - Process the offer by creating a channel/device
- * associated with this offer
- */
-static void vmbus_process_offer(struct vmbus_channel *newchannel)
+/* Note: the function can run concurrently for primary/sub channels. */
+static void vmbus_add_channel_work(struct work_struct *work)
 {
-	struct vmbus_channel *channel;
-	bool fnew = true;
+	struct vmbus_channel *newchannel =
+		container_of(work, struct vmbus_channel, add_channel_work);
+	struct vmbus_channel *primary_channel = newchannel->primary_channel;
 	unsigned long flags;
 	u16 dev_type;
 	int ret;
 
-	/* Make sure this is a new offer */
-	mutex_lock(&vmbus_connection.channel_mutex);
-
-	/*
-	 * Now that we have acquired the channel_mutex,
-	 * we can release the potentially racing rescind thread.
-	 */
-	atomic_dec(&vmbus_connection.offer_in_progress);
-
-	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
-		if (!uuid_le_cmp(channel->offermsg.offer.if_type,
-			newchannel->offermsg.offer.if_type) &&
-			!uuid_le_cmp(channel->offermsg.offer.if_instance,
-				newchannel->offermsg.offer.if_instance)) {
-			fnew = false;
-			break;
-		}
-	}
-
-	if (fnew)
-		list_add_tail(&newchannel->listentry,
-			      &vmbus_connection.chn_list);
-
-	mutex_unlock(&vmbus_connection.channel_mutex);
-
-	if (!fnew) {
-		/*
-		 * Check to see if this is a sub-channel.
-		 */
-		if (newchannel->offermsg.offer.sub_channel_index != 0) {
-			/*
-			 * Process the sub-channel.
-			 */
-			newchannel->primary_channel = channel;
-			spin_lock_irqsave(&channel->lock, flags);
-			list_add_tail(&newchannel->sc_list, &channel->sc_list);
-			channel->num_sc++;
-			spin_unlock_irqrestore(&channel->lock, flags);
-		} else {
-			goto err_free_chan;
-		}
-	}
-
 	dev_type = hv_get_dev_type(newchannel);
 
 	init_vp_index(newchannel, dev_type);
@@ -507,27 +462,26 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
 	/*
 	 * This state is used to indicate a successful open
 	 * so that when we do close the channel normally, we
-	 * can cleanup properly
+	 * can cleanup properly.
 	 */
 	newchannel->state = CHANNEL_OPEN_STATE;
 
-	if (!fnew) {
-		struct hv_device *dev
-			= newchannel->primary_channel->device_obj;
+	if (primary_channel != NULL) {
+		/* newchannel is a sub-channel. */
+		struct hv_device *dev = primary_channel->device_obj;
 
 		if (vmbus_add_channel_kobj(dev, newchannel))
-			goto err_free_chan;
+			goto err_deq_chan;
+
+		if (primary_channel->sc_creation_callback != NULL)
+			primary_channel->sc_creation_callback(newchannel);
 
-		if (channel->sc_creation_callback != NULL)
-			channel->sc_creation_callback(newchannel);
 		newchannel->probe_done = true;
 		return;
 	}
 
 	/*
-	 * Start the process of binding this offer to the driver
-	 * We need to set the DeviceObject field before calling
-	 * vmbus_child_dev_add()
+	 * Start the process of binding the primary channel to the driver
 	 */
 	newchannel->device_obj = vmbus_device_create(
 		&newchannel->offermsg.offer.if_type,
@@ -556,13 +510,28 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
 
 err_deq_chan:
 	mutex_lock(&vmbus_connection.channel_mutex);
-	list_del(&newchannel->listentry);
+
+	/*
+	 * We need to set the flag, otherwise
+	 * vmbus_onoffer_rescind() can be blocked.
+	 */
+	newchannel->probe_done = true;
+
+	if (primary_channel == NULL) {
+		list_del(&newchannel->listentry);
+	} else {
+		spin_lock_irqsave(&primary_channel->lock, flags);
+		list_del(&newchannel->sc_list);
+		spin_unlock_irqrestore(&primary_channel->lock, flags);
+	}
+
 	mutex_unlock(&vmbus_connection.channel_mutex);
 
 	if (newchannel->target_cpu != get_cpu()) {
 		put_cpu();
 		smp_call_function_single(newchannel->target_cpu,
-					 percpu_channel_deq, newchannel, true);
+					 percpu_channel_deq,
+					 newchannel, true);
 	} else {
 		percpu_channel_deq(newchannel);
 		put_cpu();
@@ -570,14 +539,104 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
 
 	vmbus_release_relid(newchannel->offermsg.child_relid);
 
-err_free_chan:
 	free_channel(newchannel);
 }
 
+/*
+ * vmbus_process_offer - Process the offer by creating a channel/device
+ * associated with this offer
+ */
+static void vmbus_process_offer(struct vmbus_channel *newchannel)
+{
+	struct vmbus_channel *channel;
+	struct workqueue_struct *wq;
+	unsigned long flags;
+	bool fnew = true;
+
+	mutex_lock(&vmbus_connection.channel_mutex);
+
+	/*
+	 * Now that we have acquired the channel_mutex,
+	 * we can release the potentially racing rescind thread.
+	 */
+	atomic_dec(&vmbus_connection.offer_in_progress);
+
+	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
+		if (!uuid_le_cmp(channel->offermsg.offer.if_type,
+				 newchannel->offermsg.offer.if_type) &&
+		    !uuid_le_cmp(channel->offermsg.offer.if_instance,
+				 newchannel->offermsg.offer.if_instance)) {
+			fnew = false;
+			break;
+		}
+	}
+
+	if (fnew)
+		list_add_tail(&newchannel->listentry,
+			      &vmbus_connection.chn_list);
+	else {
+		/*
+		 * Check to see if this is a valid sub-channel.
+		 */
+		if (newchannel->offermsg.offer.sub_channel_index == 0) {
+			mutex_unlock(&vmbus_connection.channel_mutex);
+			/*
+			 * Don't call free_channel(), because newchannel->kobj
+			 * is not initialized yet.
+			 */
+			kfree(newchannel);
+			WARN_ON_ONCE(1);
+			return;
+		}
+		/*
+		 * Process the sub-channel.
+		 */
+		newchannel->primary_channel = channel;
+		spin_lock_irqsave(&channel->lock, flags);
+		list_add_tail(&newchannel->sc_list, &channel->sc_list);
+		spin_unlock_irqrestore(&channel->lock, flags);
+	}
+
+	mutex_unlock(&vmbus_connection.channel_mutex);
+
+	/*
+	 * vmbus_process_offer() mustn't call channel->sc_creation_callback()
+	 * directly for sub-channels, because sc_creation_callback() ->
+	 * vmbus_open() may never get the host's response to the
+	 * OPEN_CHANNEL message (the host may rescind a channel at any time,
+	 * e.g. in the case of hot removing a NIC), and vmbus_onoffer_rescind()
+	 * may not wake up the vmbus_open() as it's blocked due to a non-zero
+	 * vmbus_connection.offer_in_progress, and finally we have a deadlock.
+	 *
+	 * The above is also true for primary channels, if the related device
+	 * drivers use sync probing mode by default.
+	 *
+	 * And, usually the handling of primary channels and sub-channels can
+	 * depend on each other, so we should offload them to different
+	 * workqueues to avoid possible deadlock, e.g. in sync-probing mode,
+	 * NIC1's netvsc_subchan_work() can race with NIC2's netvsc_probe() ->
+	 * rtnl_lock(), and causes deadlock: the former gets the rtnl_lock
+	 * and waits for all the sub-channels to appear, but the latter
+	 * can't get the rtnl_lock and this blocks the handling of
+	 * sub-channels.
+	 */
+	INIT_WORK(&newchannel->add_channel_work, vmbus_add_channel_work);
+	wq = fnew ? vmbus_connection.handle_primary_chan_wq :
+		    vmbus_connection.handle_sub_chan_wq;
+	queue_work(wq, &newchannel->add_channel_work);
+}
+
 /*
  * We use this state to statically distribute the channel interrupt load.
  */
 static int next_numa_node_id;
+/*
+ * init_vp_index() accesses global variables like next_numa_node_id, and
+ * it can run concurrently for primary channels and sub-channels: see
+ * vmbus_process_offer(), so we need the lock to protect the global
+ * variables.
+ */
+static DEFINE_SPINLOCK(bind_channel_to_cpu_lock);
 
 /*
  * Starting with Win8, we can statically distribute the incoming
@@ -613,6 +672,8 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
 		return;
 	}
 
+	spin_lock(&bind_channel_to_cpu_lock);
+
 	/*
 	 * Based on the channel affinity policy, we will assign the NUMA
 	 * nodes.
@@ -695,6 +756,8 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
 	channel->target_cpu = cur_cpu;
 	channel->target_vp = hv_cpu_number_to_vp_number(cur_cpu);
 
+	spin_unlock(&bind_channel_to_cpu_lock);
+
 	free_cpumask_var(available_mask);
 }
 
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index f4d08c8ac7f8f..4fe117b761ce0 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -190,6 +190,20 @@ int vmbus_connect(void)
 		goto cleanup;
 	}
 
+	vmbus_connection.handle_primary_chan_wq =
+		create_workqueue("hv_pri_chan");
+	if (!vmbus_connection.handle_primary_chan_wq) {
+		ret = -ENOMEM;
+		goto cleanup;
+	}
+
+	vmbus_connection.handle_sub_chan_wq =
+		create_workqueue("hv_sub_chan");
+	if (!vmbus_connection.handle_sub_chan_wq) {
+		ret = -ENOMEM;
+		goto cleanup;
+	}
+
 	INIT_LIST_HEAD(&vmbus_connection.chn_msg_list);
 	spin_lock_init(&vmbus_connection.channelmsg_lock);
 
@@ -280,10 +294,14 @@ void vmbus_disconnect(void)
 	 */
 	vmbus_initiate_unload(false);
 
-	if (vmbus_connection.work_queue) {
-		drain_workqueue(vmbus_connection.work_queue);
+	if (vmbus_connection.handle_sub_chan_wq)
+		destroy_workqueue(vmbus_connection.handle_sub_chan_wq);
+
+	if (vmbus_connection.handle_primary_chan_wq)
+		destroy_workqueue(vmbus_connection.handle_primary_chan_wq);
+
+	if (vmbus_connection.work_queue)
 		destroy_workqueue(vmbus_connection.work_queue);
-	}
 
 	if (vmbus_connection.int_page) {
 		free_pages((unsigned long)vmbus_connection.int_page, 0);
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 72eaba3d50fc2..87d3d7da78f87 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -335,7 +335,14 @@ struct vmbus_connection {
 	struct list_head chn_list;
 	struct mutex channel_mutex;
 
+	/*
+	 * An offer message is handled first on the work_queue, and then
+	 * is further handled on handle_primary_chan_wq or
+	 * handle_sub_chan_wq.
+	 */
 	struct workqueue_struct *work_queue;
+	struct workqueue_struct *handle_primary_chan_wq;
+	struct workqueue_struct *handle_sub_chan_wq;
 };
 
 
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index b3e24368930a2..14131b6fae68d 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -905,6 +905,13 @@ struct vmbus_channel {
 
 	bool probe_done;
 
+	/*
+	 * We must offload the handling of the primary/sub channels
+	 * from the single-threaded vmbus_connection.work_queue to
+	 * two different workqueue, otherwise we can block
+	 * vmbus_connection.work_queue and hang: see vmbus_process_offer().
+	 */
+	struct work_struct add_channel_work;
 };
 
 static inline bool is_hvsock_channel(const struct vmbus_channel *c)
-- 
GitLab


From 54947cd64c1b8290f64bb2958e343c07270e3a58 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 3 Dec 2018 10:44:15 +0100
Subject: [PATCH 1702/1890] ALSA: hda/realtek - Fix speaker output regression
 on Thinkpad T570

We've got a regression report for some Thinkpad models (at least
T570s) which shows the too low speaker output volume.  The bisection
leaded to the commit 61fcf8ece9b6 ("ALSA: hda/realtek - Enable Thinkpad
Dock device for ALC298 platform"), and it's basically adding the two
pin configurations for the dock, and looks harmless.

The real culprit seems, though, that the DAC assignment for the
speaker pin is implicitly assumed on these devices, i.e. pin NID 0x14
to be coupled with DAC NID 0x03.  When more pins are configured by the
commit above, the auto-parser changes the DAC assignment, and this
resulted in the regression.

As a workaround, just provide the fixed pin / DAC mapping table for
this Thinkpad fixup function.  It's no generic solution, but the
problem itself is pretty much device-specific, so must be good
enough.

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1554304
Fixes: 61fcf8ece9b6 ("ALSA: hda/realtek - Enable Thinkpad Dock device for ALC298 platform")
Cc: <stable@vger.kernel.org>
Reported-and-tested-by: Jeremy Cline <jcline@redhat.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 06f93032d0ccf..802f1f1b3a193 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -4988,9 +4988,18 @@ static void alc_fixup_tpt470_dock(struct hda_codec *codec,
 		{ 0x19, 0x21a11010 }, /* dock mic */
 		{ }
 	};
+	/* Assure the speaker pin to be coupled with DAC NID 0x03; otherwise
+	 * the speaker output becomes too low by some reason on Thinkpads with
+	 * ALC298 codec
+	 */
+	static hda_nid_t preferred_pairs[] = {
+		0x14, 0x03, 0x17, 0x02, 0x21, 0x02,
+		0
+	};
 	struct alc_spec *spec = codec->spec;
 
 	if (action == HDA_FIXUP_ACT_PRE_PROBE) {
+		spec->gen.preferred_dacs = preferred_pairs;
 		spec->parse_flags = HDA_PINCFG_NO_HP_FIXUP;
 		snd_hda_apply_pincfgs(codec, pincfgs);
 	} else if (action == HDA_FIXUP_ACT_INIT) {
-- 
GitLab


From 182ddd16194cd082f25fa1b063dae3c7c5cce384 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Mon, 3 Dec 2018 11:38:11 +0100
Subject: [PATCH 1703/1890] x86/boot: Clear RSDP address in boot_params for
 broken loaders

Gunnar Krueger reported a systemd-boot failure and bisected it down to:

  e6e094e053af75 ("x86/acpi, x86/boot: Take RSDP address from boot params if available")

In case a broken boot loader doesn't clear its 'struct boot_params', clear
rsdp_addr in sanitize_boot_params().

Reported-by: Gunnar Krueger <taijian@posteo.de>
Tested-by: Gunnar Krueger <taijian@posteo.de>
Signed-off-by: Juergen Gross <jgross@suse.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: bp@alien8.de
Cc: sstabellini@kernel.org
Fixes: e6e094e053af75 ("x86/acpi, x86/boot: Take RSDP address from boot params if available")
Link: http://lkml.kernel.org/r/20181203103811.17056-1-jgross@suse.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/bootparam_utils.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h
index a07ffd23e4dd6..f6f6ef436599a 100644
--- a/arch/x86/include/asm/bootparam_utils.h
+++ b/arch/x86/include/asm/bootparam_utils.h
@@ -36,6 +36,7 @@ static void sanitize_boot_params(struct boot_params *boot_params)
 	 */
 	if (boot_params->sentinel) {
 		/* fields in boot_params are left uninitialized, clear them */
+		boot_params->acpi_rsdp_addr = 0;
 		memset(&boot_params->ext_ramdisk_image, 0,
 		       (char *)&boot_params->efi_info -
 			(char *)&boot_params->ext_ramdisk_image);
-- 
GitLab


From 0a7f54ea0e1ed5a8f666e2adee7943b3991b4987 Mon Sep 17 00:00:00 2001
From: Tomi Valkeinen <tomi.valkeinen@ti.com>
Date: Mon, 26 Nov 2018 11:24:47 +0200
Subject: [PATCH 1704/1890] drm/omap: fix bus_flags for panel-dpi

panel-dpi used to convey the bus-flags via the videomode, but recent
changes changed the use of videomode to DRM's drm_display_mode which
does not contain bus-flags. This broke panel-dpi, which didn't
explicitly store the bus-flags into dssdev->bus_flags.

Fix this by setting dssdev->bus_flags. Also change the bus_flags type to
u32, as that is the type used in the DRM framework, and we would get a
warning with drm_bus_flags_from_videomode() otherwise.

Fixes: 3fbda31e814868d8477ddf52d74b7b8f596578e8 ("drm/omap: Split mode fixup and mode set from encoder enable")
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
Reported-by: H. Nikolaus Schaller <hns@goldelico.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181126092447.11864-1-tomi.valkeinen@ti.com
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
---
 drivers/gpu/drm/omapdrm/displays/panel-dpi.c | 1 +
 drivers/gpu/drm/omapdrm/dss/omapdss.h        | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/omapdrm/displays/panel-dpi.c b/drivers/gpu/drm/omapdrm/displays/panel-dpi.c
index 1f8161b041be6..465120809eb3b 100644
--- a/drivers/gpu/drm/omapdrm/displays/panel-dpi.c
+++ b/drivers/gpu/drm/omapdrm/displays/panel-dpi.c
@@ -177,6 +177,7 @@ static int panel_dpi_probe(struct platform_device *pdev)
 	dssdev->type = OMAP_DISPLAY_TYPE_DPI;
 	dssdev->owner = THIS_MODULE;
 	dssdev->of_ports = BIT(0);
+	drm_bus_flags_from_videomode(&ddata->vm, &dssdev->bus_flags);
 
 	omapdss_display_init(dssdev);
 	omapdss_device_register(dssdev);
diff --git a/drivers/gpu/drm/omapdrm/dss/omapdss.h b/drivers/gpu/drm/omapdrm/dss/omapdss.h
index 1f698a95a94a5..33e15cb77efa7 100644
--- a/drivers/gpu/drm/omapdrm/dss/omapdss.h
+++ b/drivers/gpu/drm/omapdrm/dss/omapdss.h
@@ -432,7 +432,7 @@ struct omap_dss_device {
 	const struct omap_dss_driver *driver;
 	const struct omap_dss_device_ops *ops;
 	unsigned long ops_flags;
-	unsigned long bus_flags;
+	u32 bus_flags;
 
 	/* helper variable for driver suspend/resume */
 	bool activate_after_resume;
-- 
GitLab


From 0a02d495531e0bbe32c3f7361232ba61b981199a Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.com>
Date: Wed, 21 Nov 2018 17:09:12 +0100
Subject: [PATCH 1705/1890] drm/omap: populate DSI platform bus earlier

After the changes from 4.20 the DSI encoder tries to find the
attached panel before populating the DSI bus. If the panel is
not found -EPROBE_DEFER is returned, so the DSI bus is never
populated and the panel never added.

Fix this by populating the DSI bus before searching for the
video sink in dsi_init_output().

Fixes: 27d624527d992 ("drm/omap: dss: Acquire next dssdev at probe time")
Acked-by: Pavel Machek <pavel@ucw.cz>
Tested-by: Tony Lindgren <tony@atomide.com>
Tested-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181121160916.22017-3-sebastian.reichel@collabora.com
---
 drivers/gpu/drm/omapdrm/dss/dsi.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/omapdrm/dss/dsi.c b/drivers/gpu/drm/omapdrm/dss/dsi.c
index 0a485c5b982eb..00a9c2ab9e6c8 100644
--- a/drivers/gpu/drm/omapdrm/dss/dsi.c
+++ b/drivers/gpu/drm/omapdrm/dss/dsi.c
@@ -5418,9 +5418,15 @@ static int dsi_probe(struct platform_device *pdev)
 		dsi->num_lanes_supported = 3;
 	}
 
+	r = of_platform_populate(dev->of_node, NULL, NULL, dev);
+	if (r) {
+		DSSERR("Failed to populate DSI child devices: %d\n", r);
+		goto err_pm_disable;
+	}
+
 	r = dsi_init_output(dsi);
 	if (r)
-		goto err_pm_disable;
+		goto err_of_depopulate;
 
 	r = dsi_probe_of(dsi);
 	if (r) {
@@ -5428,22 +5434,16 @@ static int dsi_probe(struct platform_device *pdev)
 		goto err_uninit_output;
 	}
 
-	r = of_platform_populate(dev->of_node, NULL, NULL, dev);
-	if (r) {
-		DSSERR("Failed to populate DSI child devices: %d\n", r);
-		goto err_uninit_output;
-	}
-
 	r = component_add(&pdev->dev, &dsi_component_ops);
 	if (r)
-		goto err_of_depopulate;
+		goto err_uninit_output;
 
 	return 0;
 
-err_of_depopulate:
-	of_platform_depopulate(dev);
 err_uninit_output:
 	dsi_uninit_output(dsi);
+err_of_depopulate:
+	of_platform_depopulate(dev);
 err_pm_disable:
 	pm_runtime_disable(dev);
 	return r;
-- 
GitLab


From 3c613a3bddd322c87677604d81e267fee22c6f14 Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.com>
Date: Wed, 21 Nov 2018 17:09:14 +0100
Subject: [PATCH 1706/1890] drm/omap: fix incorrect union usage

The DSI encoder sets dssdev->ops->dsi.set_config, which is stored at the
same offset as dssdev->ops->hdmi.set_hdmi_mode. The code in omap_encoder
only checks if dssdev->ops->hdmi.set_hdmi_mode is NULL. Due to the way
union works, it won't be NULL if dsi.set_config is set. This means
dsi_set_config will be called with config=hdmi_mode=false=NULL parameter
resulting in a NULL dereference. Also the dereference happens while
console is locked, so kernel hangs without any debug output without
"fb.lockless_register_fb=1" parameter.

This restructures the code, so that the HDMI mode is only configured
for HDMI output types.

Fixes: 83910ad3f51fb ("drm/omap: Move most omap_dss_driver operations to omap_dss_device_ops")
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
Tested-by: Tony Lindgren <tony@atomide.com>
[tomi.valkeinen@ti.com: dropped the safeguard]
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181121160916.22017-5-sebastian.reichel@collabora.com
---
 drivers/gpu/drm/omapdrm/omap_encoder.c | 58 +++++++++++++++-----------
 1 file changed, 33 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/omapdrm/omap_encoder.c b/drivers/gpu/drm/omapdrm/omap_encoder.c
index 452e625f6ce33..933ebc9f9faaa 100644
--- a/drivers/gpu/drm/omapdrm/omap_encoder.c
+++ b/drivers/gpu/drm/omapdrm/omap_encoder.c
@@ -52,17 +52,44 @@ static const struct drm_encoder_funcs omap_encoder_funcs = {
 	.destroy = omap_encoder_destroy,
 };
 
+static void omap_encoder_hdmi_mode_set(struct drm_encoder *encoder,
+				       struct drm_display_mode *adjusted_mode)
+{
+	struct drm_device *dev = encoder->dev;
+	struct omap_encoder *omap_encoder = to_omap_encoder(encoder);
+	struct omap_dss_device *dssdev = omap_encoder->output;
+	struct drm_connector *connector;
+	bool hdmi_mode;
+
+	hdmi_mode = false;
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+		if (connector->encoder == encoder) {
+			hdmi_mode = omap_connector_get_hdmi_mode(connector);
+			break;
+		}
+	}
+
+	if (dssdev->ops->hdmi.set_hdmi_mode)
+		dssdev->ops->hdmi.set_hdmi_mode(dssdev, hdmi_mode);
+
+	if (hdmi_mode && dssdev->ops->hdmi.set_infoframe) {
+		struct hdmi_avi_infoframe avi;
+		int r;
+
+		r = drm_hdmi_avi_infoframe_from_display_mode(&avi, adjusted_mode,
+							     false);
+		if (r == 0)
+			dssdev->ops->hdmi.set_infoframe(dssdev, &avi);
+	}
+}
+
 static void omap_encoder_mode_set(struct drm_encoder *encoder,
 				  struct drm_display_mode *mode,
 				  struct drm_display_mode *adjusted_mode)
 {
-	struct drm_device *dev = encoder->dev;
 	struct omap_encoder *omap_encoder = to_omap_encoder(encoder);
-	struct drm_connector *connector;
 	struct omap_dss_device *dssdev;
 	struct videomode vm = { 0 };
-	bool hdmi_mode;
-	int r;
 
 	drm_display_mode_to_videomode(adjusted_mode, &vm);
 
@@ -112,27 +139,8 @@ static void omap_encoder_mode_set(struct drm_encoder *encoder,
 	}
 
 	/* Set the HDMI mode and HDMI infoframe if applicable. */
-	hdmi_mode = false;
-	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
-		if (connector->encoder == encoder) {
-			hdmi_mode = omap_connector_get_hdmi_mode(connector);
-			break;
-		}
-	}
-
-	dssdev = omap_encoder->output;
-
-	if (dssdev->ops->hdmi.set_hdmi_mode)
-		dssdev->ops->hdmi.set_hdmi_mode(dssdev, hdmi_mode);
-
-	if (hdmi_mode && dssdev->ops->hdmi.set_infoframe) {
-		struct hdmi_avi_infoframe avi;
-
-		r = drm_hdmi_avi_infoframe_from_display_mode(&avi, adjusted_mode,
-							     false);
-		if (r == 0)
-			dssdev->ops->hdmi.set_infoframe(dssdev, &avi);
-	}
+	if (omap_encoder->output->output_type == OMAP_DISPLAY_TYPE_HDMI)
+		omap_encoder_hdmi_mode_set(encoder, adjusted_mode);
 }
 
 static void omap_encoder_disable(struct drm_encoder *encoder)
-- 
GitLab


From 7f9743abaa79d3491fee7a0446461b0fdd2aeaa5 Mon Sep 17 00:00:00 2001
From: Jeykumar Sankaran <jsanka@codeaurora.org>
Date: Wed, 10 Oct 2018 14:11:16 -0700
Subject: [PATCH 1707/1890] drm/msm: validate display and event threads

While creating display and event threads per crtc, validate
them before setting their priorities.

changes in v2:
	- use dev_warn (Abhinav Kumar)
changes in v3:
	- fix compilation error
changes in v4:
	- Remove Change-Id (Sean Paul)
	- Keep logging within 80 char limit (Sean Paul)

Reviewed-by: Sean Paul <seanpaul@chromium.org>
Signed-off-by: Jeykumar Sankaran <jsanka@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
Signed-off-by: Sean Paul <seanpaul@chromium.org>
---
 drivers/gpu/drm/msm/msm_drv.c | 49 ++++++++++++-----------------------
 1 file changed, 16 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 4904d0d414094..dcff812c63d07 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -553,17 +553,18 @@ static int msm_drm_init(struct device *dev, struct drm_driver *drv)
 			kthread_run(kthread_worker_fn,
 				&priv->disp_thread[i].worker,
 				"crtc_commit:%d", priv->disp_thread[i].crtc_id);
-		ret = sched_setscheduler(priv->disp_thread[i].thread,
-							SCHED_FIFO, &param);
-		if (ret)
-			pr_warn("display thread priority update failed: %d\n",
-									ret);
-
 		if (IS_ERR(priv->disp_thread[i].thread)) {
 			dev_err(dev, "failed to create crtc_commit kthread\n");
 			priv->disp_thread[i].thread = NULL;
+			goto err_msm_uninit;
 		}
 
+		ret = sched_setscheduler(priv->disp_thread[i].thread,
+					 SCHED_FIFO, &param);
+		if (ret)
+			dev_warn(dev, "disp_thread set priority failed: %d\n",
+				 ret);
+
 		/* initialize event thread */
 		priv->event_thread[i].crtc_id = priv->crtcs[i]->base.id;
 		kthread_init_worker(&priv->event_thread[i].worker);
@@ -572,6 +573,12 @@ static int msm_drm_init(struct device *dev, struct drm_driver *drv)
 			kthread_run(kthread_worker_fn,
 				&priv->event_thread[i].worker,
 				"crtc_event:%d", priv->event_thread[i].crtc_id);
+		if (IS_ERR(priv->event_thread[i].thread)) {
+			dev_err(dev, "failed to create crtc_event kthread\n");
+			priv->event_thread[i].thread = NULL;
+			goto err_msm_uninit;
+		}
+
 		/**
 		 * event thread should also run at same priority as disp_thread
 		 * because it is handling frame_done events. A lower priority
@@ -580,34 +587,10 @@ static int msm_drm_init(struct device *dev, struct drm_driver *drv)
 		 * failure at crtc commit level.
 		 */
 		ret = sched_setscheduler(priv->event_thread[i].thread,
-							SCHED_FIFO, &param);
+					 SCHED_FIFO, &param);
 		if (ret)
-			pr_warn("display event thread priority update failed: %d\n",
-									ret);
-
-		if (IS_ERR(priv->event_thread[i].thread)) {
-			dev_err(dev, "failed to create crtc_event kthread\n");
-			priv->event_thread[i].thread = NULL;
-		}
-
-		if ((!priv->disp_thread[i].thread) ||
-				!priv->event_thread[i].thread) {
-			/* clean up previously created threads if any */
-			for ( ; i >= 0; i--) {
-				if (priv->disp_thread[i].thread) {
-					kthread_stop(
-						priv->disp_thread[i].thread);
-					priv->disp_thread[i].thread = NULL;
-				}
-
-				if (priv->event_thread[i].thread) {
-					kthread_stop(
-						priv->event_thread[i].thread);
-					priv->event_thread[i].thread = NULL;
-				}
-			}
-			goto err_msm_uninit;
-		}
+			dev_warn(dev, "event_thread set priority failed:%d\n",
+				 ret);
 	}
 
 	ret = drm_vblank_init(ddev, priv->num_crtcs);
-- 
GitLab


From ee4456359640defe3f51cc6b728bfce4bc444c9e Mon Sep 17 00:00:00 2001
From: Todor Tomov <todor.tomov@linaro.org>
Date: Fri, 19 Oct 2018 17:07:22 +0300
Subject: [PATCH 1708/1890] drm/msm/hdmi: Enable HPD after HDMI IRQ is set up

SoCs that contain MDP5 have a top level wrapper called MDSS that
manages locks, power and irq for the sub-blocks within it.

Irq for HDMI is also routed through the MDSS.

Shortly after the Hot Plug Detection (HPD) is enabled in HDMI,
HDMI interrupts are recieved by the MDSS interrupt handler.
However at this moment the HDMI irq is still not mapped to
the MDSS irq domain so the HDMI irq handler cannot be called
to process the interrupts.

This leads to a flood of HDMI interrupts on CPU 0.

If we are lucky to have the HDMI initialization running on a
different CPU, it will eventually map the HDMI irq to MDSS irq
domain, the next HDMI interrupt will be handled by the HDMI irq
handler, the interrupt flood will stop and we will recover.

If the HDMI initialization is running on CPU 0, then it cannot
complete and there is nothing to stop the interrupt flood on
CPU 0. The system is stuck.

Fix this by moving the HPD enablement after the HDMI irq is
mapped to the MDSS irq domain.

Signed-off-by: Todor Tomov <todor.tomov@linaro.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
Signed-off-by: Sean Paul <seanpaul@chromium.org>
---
 drivers/gpu/drm/msm/hdmi/hdmi.c           |  6 ++++++
 drivers/gpu/drm/msm/hdmi/hdmi.h           |  1 +
 drivers/gpu/drm/msm/hdmi/hdmi_connector.c | 10 ++--------
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.c b/drivers/gpu/drm/msm/hdmi/hdmi.c
index c79659ca57065..33e083f71a170 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi.c
+++ b/drivers/gpu/drm/msm/hdmi/hdmi.c
@@ -332,6 +332,12 @@ int msm_hdmi_modeset_init(struct hdmi *hdmi,
 		goto fail;
 	}
 
+	ret = msm_hdmi_hpd_enable(hdmi->connector);
+	if (ret < 0) {
+		DRM_DEV_ERROR(&hdmi->pdev->dev, "failed to enable HPD: %d\n", ret);
+		goto fail;
+	}
+
 	encoder->bridge = hdmi->bridge;
 
 	priv->bridges[priv->num_bridges++]       = hdmi->bridge;
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.h b/drivers/gpu/drm/msm/hdmi/hdmi.h
index accc9a61611d3..5c5df6ab2a573 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi.h
+++ b/drivers/gpu/drm/msm/hdmi/hdmi.h
@@ -245,6 +245,7 @@ void msm_hdmi_bridge_destroy(struct drm_bridge *bridge);
 
 void msm_hdmi_connector_irq(struct drm_connector *connector);
 struct drm_connector *msm_hdmi_connector_init(struct hdmi *hdmi);
+int msm_hdmi_hpd_enable(struct drm_connector *connector);
 
 /*
  * i2c adapter for ddc:
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_connector.c b/drivers/gpu/drm/msm/hdmi/hdmi_connector.c
index e9c9a0af508e8..30e908dfded7e 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi_connector.c
+++ b/drivers/gpu/drm/msm/hdmi/hdmi_connector.c
@@ -167,8 +167,9 @@ static void enable_hpd_clocks(struct hdmi *hdmi, bool enable)
 	}
 }
 
-static int hpd_enable(struct hdmi_connector *hdmi_connector)
+int msm_hdmi_hpd_enable(struct drm_connector *connector)
 {
+	struct hdmi_connector *hdmi_connector = to_hdmi_connector(connector);
 	struct hdmi *hdmi = hdmi_connector->hdmi;
 	const struct hdmi_platform_config *config = hdmi->config;
 	struct device *dev = &hdmi->pdev->dev;
@@ -450,7 +451,6 @@ struct drm_connector *msm_hdmi_connector_init(struct hdmi *hdmi)
 {
 	struct drm_connector *connector = NULL;
 	struct hdmi_connector *hdmi_connector;
-	int ret;
 
 	hdmi_connector = kzalloc(sizeof(*hdmi_connector), GFP_KERNEL);
 	if (!hdmi_connector)
@@ -471,12 +471,6 @@ struct drm_connector *msm_hdmi_connector_init(struct hdmi *hdmi)
 	connector->interlace_allowed = 0;
 	connector->doublescan_allowed = 0;
 
-	ret = hpd_enable(hdmi_connector);
-	if (ret) {
-		dev_err(&hdmi->pdev->dev, "failed to enable HPD: %d\n", ret);
-		return ERR_PTR(ret);
-	}
-
 	drm_connector_attach_encoder(connector, hdmi->encoder);
 
 	return connector;
-- 
GitLab


From d52900437e95e2799953af21971a54fe5f8e33ff Mon Sep 17 00:00:00 2001
From: Jordan Crouse <jcrouse@codeaurora.org>
Date: Mon, 15 Oct 2018 15:31:54 -0600
Subject: [PATCH 1709/1890] drm/msm/gpu: Don't map command buffers with
 nr_relocs equal to 0

If a command buffer doesn't have any relocs assigned to it there then
is no need to map it in the kernel address space.

Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
Signed-off-by: Sean Paul <seanpaul@chromium.org>
---
 drivers/gpu/drm/msm/msm_gem_submit.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 7a7923e6220da..66673ea9bf6f8 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -317,6 +317,9 @@ static int submit_reloc(struct msm_gem_submit *submit, struct msm_gem_object *ob
 	uint32_t *ptr;
 	int ret = 0;
 
+	if (!nr_relocs)
+		return 0;
+
 	if (offset % 4) {
 		DRM_ERROR("non-aligned cmdstream buffer: %u\n", offset);
 		return -EINVAL;
-- 
GitLab


From 081679c51ef2fd7b23cf9ddb7d775b17f75de18c Mon Sep 17 00:00:00 2001
From: Sean Paul <seanpaul@chromium.org>
Date: Tue, 16 Oct 2018 11:52:45 -0400
Subject: [PATCH 1710/1890] drm/msm: dpu: Don't set legacy plane->crtc pointer

It causes a WARN in drm_atomic_get_plane_state(), and is not used by
atomic (or dpu).

Signed-off-by: Sean Paul <seanpaul@chromium.org>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
index d4530d60767b8..ca169f013a14e 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
@@ -1594,7 +1594,6 @@ struct drm_crtc *dpu_crtc_init(struct drm_device *dev, struct drm_plane *plane,
 				NULL);
 
 	drm_crtc_helper_add(crtc, &dpu_crtc_helper_funcs);
-	plane->crtc = crtc;
 
 	/* save user friendly CRTC name for later */
 	snprintf(dpu_crtc->name, DPU_CRTC_NAME_SIZE, "crtc%u", crtc->base.id);
-- 
GitLab


From 2189463dba3eac10d7264a40ede12fc1a3c06fb1 Mon Sep 17 00:00:00 2001
From: Robert Foss <robert.foss@collabora.com>
Date: Mon, 5 Nov 2018 11:13:12 +0100
Subject: [PATCH 1711/1890] drm/msm: Move fence put to where failure occurs

If dma_fence_wait fails to wait for a supplied in-fence in
msm_ioctl_gem_submit, make sure we release that in-fence.

Also remove this dma_fence_put() from the 'out' label.

Signed-off-by: Robert Foss <robert.foss@collabora.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: stable@vger.kernel.org
Signed-off-by: Rob Clark <robdclark@gmail.com>
Signed-off-by: Sean Paul <seanpaul@chromium.org>
---
 drivers/gpu/drm/msm/msm_gem_submit.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 66673ea9bf6f8..6942604ad9a8b 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -413,7 +413,6 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 	struct msm_file_private *ctx = file->driver_priv;
 	struct msm_gem_submit *submit;
 	struct msm_gpu *gpu = priv->gpu;
-	struct dma_fence *in_fence = NULL;
 	struct sync_file *sync_file = NULL;
 	struct msm_gpu_submitqueue *queue;
 	struct msm_ringbuffer *ring;
@@ -446,6 +445,8 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 	ring = gpu->rb[queue->prio];
 
 	if (args->flags & MSM_SUBMIT_FENCE_FD_IN) {
+		struct dma_fence *in_fence;
+
 		in_fence = sync_file_get_fence(args->fence_fd);
 
 		if (!in_fence)
@@ -455,11 +456,13 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 		 * Wait if the fence is from a foreign context, or if the fence
 		 * array contains any fence from a foreign context.
 		 */
-		if (!dma_fence_match_context(in_fence, ring->fctx->context)) {
+		ret = 0;
+		if (!dma_fence_match_context(in_fence, ring->fctx->context))
 			ret = dma_fence_wait(in_fence, true);
-			if (ret)
-				return ret;
-		}
+
+		dma_fence_put(in_fence);
+		if (ret)
+			return ret;
 	}
 
 	ret = mutex_lock_interruptible(&dev->struct_mutex);
@@ -585,8 +588,6 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 	}
 
 out:
-	if (in_fence)
-		dma_fence_put(in_fence);
 	submit_cleanup(submit);
 	if (ret)
 		msm_gem_submit_free(submit);
-- 
GitLab


From 671465198e39706a125d832857e987ec47ce3ae8 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Wed, 14 Nov 2018 09:55:34 +0000
Subject: [PATCH 1712/1890] drm/msm/hdmi: Drop pointless static qualifier in
 msm_hdmi_bind()

There is no need to have the 'struct hdmi_platform_config *hdmi_cfg'
variable static since new value always be assigned before use it.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Rob Clark <robdclark@gmail.com>
Signed-off-by: Sean Paul <seanpaul@chromium.org>
---
 drivers/gpu/drm/msm/hdmi/hdmi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.c b/drivers/gpu/drm/msm/hdmi/hdmi.c
index 33e083f71a170..adbdce3aeda00 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi.c
+++ b/drivers/gpu/drm/msm/hdmi/hdmi.c
@@ -577,7 +577,7 @@ static int msm_hdmi_bind(struct device *dev, struct device *master, void *data)
 {
 	struct drm_device *drm = dev_get_drvdata(master);
 	struct msm_drm_private *priv = drm->dev_private;
-	static struct hdmi_platform_config *hdmi_cfg;
+	struct hdmi_platform_config *hdmi_cfg;
 	struct hdmi *hdmi;
 	struct device_node *of_node = dev->of_node;
 	int i, err;
-- 
GitLab


From ce25aa3ee6939d83979cccf7adc5737cba9a0cb7 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Fri, 16 Nov 2018 19:25:26 +0800
Subject: [PATCH 1713/1890] drm/msm: dpu: Fix "WARNING: invalid free of devm_
 allocated data"

'dpu_enc' is a member of 'drm_enc'
And 'drm_enc' got allocated with devm_kzalloc in dpu_encoder_init.

This gives this error message:
./drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c:459:1-6:
 WARNING: invalid free of devm_ allocated data

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Rob Clark <robdclark@gmail.com>
Signed-off-by: Sean Paul <seanpaul@chromium.org>
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
index 96cdf06e7da21..d31d8281424ef 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
@@ -488,8 +488,6 @@ static void dpu_encoder_destroy(struct drm_encoder *drm_enc)
 
 	drm_encoder_cleanup(drm_enc);
 	mutex_destroy(&dpu_enc->enc_lock);
-
-	kfree(dpu_enc);
 }
 
 void dpu_encoder_helper_split_config(
-- 
GitLab


From d9a75a6201d9367a452de59d7759b708633f1a1f Mon Sep 17 00:00:00 2001
From: Jayant Shekhar <jshekhar@codeaurora.org>
Date: Fri, 30 Nov 2018 17:22:50 +0530
Subject: [PATCH 1714/1890] drm/msm/dpu: Ignore alpha for XBGR8888 format

Alpha enable in the pixel format will help in
selecting the blend rule. By keeping alpha enable
to true we are allowing foreground alpha to blend
with the layer. If alpha is don't care, then we
should not allow pixel alpha to be part of blend
equation.

Signed-off-by: Jayant Shekhar <jshekhar@codeaurora.org>
Signed-off-by: Sean Paul <seanpaul@chromium.org>
---
 drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c
index bfcd165e96dfe..d743e7ca6a3c8 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c
@@ -216,7 +216,7 @@ static const struct dpu_format dpu_format_map[] = {
 	INTERLEAVED_RGB_FMT(XBGR8888,
 		COLOR_8BIT, COLOR_8BIT, COLOR_8BIT, COLOR_8BIT,
 		C2_R_Cr, C0_G_Y, C1_B_Cb, C3_ALPHA, 4,
-		true, 4, 0,
+		false, 4, 0,
 		DPU_FETCH_LINEAR, 1),
 
 	INTERLEAVED_RGB_FMT(RGBA8888,
-- 
GitLab


From 098336deb946f37a70afc0979af388b615c378bf Mon Sep 17 00:00:00 2001
From: Wen Yang <wen.yang99@zte.com.cn>
Date: Thu, 29 Nov 2018 14:01:50 +0800
Subject: [PATCH 1715/1890] drm/msm: Fix error return checking

The error checks on ret for a negative error return always fails because
the return value of iommu_map_sg() is unsigned and can never be negative.

Detected with Coccinelle:
drivers/gpu/drm/msm/msm_iommu.c:69:9-12: WARNING: Unsigned expression
compared with zero: ret < 0

Signed-off-by: Wen Yang <wen.yang99@zte.com.cn>
CC: Rob Clark <robdclark@gmail.com>
CC: David Airlie <airlied@linux.ie>
CC: Julia Lawall <julia.lawall@lip6.fr>
CC: linux-arm-msm@vger.kernel.org
CC: dri-devel@lists.freedesktop.org
CC: freedreno@lists.freedesktop.org
CC: linux-kernel@vger.kernel.org
Signed-off-by: Sean Paul <seanpaul@chromium.org>
---
 drivers/gpu/drm/msm/msm_iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c
index b23d33622f374..2a90aa4caec08 100644
--- a/drivers/gpu/drm/msm/msm_iommu.c
+++ b/drivers/gpu/drm/msm/msm_iommu.c
@@ -66,7 +66,7 @@ static int msm_iommu_map(struct msm_mmu *mmu, uint64_t iova,
 //	pm_runtime_get_sync(mmu->dev);
 	ret = iommu_map_sg(iommu->domain, iova, sgt->sgl, sgt->nents, prot);
 //	pm_runtime_put_sync(mmu->dev);
-	WARN_ON(ret < 0);
+	WARN_ON(!ret);
 
 	return (ret == len) ? 0 : -EINVAL;
 }
-- 
GitLab


From d7fd67653f847327e545bdb198b901ee124afd7c Mon Sep 17 00:00:00 2001
From: Junwei Zhang <Jerry.Zhang@amd.com>
Date: Thu, 22 Nov 2018 17:53:00 +0800
Subject: [PATCH 1716/1890] drm/amdgpu: update mc firmware image for polaris12
 variants

Some new variants require updated firmware.

Signed-off-by: Junwei Zhang <Jerry.Zhang@amd.com>
Reviewed-by: Evan Quan <evan.quan@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 1d3265c97b704..93fa6b60576e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -56,6 +56,7 @@ MODULE_FIRMWARE("amdgpu/tonga_mc.bin");
 MODULE_FIRMWARE("amdgpu/polaris11_mc.bin");
 MODULE_FIRMWARE("amdgpu/polaris10_mc.bin");
 MODULE_FIRMWARE("amdgpu/polaris12_mc.bin");
+MODULE_FIRMWARE("amdgpu/polaris12_k_mc.bin");
 
 static const u32 golden_settings_tonga_a11[] =
 {
@@ -231,6 +232,15 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev)
 		break;
 	case CHIP_POLARIS12:
 		chip_name = "polaris12";
+		if (((adev->pdev->device == 0x6987) &&
+		     ((adev->pdev->revision == 0xc0) ||
+		      (adev->pdev->revision == 0xc3))) ||
+		    ((adev->pdev->device == 0x6981) &&
+		     ((adev->pdev->revision == 0x00) ||
+		      (adev->pdev->revision == 0x01) ||
+		      (adev->pdev->revision == 0x10)))) {
+			chip_name = "polaris12_k";
+		}
 		break;
 	case CHIP_FIJI:
 	case CHIP_CARRIZO:
-- 
GitLab


From 5f8cf712582617d523120df67d392059eaf2fc4b Mon Sep 17 00:00:00 2001
From: Hui Peng <benquike@gmail.com>
Date: Mon, 3 Dec 2018 16:09:34 +0100
Subject: [PATCH 1717/1890] ALSA: usb-audio: Fix UAF decrement if card has no
 live interfaces in card.c

If a USB sound card reports 0 interfaces, an error condition is triggered
and the function usb_audio_probe errors out. In the error path, there was a
use-after-free vulnerability where the memory object of the card was first
freed, followed by a decrement of the number of active chips. Moving the
decrement above the atomic_dec fixes the UAF.

[ The original problem was introduced in 3.1 kernel, while it was
  developed in a different form.  The Fixes tag below indicates the
  original commit but it doesn't mean that the patch is applicable
  cleanly. -- tiwai ]

Fixes: 362e4e49abe5 ("ALSA: usb-audio - clear chip->probing on error exit")
Reported-by: Hui Peng <benquike@gmail.com>
Reported-by: Mathias Payer <mathias.payer@nebelwelt.net>
Signed-off-by: Hui Peng <benquike@gmail.com>
Signed-off-by: Mathias Payer <mathias.payer@nebelwelt.net>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/usb/card.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/sound/usb/card.c b/sound/usb/card.c
index 2bfe4e80a6b92..a105947eaf55c 100644
--- a/sound/usb/card.c
+++ b/sound/usb/card.c
@@ -682,9 +682,12 @@ static int usb_audio_probe(struct usb_interface *intf,
 
  __error:
 	if (chip) {
+		/* chip->active is inside the chip->card object,
+		 * decrement before memory is possibly returned.
+		 */
+		atomic_dec(&chip->active);
 		if (!chip->num_interfaces)
 			snd_card_free(chip->card);
-		atomic_dec(&chip->active);
 	}
 	mutex_unlock(&register_mutex);
 	return err;
-- 
GitLab


From a81a7c9c9ea3042ab02d66ac35def74abf091c15 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Wed, 28 Nov 2018 23:25:41 -0500
Subject: [PATCH 1718/1890] drm/amdgpu/gmc8: update MC firmware for polaris
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some variants require different MC firmware images.

Acked-by: Christian KÃ¶nig <christian.koenig@amd.com>
Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 29 ++++++++++++++++++++++-----
 1 file changed, 24 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 93fa6b60576e9..8dd9bae9e2dae 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -56,6 +56,8 @@ MODULE_FIRMWARE("amdgpu/tonga_mc.bin");
 MODULE_FIRMWARE("amdgpu/polaris11_mc.bin");
 MODULE_FIRMWARE("amdgpu/polaris10_mc.bin");
 MODULE_FIRMWARE("amdgpu/polaris12_mc.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_k_mc.bin");
+MODULE_FIRMWARE("amdgpu/polaris10_k_mc.bin");
 MODULE_FIRMWARE("amdgpu/polaris12_k_mc.bin");
 
 static const u32 golden_settings_tonga_a11[] =
@@ -225,22 +227,39 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev)
 		chip_name = "tonga";
 		break;
 	case CHIP_POLARIS11:
-		chip_name = "polaris11";
+		if (((adev->pdev->device == 0x67ef) &&
+		     ((adev->pdev->revision == 0xe0) ||
+		      (adev->pdev->revision == 0xe5))) ||
+		    ((adev->pdev->device == 0x67ff) &&
+		     ((adev->pdev->revision == 0xcf) ||
+		      (adev->pdev->revision == 0xef) ||
+		      (adev->pdev->revision == 0xff))))
+			chip_name = "polaris11_k";
+		else if ((adev->pdev->device == 0x67ef) &&
+			 (adev->pdev->revision == 0xe2))
+			chip_name = "polaris11_k";
+		else
+			chip_name = "polaris11";
 		break;
 	case CHIP_POLARIS10:
-		chip_name = "polaris10";
+		if ((adev->pdev->device == 0x67df) &&
+		    ((adev->pdev->revision == 0xe1) ||
+		     (adev->pdev->revision == 0xf7)))
+			chip_name = "polaris10_k";
+		else
+			chip_name = "polaris10";
 		break;
 	case CHIP_POLARIS12:
-		chip_name = "polaris12";
 		if (((adev->pdev->device == 0x6987) &&
 		     ((adev->pdev->revision == 0xc0) ||
 		      (adev->pdev->revision == 0xc3))) ||
 		    ((adev->pdev->device == 0x6981) &&
 		     ((adev->pdev->revision == 0x00) ||
 		      (adev->pdev->revision == 0x01) ||
-		      (adev->pdev->revision == 0x10)))) {
+		      (adev->pdev->revision == 0x10))))
 			chip_name = "polaris12_k";
-		}
+		else
+			chip_name = "polaris12";
 		break;
 	case CHIP_FIJI:
 	case CHIP_CARRIZO:
-- 
GitLab


From b52b6738cc2d50d2a8f4d0095bcb5a86716008a5 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Wed, 28 Nov 2018 23:28:17 -0500
Subject: [PATCH 1719/1890] drm/amdgpu/gmc8: always load MC firmware in the
 driver
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some power features rely on the driver loaded version so always
load the MC firmware from the driver even if the vbios loaded
a version already.

Acked-by: Christian KÃ¶nig <christian.koenig@amd.com>
Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 8dd9bae9e2dae..747c068379dc7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -366,7 +366,7 @@ static int gmc_v8_0_polaris_mc_load_microcode(struct amdgpu_device *adev)
 	const struct mc_firmware_header_v1_0 *hdr;
 	const __le32 *fw_data = NULL;
 	const __le32 *io_mc_regs = NULL;
-	u32 data, vbios_version;
+	u32 data;
 	int i, ucode_size, regs_size;
 
 	/* Skip MC ucode loading on SR-IOV capable boards.
@@ -377,13 +377,6 @@ static int gmc_v8_0_polaris_mc_load_microcode(struct amdgpu_device *adev)
 	if (amdgpu_sriov_bios(adev))
 		return 0;
 
-	WREG32(mmMC_SEQ_IO_DEBUG_INDEX, 0x9F);
-	data = RREG32(mmMC_SEQ_IO_DEBUG_DATA);
-	vbios_version = data & 0xf;
-
-	if (vbios_version == 0)
-		return 0;
-
 	if (!adev->gmc.fw)
 		return -EINVAL;
 
-- 
GitLab


From f02be8279a15c28302d72efa9ff21bdc586d5357 Mon Sep 17 00:00:00 2001
From: Evan Quan <evan.quan@amd.com>
Date: Wed, 28 Nov 2018 16:33:30 +0800
Subject: [PATCH 1720/1890] drm/amd/powerplay: support new pptable upload on
 Vega20

New pptable upload through sysfs interface is supported.

Signed-off-by: Evan Quan <evan.quan@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
index 85119c2bdcc8f..a2a7e0e94aa6b 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
@@ -80,7 +80,9 @@ int phm_enable_dynamic_state_management(struct pp_hwmgr *hwmgr)
 	PHM_FUNC_CHECK(hwmgr);
 	adev = hwmgr->adev;
 
-	if (smum_is_dpm_running(hwmgr) && !amdgpu_passthrough(adev)) {
+	/* Skip for suspend/resume case */
+	if (smum_is_dpm_running(hwmgr) && !amdgpu_passthrough(adev)
+		&& adev->in_suspend) {
 		pr_info("dpm has been enabled\n");
 		return 0;
 	}
-- 
GitLab


From 10cb3e6b63bf4266a5198813526fdd7259ffb8be Mon Sep 17 00:00:00 2001
From: Evan Quan <evan.quan@amd.com>
Date: Wed, 28 Nov 2018 16:36:12 +0800
Subject: [PATCH 1721/1890] drm/amd/powerplay: issue pre-display settings for
 display change event

For display config change event only, pre-display config settings are
needed.

Signed-off-by: Evan Quan <evan.quan@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c  | 3 +++
 drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c | 2 --
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
index 47ac923697394..0173d04800245 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
@@ -352,6 +352,9 @@ int hwmgr_handle_task(struct pp_hwmgr *hwmgr, enum amd_pp_task task_id,
 
 	switch (task_id) {
 	case AMD_PP_TASK_DISPLAY_CONFIG_CHANGE:
+		ret = phm_pre_display_configuration_changed(hwmgr);
+		if (ret)
+			return ret;
 		ret = phm_set_cpu_power_state(hwmgr);
 		if (ret)
 			return ret;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c
index 91ffb7bc4ee72..56437866d1206 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c
@@ -265,8 +265,6 @@ int psm_adjust_power_state_dynamic(struct pp_hwmgr *hwmgr, bool skip,
 	if (skip)
 		return 0;
 
-	phm_pre_display_configuration_changed(hwmgr);
-
 	phm_display_configuration_changed(hwmgr);
 
 	if (hwmgr->ps)
-- 
GitLab


From 106ea7feb98f1183e0cb5fe23da14fde662a8b15 Mon Sep 17 00:00:00 2001
From: Evan Quan <evan.quan@amd.com>
Date: Wed, 28 Nov 2018 16:41:02 +0800
Subject: [PATCH 1722/1890] drm/amd/powerplay: support SoftMin/Max setting for
 some specific DPM

For some case, no need to force SoftMin/Max settings for all DPMs.
It's OK to force on some specific DPM only.

Signed-off-by: Evan Quan <evan.quan@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../drm/amd/powerplay/hwmgr/vega20_hwmgr.c    | 54 +++++++++++--------
 1 file changed, 32 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
index b4eadd47f3a44..3367dd30cdd0d 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
@@ -1660,14 +1660,15 @@ static uint32_t vega20_find_highest_dpm_level(
 	return i;
 }
 
-static int vega20_upload_dpm_min_level(struct pp_hwmgr *hwmgr)
+static int vega20_upload_dpm_min_level(struct pp_hwmgr *hwmgr, uint32_t feature_mask)
 {
 	struct vega20_hwmgr *data =
 			(struct vega20_hwmgr *)(hwmgr->backend);
 	uint32_t min_freq;
 	int ret = 0;
 
-	if (data->smu_features[GNLD_DPM_GFXCLK].enabled) {
+	if (data->smu_features[GNLD_DPM_GFXCLK].enabled &&
+	   (feature_mask & FEATURE_DPM_GFXCLK_MASK)) {
 		min_freq = data->dpm_table.gfx_table.dpm_state.soft_min_level;
 		PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
 					hwmgr, PPSMC_MSG_SetSoftMinByFreq,
@@ -1676,7 +1677,8 @@ static int vega20_upload_dpm_min_level(struct pp_hwmgr *hwmgr)
 					return ret);
 	}
 
-	if (data->smu_features[GNLD_DPM_UCLK].enabled) {
+	if (data->smu_features[GNLD_DPM_UCLK].enabled &&
+	   (feature_mask & FEATURE_DPM_UCLK_MASK)) {
 		min_freq = data->dpm_table.mem_table.dpm_state.soft_min_level;
 		PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
 					hwmgr, PPSMC_MSG_SetSoftMinByFreq,
@@ -1692,7 +1694,8 @@ static int vega20_upload_dpm_min_level(struct pp_hwmgr *hwmgr)
 					return ret);
 	}
 
-	if (data->smu_features[GNLD_DPM_UVD].enabled) {
+	if (data->smu_features[GNLD_DPM_UVD].enabled &&
+	   (feature_mask & FEATURE_DPM_UVD_MASK)) {
 		min_freq = data->dpm_table.vclk_table.dpm_state.soft_min_level;
 
 		PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
@@ -1710,7 +1713,8 @@ static int vega20_upload_dpm_min_level(struct pp_hwmgr *hwmgr)
 					return ret);
 	}
 
-	if (data->smu_features[GNLD_DPM_VCE].enabled) {
+	if (data->smu_features[GNLD_DPM_VCE].enabled &&
+	   (feature_mask & FEATURE_DPM_VCE_MASK)) {
 		min_freq = data->dpm_table.eclk_table.dpm_state.soft_min_level;
 
 		PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
@@ -1720,7 +1724,8 @@ static int vega20_upload_dpm_min_level(struct pp_hwmgr *hwmgr)
 					return ret);
 	}
 
-	if (data->smu_features[GNLD_DPM_SOCCLK].enabled) {
+	if (data->smu_features[GNLD_DPM_SOCCLK].enabled &&
+	   (feature_mask & FEATURE_DPM_SOCCLK_MASK)) {
 		min_freq = data->dpm_table.soc_table.dpm_state.soft_min_level;
 
 		PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
@@ -1733,14 +1738,15 @@ static int vega20_upload_dpm_min_level(struct pp_hwmgr *hwmgr)
 	return ret;
 }
 
-static int vega20_upload_dpm_max_level(struct pp_hwmgr *hwmgr)
+static int vega20_upload_dpm_max_level(struct pp_hwmgr *hwmgr, uint32_t feature_mask)
 {
 	struct vega20_hwmgr *data =
 			(struct vega20_hwmgr *)(hwmgr->backend);
 	uint32_t max_freq;
 	int ret = 0;
 
-	if (data->smu_features[GNLD_DPM_GFXCLK].enabled) {
+	if (data->smu_features[GNLD_DPM_GFXCLK].enabled &&
+	   (feature_mask & FEATURE_DPM_GFXCLK_MASK)) {
 		max_freq = data->dpm_table.gfx_table.dpm_state.soft_max_level;
 
 		PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
@@ -1750,7 +1756,8 @@ static int vega20_upload_dpm_max_level(struct pp_hwmgr *hwmgr)
 					return ret);
 	}
 
-	if (data->smu_features[GNLD_DPM_UCLK].enabled) {
+	if (data->smu_features[GNLD_DPM_UCLK].enabled &&
+	   (feature_mask & FEATURE_DPM_UCLK_MASK)) {
 		max_freq = data->dpm_table.mem_table.dpm_state.soft_max_level;
 
 		PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
@@ -1760,7 +1767,8 @@ static int vega20_upload_dpm_max_level(struct pp_hwmgr *hwmgr)
 					return ret);
 	}
 
-	if (data->smu_features[GNLD_DPM_UVD].enabled) {
+	if (data->smu_features[GNLD_DPM_UVD].enabled &&
+	   (feature_mask & FEATURE_DPM_UVD_MASK)) {
 		max_freq = data->dpm_table.vclk_table.dpm_state.soft_max_level;
 
 		PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
@@ -1777,7 +1785,8 @@ static int vega20_upload_dpm_max_level(struct pp_hwmgr *hwmgr)
 					return ret);
 	}
 
-	if (data->smu_features[GNLD_DPM_VCE].enabled) {
+	if (data->smu_features[GNLD_DPM_VCE].enabled &&
+	   (feature_mask & FEATURE_DPM_VCE_MASK)) {
 		max_freq = data->dpm_table.eclk_table.dpm_state.soft_max_level;
 
 		PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
@@ -1787,7 +1796,8 @@ static int vega20_upload_dpm_max_level(struct pp_hwmgr *hwmgr)
 					return ret);
 	}
 
-	if (data->smu_features[GNLD_DPM_SOCCLK].enabled) {
+	if (data->smu_features[GNLD_DPM_SOCCLK].enabled &&
+	   (feature_mask & FEATURE_DPM_SOCCLK_MASK)) {
 		max_freq = data->dpm_table.soc_table.dpm_state.soft_max_level;
 
 		PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
@@ -2126,12 +2136,12 @@ static int vega20_force_dpm_highest(struct pp_hwmgr *hwmgr)
 		data->dpm_table.mem_table.dpm_state.soft_max_level =
 		data->dpm_table.mem_table.dpm_levels[soft_level].value;
 
-	ret = vega20_upload_dpm_min_level(hwmgr);
+	ret = vega20_upload_dpm_min_level(hwmgr, 0xFFFFFFFF);
 	PP_ASSERT_WITH_CODE(!ret,
 			"Failed to upload boot level to highest!",
 			return ret);
 
-	ret = vega20_upload_dpm_max_level(hwmgr);
+	ret = vega20_upload_dpm_max_level(hwmgr, 0xFFFFFFFF);
 	PP_ASSERT_WITH_CODE(!ret,
 			"Failed to upload dpm max level to highest!",
 			return ret);
@@ -2158,12 +2168,12 @@ static int vega20_force_dpm_lowest(struct pp_hwmgr *hwmgr)
 		data->dpm_table.mem_table.dpm_state.soft_max_level =
 		data->dpm_table.mem_table.dpm_levels[soft_level].value;
 
-	ret = vega20_upload_dpm_min_level(hwmgr);
+	ret = vega20_upload_dpm_min_level(hwmgr, 0xFFFFFFFF);
 	PP_ASSERT_WITH_CODE(!ret,
 			"Failed to upload boot level to highest!",
 			return ret);
 
-	ret = vega20_upload_dpm_max_level(hwmgr);
+	ret = vega20_upload_dpm_max_level(hwmgr, 0xFFFFFFFF);
 	PP_ASSERT_WITH_CODE(!ret,
 			"Failed to upload dpm max level to highest!",
 			return ret);
@@ -2176,12 +2186,12 @@ static int vega20_unforce_dpm_levels(struct pp_hwmgr *hwmgr)
 {
 	int ret = 0;
 
-	ret = vega20_upload_dpm_min_level(hwmgr);
+	ret = vega20_upload_dpm_min_level(hwmgr, 0xFFFFFFFF);
 	PP_ASSERT_WITH_CODE(!ret,
 			"Failed to upload DPM Bootup Levels!",
 			return ret);
 
-	ret = vega20_upload_dpm_max_level(hwmgr);
+	ret = vega20_upload_dpm_max_level(hwmgr, 0xFFFFFFFF);
 	PP_ASSERT_WITH_CODE(!ret,
 			"Failed to upload DPM Max Levels!",
 			return ret);
@@ -2239,12 +2249,12 @@ static int vega20_force_clock_level(struct pp_hwmgr *hwmgr,
 		data->dpm_table.gfx_table.dpm_state.soft_max_level =
 			data->dpm_table.gfx_table.dpm_levels[soft_max_level].value;
 
-		ret = vega20_upload_dpm_min_level(hwmgr);
+		ret = vega20_upload_dpm_min_level(hwmgr, FEATURE_DPM_GFXCLK_MASK);
 		PP_ASSERT_WITH_CODE(!ret,
 			"Failed to upload boot level to lowest!",
 			return ret);
 
-		ret = vega20_upload_dpm_max_level(hwmgr);
+		ret = vega20_upload_dpm_max_level(hwmgr, FEATURE_DPM_GFXCLK_MASK);
 		PP_ASSERT_WITH_CODE(!ret,
 			"Failed to upload dpm max level to highest!",
 			return ret);
@@ -2259,12 +2269,12 @@ static int vega20_force_clock_level(struct pp_hwmgr *hwmgr,
 		data->dpm_table.mem_table.dpm_state.soft_max_level =
 			data->dpm_table.mem_table.dpm_levels[soft_max_level].value;
 
-		ret = vega20_upload_dpm_min_level(hwmgr);
+		ret = vega20_upload_dpm_min_level(hwmgr, FEATURE_DPM_UCLK_MASK);
 		PP_ASSERT_WITH_CODE(!ret,
 			"Failed to upload boot level to lowest!",
 			return ret);
 
-		ret = vega20_upload_dpm_max_level(hwmgr);
+		ret = vega20_upload_dpm_max_level(hwmgr, FEATURE_DPM_UCLK_MASK);
 		PP_ASSERT_WITH_CODE(!ret,
 			"Failed to upload dpm max level to highest!",
 			return ret);
-- 
GitLab


From d9f5b7f5dd0fa74a89de5a7ac1e26366f211ccee Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Mon, 3 Dec 2018 17:50:55 +0300
Subject: [PATCH 1723/1890] clk: mvebu: Off by one bugs in cp110_of_clk_get()

These > comparisons should be >= to prevent reading beyond the end of
of the clk_data->hws[] buffer.

The clk_data->hws[] array is allocated in cp110_syscon_common_probe()
when we do:
	cp110_clk_data = devm_kzalloc(dev, sizeof(*cp110_clk_data) +
				      sizeof(struct clk_hw *) * CP110_CLK_NUM,
				      GFP_KERNEL);
As you can see, it has CP110_CLK_NUM elements which is equivalent to
CP110_MAX_CORE_CLOCKS + CP110_MAX_GATABLE_CLOCKS.

Fixes: d3da3eaef7f4 ("clk: mvebu: new driver for Armada CP110 system controller")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 drivers/clk/mvebu/cp110-system-controller.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/clk/mvebu/cp110-system-controller.c b/drivers/clk/mvebu/cp110-system-controller.c
index 9781b1bf59988..9235a331b5880 100644
--- a/drivers/clk/mvebu/cp110-system-controller.c
+++ b/drivers/clk/mvebu/cp110-system-controller.c
@@ -200,11 +200,11 @@ static struct clk_hw *cp110_of_clk_get(struct of_phandle_args *clkspec,
 	unsigned int idx = clkspec->args[1];
 
 	if (type == CP110_CLK_TYPE_CORE) {
-		if (idx > CP110_MAX_CORE_CLOCKS)
+		if (idx >= CP110_MAX_CORE_CLOCKS)
 			return ERR_PTR(-EINVAL);
 		return clk_data->hws[idx];
 	} else if (type == CP110_CLK_TYPE_GATABLE) {
-		if (idx > CP110_MAX_GATABLE_CLOCKS)
+		if (idx >= CP110_MAX_GATABLE_CLOCKS)
 			return ERR_PTR(-EINVAL);
 		return clk_data->hws[CP110_MAX_CORE_CLOCKS + idx];
 	}
-- 
GitLab


From 2e85c57493e391b93445c1e0d530b36b95becc64 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Mon, 3 Dec 2018 17:51:43 +0300
Subject: [PATCH 1724/1890] clk: mmp: Off by one in mmp_clk_add()

The > comparison should be >= or we write one element beyond the end of
the unit->clk_table[] array.

(The unit->clk_table[] array is allocated in the mmp_clk_init() function
and it has unit->nr_clks elements).

Fixes: 4661fda10f8b ("clk: mmp: add basic support functions for DT support")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 drivers/clk/mmp/clk.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clk/mmp/clk.c b/drivers/clk/mmp/clk.c
index ad8d483a35cd5..ca7d37e2c7be6 100644
--- a/drivers/clk/mmp/clk.c
+++ b/drivers/clk/mmp/clk.c
@@ -183,7 +183,7 @@ void mmp_clk_add(struct mmp_clk_unit *unit, unsigned int id,
 		pr_err("CLK %d has invalid pointer %p\n", id, clk);
 		return;
 	}
-	if (id > unit->nr_clks) {
+	if (id >= unit->nr_clks) {
 		pr_err("CLK %d is invalid\n", id);
 		return;
 	}
-- 
GitLab


From 9a43be9cedd516f188e6333d3b43402386723eff Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Mon, 3 Dec 2018 17:52:01 +0300
Subject: [PATCH 1725/1890] clk: zynqmp: Off by one in zynqmp_is_valid_clock()

The > comparison should be >= to prevent reading beyond the end of the
clock[] array.

(The clock[] array is allocated in zynqmp_clk_setup() and has
clock_max_idx elements.)

Fixes: 3fde0e16d016 ("drivers: clk: Add ZynqMP clock driver")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 drivers/clk/zynqmp/clkc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clk/zynqmp/clkc.c b/drivers/clk/zynqmp/clkc.c
index 297f16a20bfc5..f65cc0ff76abd 100644
--- a/drivers/clk/zynqmp/clkc.c
+++ b/drivers/clk/zynqmp/clkc.c
@@ -128,7 +128,7 @@ static const struct zynqmp_eemi_ops *eemi_ops;
  */
 static inline int zynqmp_is_valid_clock(u32 clk_id)
 {
-	if (clk_id > clock_max_idx)
+	if (clk_id >= clock_max_idx)
 		return -ENODEV;
 
 	return clock[clk_id].valid;
-- 
GitLab


From bf87ade0dd7f8cf19dac4d3161d5e86abe0c062b Mon Sep 17 00:00:00 2001
From: Adam Wong <adam@adamwong.me>
Date: Thu, 29 Nov 2018 10:04:35 -0800
Subject: [PATCH 1726/1890] Input: elan_i2c - add support for ELAN0621 touchpad

Added the ability to detect the ELAN0621 touchpad found in some Lenovo
laptops.

Signed-off-by: Adam Wong <adam@adamwong.me>
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/mouse/elan_i2c_core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c
index 99227807a5840..1428de803d2d1 100644
--- a/drivers/input/mouse/elan_i2c_core.c
+++ b/drivers/input/mouse/elan_i2c_core.c
@@ -1349,6 +1349,7 @@ static const struct acpi_device_id elan_acpi_id[] = {
 	{ "ELAN061C", 0 },
 	{ "ELAN061D", 0 },
 	{ "ELAN0620", 0 },
+	{ "ELAN0621", 0 },
 	{ "ELAN0622", 0 },
 	{ "ELAN1000", 0 },
 	{ }
-- 
GitLab


From ad33429cd02565c28404bb16ae7a4c2bdfda6626 Mon Sep 17 00:00:00 2001
From: Noah Westervelt <nwestervelt@outlook.com>
Date: Thu, 29 Nov 2018 10:10:35 -0800
Subject: [PATCH 1727/1890] Input: elan_i2c - add ACPI ID for Lenovo IdeaPad
 330-15ARR

Add ELAN061E to the ACPI table to support Elan touchpad found in Lenovo
IdeaPad 330-15ARR.

Signed-off-by: Noah Westervelt <nwestervelt@outlook.com>
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/mouse/elan_i2c_core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c
index 1428de803d2d1..a94b6494e71a5 100644
--- a/drivers/input/mouse/elan_i2c_core.c
+++ b/drivers/input/mouse/elan_i2c_core.c
@@ -1348,6 +1348,7 @@ static const struct acpi_device_id elan_acpi_id[] = {
 	{ "ELAN0618", 0 },
 	{ "ELAN061C", 0 },
 	{ "ELAN061D", 0 },
+	{ "ELAN061E", 0 },
 	{ "ELAN0620", 0 },
 	{ "ELAN0621", 0 },
 	{ "ELAN0622", 0 },
-- 
GitLab


From 9df39bedbf292680655c6a947c77d6562c693d4a Mon Sep 17 00:00:00 2001
From: Lyude Paul <lyude@redhat.com>
Date: Sat, 24 Nov 2018 23:28:10 -0800
Subject: [PATCH 1728/1890] Input: synaptics - add PNP ID for ThinkPad P50 to
 SMBus

Noticed the other day the trackpoint felt different on my P50, then
realized it was because rmi4 wasn't loading for this machine
automatically. Suspend/resume, hibernate, and everything else seem to
work perfectly fine on here.

Signed-off-by: Lyude Paul <lyude@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/mouse/synaptics.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c
index 5e85f3cca867d..c42813d505919 100644
--- a/drivers/input/mouse/synaptics.c
+++ b/drivers/input/mouse/synaptics.c
@@ -170,6 +170,7 @@ static const char * const smbus_pnp_ids[] = {
 	"LEN0048", /* X1 Carbon 3 */
 	"LEN0046", /* X250 */
 	"LEN004a", /* W541 */
+	"LEN005b", /* P50 */
 	"LEN0071", /* T480 */
 	"LEN0072", /* X1 Carbon Gen 5 (2017) - Elan/ALPS trackpoint */
 	"LEN0073", /* X1 Carbon G5 (Elantech) */
-- 
GitLab


From 5a6dab15f7a79817cab4af612ddd99eda793fce6 Mon Sep 17 00:00:00 2001
From: Teika Kazura <teika@gmx.com>
Date: Mon, 3 Dec 2018 11:26:03 -0800
Subject: [PATCH 1729/1890] Input: synaptics - enable SMBus for HP 15-ay000

SMBus works fine for the touchpad with id SYN3221, used in the HP 15-ay000
series,

This device has been reported in these messages in the "linux-input"
mailing list:
* https://marc.info/?l=linux-input&m=152016683003369&w=2
* https://www.spinics.net/lists/linux-input/msg52525.html

Reported-by: Nitesh Debnath <niteshkd1999@gmail.com>
Reported-by: Teika Kazura <teika@gmx.com>
Signed-off-by: Teika Kazura <teika@gmx.com>
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/mouse/synaptics.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c
index c42813d505919..2bd5bb11c8bae 100644
--- a/drivers/input/mouse/synaptics.c
+++ b/drivers/input/mouse/synaptics.c
@@ -178,6 +178,7 @@ static const char * const smbus_pnp_ids[] = {
 	"LEN0096", /* X280 */
 	"LEN0097", /* X280 -> ALPS trackpoint */
 	"LEN200f", /* T450s */
+	"SYN3221", /* HP 15-ay000 */
 	NULL
 };
 
-- 
GitLab


From a6754fae1e66e9a40fed406290d7ca3f2b4d227c Mon Sep 17 00:00:00 2001
From: Cameron Gutman <aicommander@gmail.com>
Date: Thu, 29 Nov 2018 10:09:33 -0800
Subject: [PATCH 1730/1890] Input: xpad - quirk all PDP Xbox One gamepads

Since we continue to find tons of new variants [0,1,2,3,4,5,6] that
need the PDP quirk, let's just quirk all devices from PDP.

[0]: https://github.com/paroj/xpad/pull/104
[1]: https://github.com/paroj/xpad/pull/105
[2]: https://github.com/paroj/xpad/pull/108
[3]: https://github.com/paroj/xpad/pull/109
[4]: https://github.com/paroj/xpad/pull/112
[5]: https://github.com/paroj/xpad/pull/115
[6]: https://github.com/paroj/xpad/pull/116

Fixes: e5c9c6a885fa ("Input: xpad - add support for PDP Xbox One controllers")
Cc: stable@vger.kernel.org
Signed-off-by: Cameron Gutman <aicommander@gmail.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/joystick/xpad.c | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c
index d4b9db487b16f..cfc8b94527b97 100644
--- a/drivers/input/joystick/xpad.c
+++ b/drivers/input/joystick/xpad.c
@@ -480,18 +480,18 @@ static const u8 xboxone_hori_init[] = {
 };
 
 /*
- * This packet is required for some of the PDP pads to start
+ * This packet is required for most (all?) of the PDP pads to start
  * sending input reports. These pads include: (0x0e6f:0x02ab),
- * (0x0e6f:0x02a4).
+ * (0x0e6f:0x02a4), (0x0e6f:0x02a6).
  */
 static const u8 xboxone_pdp_init1[] = {
 	0x0a, 0x20, 0x00, 0x03, 0x00, 0x01, 0x14
 };
 
 /*
- * This packet is required for some of the PDP pads to start
+ * This packet is required for most (all?) of the PDP pads to start
  * sending input reports. These pads include: (0x0e6f:0x02ab),
- * (0x0e6f:0x02a4).
+ * (0x0e6f:0x02a4), (0x0e6f:0x02a6).
  */
 static const u8 xboxone_pdp_init2[] = {
 	0x06, 0x20, 0x00, 0x02, 0x01, 0x00
@@ -527,12 +527,8 @@ static const struct xboxone_init_packet xboxone_init_packets[] = {
 	XBOXONE_INIT_PKT(0x0e6f, 0x0165, xboxone_hori_init),
 	XBOXONE_INIT_PKT(0x0f0d, 0x0067, xboxone_hori_init),
 	XBOXONE_INIT_PKT(0x0000, 0x0000, xboxone_fw2015_init),
-	XBOXONE_INIT_PKT(0x0e6f, 0x02ab, xboxone_pdp_init1),
-	XBOXONE_INIT_PKT(0x0e6f, 0x02ab, xboxone_pdp_init2),
-	XBOXONE_INIT_PKT(0x0e6f, 0x02a4, xboxone_pdp_init1),
-	XBOXONE_INIT_PKT(0x0e6f, 0x02a4, xboxone_pdp_init2),
-	XBOXONE_INIT_PKT(0x0e6f, 0x02a6, xboxone_pdp_init1),
-	XBOXONE_INIT_PKT(0x0e6f, 0x02a6, xboxone_pdp_init2),
+	XBOXONE_INIT_PKT(0x0e6f, 0x0000, xboxone_pdp_init1),
+	XBOXONE_INIT_PKT(0x0e6f, 0x0000, xboxone_pdp_init2),
 	XBOXONE_INIT_PKT(0x24c6, 0x541a, xboxone_rumblebegin_init),
 	XBOXONE_INIT_PKT(0x24c6, 0x542a, xboxone_rumblebegin_init),
 	XBOXONE_INIT_PKT(0x24c6, 0x543a, xboxone_rumblebegin_init),
-- 
GitLab


From 6c3516fed7b61a3527459ccfa67fab130d910610 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Mon, 3 Dec 2018 11:24:30 -0800
Subject: [PATCH 1731/1890] Input: omap-keypad - fix keyboard debounce
 configuration

I noticed that the Android v3.0.8 kernel on droid4 is using different
keypad values from the mainline kernel and does not have issues with
keys occasionally being stuck until pressed again. Turns out there was
an earlier patch posted to fix this as "Input: omap-keypad: errata i689:
Correct debounce time", but it was never reposted to fix use macros
for timing calculations.

This updated version is using macros, and also fixes the use of the
input clock rate to use 32768KiHz instead of 32000KiHz. And we want to
use the known good Android kernel values of 3 and 6 instead of 2 and 6
in the earlier patch.

Reported-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/keyboard/omap4-keypad.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/drivers/input/keyboard/omap4-keypad.c b/drivers/input/keyboard/omap4-keypad.c
index 940d38b08e6b5..ce8e2baf31bbb 100644
--- a/drivers/input/keyboard/omap4-keypad.c
+++ b/drivers/input/keyboard/omap4-keypad.c
@@ -60,8 +60,18 @@
 
 /* OMAP4 values */
 #define OMAP4_VAL_IRQDISABLE		0x0
-#define OMAP4_VAL_DEBOUNCINGTIME	0x7
-#define OMAP4_VAL_PVT			0x7
+
+/*
+ * Errata i689: If a key is released for a time shorter than debounce time,
+ * the keyboard will idle and never detect the key release. The workaround
+ * is to use at least a 12ms debounce time. See omap5432 TRM chapter
+ * "26.4.6.2 Keyboard Controller Timer" for more information.
+ */
+#define OMAP4_KEYPAD_PTV_DIV_128        0x6
+#define OMAP4_KEYPAD_DEBOUNCINGTIME_MS(dbms, ptv)     \
+	((((dbms) * 1000) / ((1 << ((ptv) + 1)) * (1000000 / 32768))) - 1)
+#define OMAP4_VAL_DEBOUNCINGTIME_16MS					\
+	OMAP4_KEYPAD_DEBOUNCINGTIME_MS(16, OMAP4_KEYPAD_PTV_DIV_128)
 
 enum {
 	KBD_REVISION_OMAP4 = 0,
@@ -181,9 +191,9 @@ static int omap4_keypad_open(struct input_dev *input)
 
 	kbd_writel(keypad_data, OMAP4_KBD_CTRL,
 			OMAP4_DEF_CTRL_NOSOFTMODE |
-			(OMAP4_VAL_PVT << OMAP4_DEF_CTRL_PTV_SHIFT));
+			(OMAP4_KEYPAD_PTV_DIV_128 << OMAP4_DEF_CTRL_PTV_SHIFT));
 	kbd_writel(keypad_data, OMAP4_KBD_DEBOUNCINGTIME,
-			OMAP4_VAL_DEBOUNCINGTIME);
+			OMAP4_VAL_DEBOUNCINGTIME_16MS);
 	/* clear pending interrupts */
 	kbd_write_irqreg(keypad_data, OMAP4_KBD_IRQSTATUS,
 			 kbd_read_irqreg(keypad_data, OMAP4_KBD_IRQSTATUS));
-- 
GitLab


From e25b6783c7b1bb79103d4617336879423f86b05e Mon Sep 17 00:00:00 2001
From: Stefan Wahren <stefan.wahren@i2se.com>
Date: Mon, 3 Dec 2018 19:37:08 +0100
Subject: [PATCH 1732/1890] ARM: dts: bcm2837: Fix polarity of wifi reset GPIOs

The commit b1b8f45b3130 ("ARM: dts: bcm2837: Add missing GPIOs of Expander")
introduced a wifi power sequence. Unfortunately the polarity of the reset
GPIOs were wrong and broke the wifi support on Raspberry Pi 3 B and
later in 3 B+. This wasn't discovered before since the power sequence
takes only effect in case the relevant MMC driver is compiled as a module.

Fixes: b1b8f45b3130 ("ARM: dts: bcm2837: Add missing GPIOs of Expander")
Cc: stable@vger.kernel.org
Reported-by: Matthias Lueschner <lueschem@gmail.com>
Link: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=911443
Signed-off-by: Stefan Wahren <stefan.wahren@i2se.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
---
 arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts | 2 +-
 arch/arm/boot/dts/bcm2837-rpi-3-b.dts      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts b/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts
index 4adb85e66be3f..93762244be7f4 100644
--- a/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts
+++ b/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts
@@ -31,7 +31,7 @@ pwr {
 
 	wifi_pwrseq: wifi-pwrseq {
 		compatible = "mmc-pwrseq-simple";
-		reset-gpios = <&expgpio 1 GPIO_ACTIVE_HIGH>;
+		reset-gpios = <&expgpio 1 GPIO_ACTIVE_LOW>;
 	};
 };
 
diff --git a/arch/arm/boot/dts/bcm2837-rpi-3-b.dts b/arch/arm/boot/dts/bcm2837-rpi-3-b.dts
index c318bcbc6ba7e..89e6fd547c757 100644
--- a/arch/arm/boot/dts/bcm2837-rpi-3-b.dts
+++ b/arch/arm/boot/dts/bcm2837-rpi-3-b.dts
@@ -26,7 +26,7 @@ act {
 
 	wifi_pwrseq: wifi-pwrseq {
 		compatible = "mmc-pwrseq-simple";
-		reset-gpios = <&expgpio 1 GPIO_ACTIVE_HIGH>;
+		reset-gpios = <&expgpio 1 GPIO_ACTIVE_LOW>;
 	};
 };
 
-- 
GitLab


From fd6f32f78645db32b6b95a42e45da2ddd6de0e67 Mon Sep 17 00:00:00 2001
From: Juha-Matti Tilli <juha-matti.tilli@iki.fi>
Date: Sun, 2 Dec 2018 12:47:08 +0200
Subject: [PATCH 1733/1890] libata: whitelist all SAMSUNG MZ7KM* solid-state
 disks

These devices support read zero after trim (RZAT), as they advertise to
the OS. However, the OS doesn't believe the SSDs unless they are
explicitly whitelisted.

Acked-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Juha-Matti Tilli <juha-matti.tilli@iki.fi>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/ata/libata-core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index a7f5202a48152..b8c3f9e6af899 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4602,6 +4602,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
 	{ "SSD*INTEL*",			NULL,	ATA_HORKAGE_ZERO_AFTER_TRIM, },
 	{ "Samsung*SSD*",		NULL,	ATA_HORKAGE_ZERO_AFTER_TRIM, },
 	{ "SAMSUNG*SSD*",		NULL,	ATA_HORKAGE_ZERO_AFTER_TRIM, },
+	{ "SAMSUNG*MZ7KM*",		NULL,	ATA_HORKAGE_ZERO_AFTER_TRIM, },
 	{ "ST[1248][0248]0[FH]*",	NULL,	ATA_HORKAGE_ZERO_AFTER_TRIM, },
 
 	/*
-- 
GitLab


From 7bb64d55fd0e4bb09863ae1519ec8024e1279b2b Mon Sep 17 00:00:00 2001
From: Matthias Brugger <matthias.bgg@gmail.com>
Date: Tue, 16 Oct 2018 16:57:58 +0200
Subject: [PATCH 1734/1890] MAINTAINERS: mediatek: Update SoC entry

Add IRC channel and URL of the wiki.
Also add soc drivers folder and regex to catch more
mediatek components.

Signed-off-by: Matthias Brugger <matthias.bgg@gmail.com>
Signed-off-by: Olof Johansson <olof@lixom.net>
---
 MAINTAINERS | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 6682420421c15..1306254bf0786 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1738,13 +1738,17 @@ ARM/Mediatek SoC support
 M:	Matthias Brugger <matthias.bgg@gmail.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 L:	linux-mediatek@lists.infradead.org (moderated for non-subscribers)
+W:	https://mtk.bcnfs.org/
+C:	irc://chat.freenode.net/linux-mediatek
 S:	Maintained
 F:	arch/arm/boot/dts/mt6*
 F:	arch/arm/boot/dts/mt7*
 F:	arch/arm/boot/dts/mt8*
 F:	arch/arm/mach-mediatek/
 F:	arch/arm64/boot/dts/mediatek/
+F:	drivers/soc/mediatek/
 N:	mtk
+N:	mt[678]
 K:	mediatek
 
 ARM/Mediatek USB3 PHY DRIVER
-- 
GitLab


From 76f4e2c3b6a560cdd7a75b87df543e04d05a9e5f Mon Sep 17 00:00:00 2001
From: Lubomir Rintel <lkundrak@v3.sk>
Date: Sun, 2 Dec 2018 12:12:24 +0100
Subject: [PATCH 1735/1890] ARM: mmp/mmp2: fix cpu_is_mmp2() on mmp2-dt

cpu_is_mmp2() was equivalent to cpu_is_pj4(), wouldn't be correct for
multiplatform kernels. Fix it by also considering mmp_chip_id, as is
done for cpu_is_pxa168() and cpu_is_pxa910() above.

Moreover, it is only available with CONFIG_CPU_MMP2 and thus doesn't work
on DT-based MMP2 machines. Enable it on CONFIG_MACH_MMP2_DT too.

Note: CONFIG_CPU_MMP2 is only used for machines that use board files
instead of DT. It should perhaps be renamed. I'm not doing it now, because
I don't have a better idea.

Signed-off-by: Lubomir Rintel <lkundrak@v3.sk>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Cc: stable@vger.kernel.org
Signed-off-by: Olof Johansson <olof@lixom.net>
---
 arch/arm/mach-mmp/cputype.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/arm/mach-mmp/cputype.h b/arch/arm/mach-mmp/cputype.h
index 446edaeb78a71..a96abcf521b4b 100644
--- a/arch/arm/mach-mmp/cputype.h
+++ b/arch/arm/mach-mmp/cputype.h
@@ -44,10 +44,12 @@ static inline int cpu_is_pxa910(void)
 #define cpu_is_pxa910()	(0)
 #endif
 
-#ifdef CONFIG_CPU_MMP2
+#if defined(CONFIG_CPU_MMP2) || defined(CONFIG_MACH_MMP2_DT)
 static inline int cpu_is_mmp2(void)
 {
-	return (((read_cpuid_id() >> 8) & 0xff) == 0x58);
+	return (((read_cpuid_id() >> 8) & 0xff) == 0x58) &&
+		(((mmp_chip_id & 0xfff) == 0x410) ||
+		 ((mmp_chip_id & 0xfff) == 0x610));
 }
 #else
 #define cpu_is_mmp2()	(0)
-- 
GitLab


From f6936d029c4a14ce873bbb423bbc518aa1b4def2 Mon Sep 17 00:00:00 2001
From: Lubomir Rintel <lkundrak@v3.sk>
Date: Sun, 2 Dec 2018 12:14:28 +0100
Subject: [PATCH 1736/1890] MAINTAINERS: update entry for MMP platform

Move Eric Miao and Haojian Zhuang over to CREDITS, since they're AWOL
for some time already. The git trees have gone away too.

I'm adding myself as a reviewer. I'd like to be Cc'd on patches and will
be able to test them, but I don't possess a data sheet thus there might
be things I'll be unable to review. Hence the Odd-Fixes status.

Signed-off-by: Lubomir Rintel <lkundrak@v3.sk>
Signed-off-by: Olof Johansson <olof@lixom.net>
---
 CREDITS     | 8 ++++++++
 MAINTAINERS | 7 ++-----
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/CREDITS b/CREDITS
index c9273393fe14c..7d397ee675242 100644
--- a/CREDITS
+++ b/CREDITS
@@ -2541,6 +2541,10 @@ S: Ormond
 S: Victoria 3163
 S: Australia
 
+N: Eric Miao
+E: eric.y.miao@gmail.com
+D: MMP support
+
 N: Pauline Middelink
 E: middelin@polyware.nl
 D: General low-level bug fixes, /proc fixes, identd support
@@ -4115,6 +4119,10 @@ S: 1507 145th Place SE #B5
 S: Bellevue, Washington 98007
 S: USA
 
+N: Haojian Zhuang
+E: haojian.zhuang@gmail.com
+D: MMP support
+
 N: Richard Zidlicky
 E: rz@linux-m68k.org, rdzidlic@geocities.com
 W: http://www.geocities.com/rdzidlic
diff --git a/MAINTAINERS b/MAINTAINERS
index 1306254bf0786..f063443a3e3a9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10002,12 +10002,9 @@ S:	Odd Fixes
 F:	drivers/media/radio/radio-miropcm20*
 
 MMP SUPPORT
-M:	Eric Miao <eric.y.miao@gmail.com>
-M:	Haojian Zhuang <haojian.zhuang@gmail.com>
+R:	Lubomir Rintel <lkundrak@v3.sk>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-T:	git git://github.com/hzhuang1/linux.git
-T:	git git://git.linaro.org/people/ycmiao/pxa-linux.git
-S:	Maintained
+S:	Odd Fixes
 F:	arch/arm/boot/dts/mmp*
 F:	arch/arm/mach-mmp/
 
-- 
GitLab


From f3b2f758ec1e6cdb13c925647cbd8ad4938b78fb Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Mon, 3 Dec 2018 13:12:48 -0600
Subject: [PATCH 1737/1890] ARM: dts: realview: Fix some more duplicate
 regulator nodes

There's a bug in dtc in checking for duplicate node names when there's
another section (e.g. "/ { };"). In this case, skeleton.dtsi provides
another section. Upon removal of skeleton.dtsi, the dtb fails to build
due to a duplicate node 'fixedregulator@0'. As both nodes were pretty
much the same 3.3V fixed regulator, it hasn't really mattered. Fix this
by renaming the nodes to something unique. In the process, drop the
unit-address which shouldn't be present wtihout reg property.

Signed-off-by: Rob Herring <robh@kernel.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Olof Johansson <olof@lixom.net>
---
 arch/arm/boot/dts/arm-realview-pb1176.dts | 4 ++--
 arch/arm/boot/dts/arm-realview-pb11mp.dts | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/arm/boot/dts/arm-realview-pb1176.dts b/arch/arm/boot/dts/arm-realview-pb1176.dts
index f2a1d25eb6cf3..83e0fbc4a1a10 100644
--- a/arch/arm/boot/dts/arm-realview-pb1176.dts
+++ b/arch/arm/boot/dts/arm-realview-pb1176.dts
@@ -45,7 +45,7 @@ memory {
 	};
 
 	/* The voltage to the MMC card is hardwired at 3.3V */
-	vmmc: fixedregulator@0 {
+	vmmc: regulator-vmmc {
 		compatible = "regulator-fixed";
 		regulator-name = "vmmc";
 		regulator-min-microvolt = <3300000>;
@@ -53,7 +53,7 @@ vmmc: fixedregulator@0 {
 		regulator-boot-on;
         };
 
-	veth: fixedregulator@0 {
+	veth: regulator-veth {
 		compatible = "regulator-fixed";
 		regulator-name = "veth";
 		regulator-min-microvolt = <3300000>;
diff --git a/arch/arm/boot/dts/arm-realview-pb11mp.dts b/arch/arm/boot/dts/arm-realview-pb11mp.dts
index 7f9cbdf33a510..2f6aa24a0b67c 100644
--- a/arch/arm/boot/dts/arm-realview-pb11mp.dts
+++ b/arch/arm/boot/dts/arm-realview-pb11mp.dts
@@ -145,7 +145,7 @@ pmu {
 	};
 
 	/* The voltage to the MMC card is hardwired at 3.3V */
-	vmmc: fixedregulator@0 {
+	vmmc: regulator-vmmc {
 		compatible = "regulator-fixed";
 		regulator-name = "vmmc";
 		regulator-min-microvolt = <3300000>;
@@ -153,7 +153,7 @@ vmmc: fixedregulator@0 {
 		regulator-boot-on;
         };
 
-	veth: fixedregulator@0 {
+	veth: regulator-veth {
 		compatible = "regulator-fixed";
 		regulator-name = "veth";
 		regulator-min-microvolt = <3300000>;
-- 
GitLab


From d2a36971ef595069b7a600d1144c2e0881a930a1 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Mon, 3 Dec 2018 08:19:33 +0100
Subject: [PATCH 1738/1890] net: phy: don't allow __set_phy_supported to add
 unsupported modes

Currently __set_phy_supported allows to add modes w/o checking whether
the PHY supports them. This is wrong, it should never add modes but
only remove modes we don't want to support.

The commit marked as fixed didn't do anything wrong, it just copied
existing functionality to the helper which is being fixed now.

Fixes: f3a6bd393c2c ("phylib: Add phy_set_max_speed helper")
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy_device.c | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 23ee3967c1667..18e92c19c5ab8 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1880,20 +1880,17 @@ EXPORT_SYMBOL(genphy_loopback);
 
 static int __set_phy_supported(struct phy_device *phydev, u32 max_speed)
 {
-	phydev->supported &= ~(PHY_1000BT_FEATURES | PHY_100BT_FEATURES |
-			       PHY_10BT_FEATURES);
-
 	switch (max_speed) {
-	default:
-		return -ENOTSUPP;
-	case SPEED_1000:
-		phydev->supported |= PHY_1000BT_FEATURES;
+	case SPEED_10:
+		phydev->supported &= ~PHY_100BT_FEATURES;
 		/* fall through */
 	case SPEED_100:
-		phydev->supported |= PHY_100BT_FEATURES;
-		/* fall through */
-	case SPEED_10:
-		phydev->supported |= PHY_10BT_FEATURES;
+		phydev->supported &= ~PHY_1000BT_FEATURES;
+		break;
+	case SPEED_1000:
+		break;
+	default:
+		return -ENOTSUPP;
 	}
 
 	return 0;
-- 
GitLab


From a5d4a89245ead1f37ed135213653c5beebea4237 Mon Sep 17 00:00:00 2001
From: Su Yanjun <suyj.fnst@cn.fujitsu.com>
Date: Mon, 3 Dec 2018 15:33:07 +0800
Subject: [PATCH 1739/1890] net: 8139cp: fix a BUG triggered by changing mtu
 with network traffic

When changing mtu many times with traffic, a bug is triggered:

[ 1035.684037] kernel BUG at lib/dynamic_queue_limits.c:26!
[ 1035.684042] invalid opcode: 0000 [#1] SMP
[ 1035.684049] Modules linked in: loop binfmt_misc 8139cp(OE) macsec
tcp_diag udp_diag inet_diag unix_diag af_packet_diag netlink_diag tcp_lp
fuse uinput xt_CHECKSUM iptable_mangle ipt_MASQUERADE
nf_nat_masquerade_ipv4 iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4
nf_defrag_ipv4 xt_conntrack nf_conntrack ipt_REJECT nf_reject_ipv4 tun
bridge stp llc ebtable_filter ebtables ip6table_filter devlink
ip6_tables iptable_filter sunrpc snd_hda_codec_generic snd_hda_intel
snd_hda_codec snd_hda_core snd_hwdep ppdev snd_seq iosf_mbi crc32_pclmul
parport_pc snd_seq_device ghash_clmulni_intel parport snd_pcm
aesni_intel joydev lrw snd_timer virtio_balloon sg gf128mul glue_helper
ablk_helper cryptd snd soundcore i2c_piix4 pcspkr ip_tables xfs
libcrc32c sr_mod sd_mod cdrom crc_t10dif crct10dif_generic ata_generic
[ 1035.684102]  pata_acpi virtio_console qxl drm_kms_helper syscopyarea
sysfillrect sysimgblt floppy fb_sys_fops crct10dif_pclmul
crct10dif_common ttm crc32c_intel serio_raw ata_piix drm libata 8139too
virtio_pci drm_panel_orientation_quirks virtio_ring virtio mii dm_mirror
dm_region_hash dm_log dm_mod [last unloaded: 8139cp]
[ 1035.684132] CPU: 9 PID: 25140 Comm: if-mtu-change Kdump: loaded
Tainted: G           OE  ------------ T 3.10.0-957.el7.x86_64 #1
[ 1035.684134] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
[ 1035.684136] task: ffff8f59b1f5a080 ti: ffff8f5a2e32c000 task.ti:
ffff8f5a2e32c000
[ 1035.684149] RIP: 0010:[<ffffffffba3a40d0>]  [<ffffffffba3a40d0>]
dql_completed+0x180/0x190
[ 1035.684162] RSP: 0000:ffff8f5a75483e50  EFLAGS: 00010093
[ 1035.684162] RAX: 00000000000000c2 RBX: ffff8f5a6f91c000 RCX:
0000000000000000
[ 1035.684162] RDX: 0000000000000000 RSI: 0000000000000184 RDI:
ffff8f599fea3ec0
[ 1035.684162] RBP: ffff8f5a75483ea8 R08: 00000000000000c2 R09:
0000000000000000
[ 1035.684162] R10: 00000000000616ef R11: ffff8f5a75483b56 R12:
ffff8f599fea3e00
[ 1035.684162] R13: 0000000000000001 R14: 0000000000000000 R15:
0000000000000184
[ 1035.684162] FS:  00007fa8434de740(0000) GS:ffff8f5a75480000(0000)
knlGS:0000000000000000
[ 1035.684162] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 1035.684162] CR2: 00000000004305d0 CR3: 000000024eb66000 CR4:
00000000001406e0
[ 1035.684162] Call Trace:
[ 1035.684162]  <IRQ>
[ 1035.684162]  [<ffffffffc08cbaf8>] ? cp_interrupt+0x478/0x580 [8139cp]
[ 1035.684162]  [<ffffffffba14a294>]
__handle_irq_event_percpu+0x44/0x1c0
[ 1035.684162]  [<ffffffffba14a442>] handle_irq_event_percpu+0x32/0x80
[ 1035.684162]  [<ffffffffba14a4cc>] handle_irq_event+0x3c/0x60
[ 1035.684162]  [<ffffffffba14db29>] handle_fasteoi_irq+0x59/0x110
[ 1035.684162]  [<ffffffffba02e554>] handle_irq+0xe4/0x1a0
[ 1035.684162]  [<ffffffffba7795dd>] do_IRQ+0x4d/0xf0
[ 1035.684162]  [<ffffffffba76b362>] common_interrupt+0x162/0x162
[ 1035.684162]  <EOI>
[ 1035.684162]  [<ffffffffba0c2ae4>] ? __wake_up_bit+0x24/0x70
[ 1035.684162]  [<ffffffffba1e46f5>] ? do_set_pte+0xd5/0x120
[ 1035.684162]  [<ffffffffba1b64fb>] unlock_page+0x2b/0x30
[ 1035.684162]  [<ffffffffba1e4879>] do_read_fault.isra.61+0x139/0x1b0
[ 1035.684162]  [<ffffffffba1e9134>] handle_pte_fault+0x2f4/0xd10
[ 1035.684162]  [<ffffffffba1ebc6d>] handle_mm_fault+0x39d/0x9b0
[ 1035.684162]  [<ffffffffba76f5e3>] __do_page_fault+0x203/0x500
[ 1035.684162]  [<ffffffffba76f9c6>] trace_do_page_fault+0x56/0x150
[ 1035.684162]  [<ffffffffba76ef42>] do_async_page_fault+0x22/0xf0
[ 1035.684162]  [<ffffffffba76b788>] async_page_fault+0x28/0x30
[ 1035.684162] Code: 54 c7 47 54 ff ff ff ff 44 0f 49 ce 48 8b 35 48 2f
9c 00 48 89 77 58 e9 fe fe ff ff 0f 1f 80 00 00 00 00 41 89 d1 e9 ef fe
ff ff <0f> 0b 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 55 8d 42 ff 48
[ 1035.684162] RIP  [<ffffffffba3a40d0>] dql_completed+0x180/0x190
[ 1035.684162]  RSP <ffff8f5a75483e50>

It's not the same as in 7fe0ee09 patch described.
As 8139cp uses shared irq mode, other device irq will trigger
cp_interrupt to execute.

cp_change_mtu
 -> cp_close
 -> cp_open

In cp_close routine  just before free_irq(), some interrupt may occur.
In my environment, cp_interrupt exectutes and IntrStatus is 0x4,
exactly TxOk. That will cause cp_tx to wake device queue.

As device queue is started, cp_start_xmit and cp_open will run at same
time which will cause kernel BUG.

For example:
[#] for tx descriptor

At start:

[#][#][#]
num_queued=3

After cp_init_hw->cp_start_hw->netdev_reset_queue:

[#][#][#]
num_queued=0

When 8139cp starts to work then cp_tx will check
num_queued mismatchs the complete_bytes.

The patch will check IntrMask before check IntrStatus in cp_interrupt.
When 8139cp interrupt is disabled, just return.

Signed-off-by: Su Yanjun <suyj.fnst@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/8139cp.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c
index 81045dfa1cd89..44f6e4873aadd 100644
--- a/drivers/net/ethernet/realtek/8139cp.c
+++ b/drivers/net/ethernet/realtek/8139cp.c
@@ -571,6 +571,7 @@ static irqreturn_t cp_interrupt (int irq, void *dev_instance)
 	struct cp_private *cp;
 	int handled = 0;
 	u16 status;
+	u16 mask;
 
 	if (unlikely(dev == NULL))
 		return IRQ_NONE;
@@ -578,6 +579,10 @@ static irqreturn_t cp_interrupt (int irq, void *dev_instance)
 
 	spin_lock(&cp->lock);
 
+	mask = cpr16(IntrMask);
+	if (!mask)
+		goto out_unlock;
+
 	status = cpr16(IntrStatus);
 	if (!status || (status == 0xFFFF))
 		goto out_unlock;
-- 
GitLab


From 4e4b08e55889da97dec750759f3ade8cc92b4644 Mon Sep 17 00:00:00 2001
From: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
Date: Mon, 3 Dec 2018 18:09:24 +0900
Subject: [PATCH 1740/1890] tun: remove skb access after netif_receive_skb

In tun.c skb->len was accessed while doing stats accounting after a
call to netif_receive_skb. We can not access skb after this call
because buffers may be dropped.

The fix for this bug would be to store skb->len in local variable and
then use it after netif_receive_skb(). IMO using xdp data size for
accounting bytes will be better because input for tun_xdp_one() is
xdp_buff.

Hence this patch:
- fixes a bug by removing skb access after netif_receive_skb()
- uses xdp data size for accounting bytes

[613.019057] BUG: KASAN: use-after-free in tun_sendmsg+0x77c/0xc50 [tun]
[613.021062] Read of size 4 at addr ffff8881da9ab7c0 by task vhost-1115/1155
[613.023073]
[613.024003] CPU: 0 PID: 1155 Comm: vhost-1115 Not tainted 4.20.0-rc3-vm+ #232
[613.026029] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014
[613.029116] Call Trace:
[613.031145]  dump_stack+0x5b/0x90
[613.032219]  print_address_description+0x6c/0x23c
[613.034156]  ? tun_sendmsg+0x77c/0xc50 [tun]
[613.036141]  kasan_report.cold.5+0x241/0x308
[613.038125]  tun_sendmsg+0x77c/0xc50 [tun]
[613.040109]  ? tun_get_user+0x1960/0x1960 [tun]
[613.042094]  ? __isolate_free_page+0x270/0x270
[613.045173]  vhost_tx_batch.isra.14+0xeb/0x1f0 [vhost_net]
[613.047127]  ? peek_head_len.part.13+0x90/0x90 [vhost_net]
[613.049096]  ? get_tx_bufs+0x5a/0x2c0 [vhost_net]
[613.051106]  ? vhost_enable_notify+0x2d8/0x420 [vhost]
[613.053139]  handle_tx_copy+0x2d0/0x8f0 [vhost_net]
[613.053139]  ? vhost_net_buf_peek+0x340/0x340 [vhost_net]
[613.053139]  ? __mutex_lock+0x8d9/0xb30
[613.053139]  ? finish_task_switch+0x8f/0x3f0
[613.053139]  ? handle_tx+0x32/0x120 [vhost_net]
[613.053139]  ? mutex_trylock+0x110/0x110
[613.053139]  ? finish_task_switch+0xcf/0x3f0
[613.053139]  ? finish_task_switch+0x240/0x3f0
[613.053139]  ? __switch_to_asm+0x34/0x70
[613.053139]  ? __switch_to_asm+0x40/0x70
[613.053139]  ? __schedule+0x506/0xf10
[613.053139]  handle_tx+0xc7/0x120 [vhost_net]
[613.053139]  vhost_worker+0x166/0x200 [vhost]
[613.053139]  ? vhost_dev_init+0x580/0x580 [vhost]
[613.053139]  ? __kthread_parkme+0x77/0x90
[613.053139]  ? vhost_dev_init+0x580/0x580 [vhost]
[613.053139]  kthread+0x1b1/0x1d0
[613.053139]  ? kthread_park+0xb0/0xb0
[613.053139]  ret_from_fork+0x35/0x40
[613.088705]
[613.088705] Allocated by task 1155:
[613.088705]  kasan_kmalloc+0xbf/0xe0
[613.088705]  kmem_cache_alloc+0xdc/0x220
[613.088705]  __build_skb+0x2a/0x160
[613.088705]  build_skb+0x14/0xc0
[613.088705]  tun_sendmsg+0x4f0/0xc50 [tun]
[613.088705]  vhost_tx_batch.isra.14+0xeb/0x1f0 [vhost_net]
[613.088705]  handle_tx_copy+0x2d0/0x8f0 [vhost_net]
[613.088705]  handle_tx+0xc7/0x120 [vhost_net]
[613.088705]  vhost_worker+0x166/0x200 [vhost]
[613.088705]  kthread+0x1b1/0x1d0
[613.088705]  ret_from_fork+0x35/0x40
[613.088705]
[613.088705] Freed by task 1155:
[613.088705]  __kasan_slab_free+0x12e/0x180
[613.088705]  kmem_cache_free+0xa0/0x230
[613.088705]  ip6_mc_input+0x40f/0x5a0
[613.088705]  ipv6_rcv+0xc9/0x1e0
[613.088705]  __netif_receive_skb_one_core+0xc1/0x100
[613.088705]  netif_receive_skb_internal+0xc4/0x270
[613.088705]  br_pass_frame_up+0x2b9/0x2e0
[613.088705]  br_handle_frame_finish+0x2fb/0x7a0
[613.088705]  br_handle_frame+0x30f/0x6c0
[613.088705]  __netif_receive_skb_core+0x61a/0x15b0
[613.088705]  __netif_receive_skb_one_core+0x8e/0x100
[613.088705]  netif_receive_skb_internal+0xc4/0x270
[613.088705]  tun_sendmsg+0x738/0xc50 [tun]
[613.088705]  vhost_tx_batch.isra.14+0xeb/0x1f0 [vhost_net]
[613.088705]  handle_tx_copy+0x2d0/0x8f0 [vhost_net]
[613.088705]  handle_tx+0xc7/0x120 [vhost_net]
[613.088705]  vhost_worker+0x166/0x200 [vhost]
[613.088705]  kthread+0x1b1/0x1d0
[613.088705]  ret_from_fork+0x35/0x40
[613.088705]
[613.088705] The buggy address belongs to the object at ffff8881da9ab740
[613.088705]  which belongs to the cache skbuff_head_cache of size 232

Fixes: 043d222f93ab ("tuntap: accept an array of XDP buffs through sendmsg()")
Reviewed-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Signed-off-by: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
Acked-by: Jason Wang <jasowang@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tun.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index cf349e65a66b3..005020042be94 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -2385,6 +2385,7 @@ static int tun_xdp_one(struct tun_struct *tun,
 		       struct tun_file *tfile,
 		       struct xdp_buff *xdp, int *flush)
 {
+	unsigned int datasize = xdp->data_end - xdp->data;
 	struct tun_xdp_hdr *hdr = xdp->data_hard_start;
 	struct virtio_net_hdr *gso = &hdr->gso;
 	struct tun_pcpu_stats *stats;
@@ -2461,7 +2462,7 @@ static int tun_xdp_one(struct tun_struct *tun,
 	stats = get_cpu_ptr(tun->pcpu_stats);
 	u64_stats_update_begin(&stats->syncp);
 	stats->rx_packets++;
-	stats->rx_bytes += skb->len;
+	stats->rx_bytes += datasize;
 	u64_stats_update_end(&stats->syncp);
 	put_cpu_ptr(stats);
 
-- 
GitLab


From 025dceb0fab31c912c41b8f32577432231d83e6b Mon Sep 17 00:00:00 2001
From: Sandipan Das <sandipan@linux.ibm.com>
Date: Mon, 3 Dec 2018 17:51:04 +0530
Subject: [PATCH 1741/1890] bpf: powerpc64: optimize JIT passes for bpf
 function calls

Once the JITed images for each function in a multi-function program
are generated after the first three JIT passes, we only need to fix
the target address for the branch instruction corresponding to each
bpf-to-bpf function call.

This introduces the following optimizations for reducing the work
done by the JIT compiler when handling multi-function programs:

  [1] Instead of doing two extra passes to fix the bpf function calls,
      do just one as that would be sufficient.

  [2] During the extra pass, only overwrite the instruction sequences
      for the bpf-to-bpf function calls as everything else would still
      remain exactly the same. This also reduces the number of writes
      to the JITed image.

  [3] Do not regenerate the prologue and the epilogue during the extra
      pass as that would be redundant.

Signed-off-by: Sandipan Das <sandipan@linux.ibm.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 arch/powerpc/net/bpf_jit_comp64.c | 66 +++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)

diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 17482f5de3e26..9393e231cbc28 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -891,6 +891,55 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
 	return 0;
 }
 
+/* Fix the branch target addresses for subprog calls */
+static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image,
+				       struct codegen_context *ctx, u32 *addrs)
+{
+	const struct bpf_insn *insn = fp->insnsi;
+	bool func_addr_fixed;
+	u64 func_addr;
+	u32 tmp_idx;
+	int i, ret;
+
+	for (i = 0; i < fp->len; i++) {
+		/*
+		 * During the extra pass, only the branch target addresses for
+		 * the subprog calls need to be fixed. All other instructions
+		 * can left untouched.
+		 *
+		 * The JITed image length does not change because we already
+		 * ensure that the JITed instruction sequence for these calls
+		 * are of fixed length by padding them with NOPs.
+		 */
+		if (insn[i].code == (BPF_JMP | BPF_CALL) &&
+		    insn[i].src_reg == BPF_PSEUDO_CALL) {
+			ret = bpf_jit_get_func_addr(fp, &insn[i], true,
+						    &func_addr,
+						    &func_addr_fixed);
+			if (ret < 0)
+				return ret;
+
+			/*
+			 * Save ctx->idx as this would currently point to the
+			 * end of the JITed image and set it to the offset of
+			 * the instruction sequence corresponding to the
+			 * subprog call temporarily.
+			 */
+			tmp_idx = ctx->idx;
+			ctx->idx = addrs[i] / 4;
+			bpf_jit_emit_func_call_rel(image, ctx, func_addr);
+
+			/*
+			 * Restore ctx->idx here. This is safe as the length
+			 * of the JITed sequence remains unchanged.
+			 */
+			ctx->idx = tmp_idx;
+		}
+	}
+
+	return 0;
+}
+
 struct powerpc64_jit_data {
 	struct bpf_binary_header *header;
 	u32 *addrs;
@@ -989,6 +1038,22 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 skip_init_ctx:
 	code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
 
+	if (extra_pass) {
+		/*
+		 * Do not touch the prologue and epilogue as they will remain
+		 * unchanged. Only fix the branch target address for subprog
+		 * calls in the body.
+		 *
+		 * This does not change the offsets and lengths of the subprog
+		 * call instruction sequences and hence, the size of the JITed
+		 * image as well.
+		 */
+		bpf_jit_fixup_subprog_calls(fp, code_base, &cgctx, addrs);
+
+		/* There is no need to perform the usual passes. */
+		goto skip_codegen_passes;
+	}
+
 	/* Code generation passes 1-2 */
 	for (pass = 1; pass < 3; pass++) {
 		/* Now build the prologue, body code & epilogue for real. */
@@ -1002,6 +1067,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 				proglen - (cgctx.idx * 4), cgctx.seen);
 	}
 
+skip_codegen_passes:
 	if (bpf_jit_enable > 1)
 		/*
 		 * Note that we output the base address of the code_base
-- 
GitLab


From ef1b5bf506b1f0ee3edc98533e1f3ecb105eb46a Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Date: Wed, 28 Nov 2018 09:02:41 +0000
Subject: [PATCH 1742/1890] net: phy: Fix not to call phy_resume() if PHY is
 not attached

This patch fixes an issue that mdio_bus_phy_resume() doesn't call
phy_resume() if the PHY is not attached.

Fixes: 803dd9c77ac3 ("net: phy: avoid suspending twice a PHY")
Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy_device.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 18e92c19c5ab8..c4b9008c52d26 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -220,7 +220,7 @@ static LIST_HEAD(phy_fixup_list);
 static DEFINE_MUTEX(phy_fixup_lock);
 
 #ifdef CONFIG_PM
-static bool mdio_bus_phy_may_suspend(struct phy_device *phydev)
+static bool mdio_bus_phy_may_suspend(struct phy_device *phydev, bool suspend)
 {
 	struct device_driver *drv = phydev->mdio.dev.driver;
 	struct phy_driver *phydrv = to_phy_driver(drv);
@@ -232,10 +232,11 @@ static bool mdio_bus_phy_may_suspend(struct phy_device *phydev)
 	/* PHY not attached? May suspend if the PHY has not already been
 	 * suspended as part of a prior call to phy_disconnect() ->
 	 * phy_detach() -> phy_suspend() because the parent netdev might be the
-	 * MDIO bus driver and clock gated at this point.
+	 * MDIO bus driver and clock gated at this point. Also may resume if
+	 * PHY is not attached.
 	 */
 	if (!netdev)
-		return !phydev->suspended;
+		return suspend ? !phydev->suspended : phydev->suspended;
 
 	if (netdev->wol_enabled)
 		return false;
@@ -270,7 +271,7 @@ static int mdio_bus_phy_suspend(struct device *dev)
 	if (phydev->attached_dev && phydev->adjust_link)
 		phy_stop_machine(phydev);
 
-	if (!mdio_bus_phy_may_suspend(phydev))
+	if (!mdio_bus_phy_may_suspend(phydev, true))
 		return 0;
 
 	return phy_suspend(phydev);
@@ -281,7 +282,7 @@ static int mdio_bus_phy_resume(struct device *dev)
 	struct phy_device *phydev = to_phy_device(dev);
 	int ret;
 
-	if (!mdio_bus_phy_may_suspend(phydev))
+	if (!mdio_bus_phy_may_suspend(phydev, false))
 		goto no_resume;
 
 	ret = phy_resume(phydev);
-- 
GitLab


From 8c85f4b81296a530b8af2796c110fa482ac42d4f Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Date: Wed, 28 Nov 2018 09:02:42 +0000
Subject: [PATCH 1743/1890] net: phy: micrel: add toggling phy reset if PHY is
 not attached

This patch adds toggling phy reset if PHY is not attached. Otherwise,
some boards (e.g. R-Car H3 Salvator-XS) cannot link up correctly if
we do the following method:

 1) Kernel boots by using initramfs.
 --> No open the nic, so phy_device_register() and phy_probe()
     deasserts the reset.
 2) Kernel enters the suspend.
 --> So, keep the reset signal as deassert.
 --> On R-Car Salvator-XS board, unfortunately, the board power is
     turned off.
 3) Kernel returns from suspend.
 4) ifconfig eth0 up
 --> Then, since edge signal of the reset doesn't happen,
     it cannot link up.
 5) ifconfig eth0 down
 6) ifconfig eth0 up
 --> In this case, it can link up.

Reported-by: Hiromitsu Yamasaki <hiromitsu.yamasaki.ym@renesas.com>
Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/micrel.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 9265dea794120..1679a6ea104c2 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -23,6 +23,7 @@
  *			 ksz9477
  */
 
+#include <linux/delay.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/phy.h>
@@ -835,6 +836,13 @@ static int kszphy_resume(struct phy_device *phydev)
 {
 	int ret;
 
+	if (!phydev->attached_dev) {
+		/* If the PHY is not attached, toggle the reset */
+		phy_device_reset(phydev, 1);
+		udelay(1);
+		phy_device_reset(phydev, 0);
+	}
+
 	genphy_resume(phydev);
 
 	ret = kszphy_config_reset(phydev);
-- 
GitLab


From e3f787189e10f5fafce77ba8aa948741ebb93c2b Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
Date: Fri, 30 Nov 2018 16:05:53 +0000
Subject: [PATCH 1744/1890] vhost: fix IOTLB locking

Commit 78139c94dc8c ("net: vhost: lock the vqs one by one") moved the vq
lock to improve scalability, but introduced a possible deadlock in
vhost-iotlb. vhost_iotlb_notify_vq() now takes vq->mutex while holding
the device's IOTLB spinlock. And on the vhost_iotlb_miss() path, the
spinlock is taken while holding vq->mutex.

Since calling vhost_poll_queue() doesn't require any lock, avoid the
deadlock by not taking vq->mutex.

Fixes: 78139c94dc8c ("net: vhost: lock the vqs one by one")
Acked-by: Jason Wang <jasowang@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/vhost/vhost.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 3a5f81a66d34f..6b98d8e3a5bf8 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -944,10 +944,7 @@ static void vhost_iotlb_notify_vq(struct vhost_dev *d,
 		if (msg->iova <= vq_msg->iova &&
 		    msg->iova + msg->size - 1 >= vq_msg->iova &&
 		    vq_msg->type == VHOST_IOTLB_MISS) {
-			mutex_lock(&node->vq->mutex);
 			vhost_poll_queue(&node->vq->poll);
-			mutex_unlock(&node->vq->mutex);
-
 			list_del(&node->node);
 			kfree(node);
 		}
-- 
GitLab


From 986103e7920cabc0b910749e77ae5589d3934d52 Mon Sep 17 00:00:00 2001
From: Thomas Falcon <tlfalcon@linux.ibm.com>
Date: Fri, 30 Nov 2018 10:59:08 -0600
Subject: [PATCH 1745/1890] net/ibmvnic: Fix RTNL deadlock during device reset

Commit a5681e20b541 ("net/ibmnvic: Fix deadlock problem
in reset") made the change to hold the RTNL lock during
driver reset but still calls netdev_notify_peers, which
results in a deadlock. Instead, use call_netdevice_notifiers,
which is functionally the same except that it does not
take the RTNL lock again.

Fixes: a5681e20b541 ("net/ibmnvic: Fix deadlock problem in reset")
Signed-off-by: Thomas Falcon <tlfalcon@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index c0203a0d5e3b8..ed50b8dee44f3 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1859,7 +1859,7 @@ static int do_reset(struct ibmvnic_adapter *adapter,
 
 	if (adapter->reset_reason != VNIC_RESET_FAILOVER &&
 	    adapter->reset_reason != VNIC_RESET_CHANGE_PARAM)
-		netdev_notify_peers(netdev);
+		call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, netdev);
 
 	netif_carrier_on(netdev);
 
-- 
GitLab


From fb6df5a6234c38a9c551559506a49a677ac6f07a Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Sat, 1 Dec 2018 01:36:59 +0800
Subject: [PATCH 1746/1890] sctp: kfree_rcu asoc

In sctp_hash_transport/sctp_epaddr_lookup_transport, it dereferences
a transport's asoc under rcu_read_lock while asoc is freed not after
a grace period, which leads to a use-after-free panic.

This patch fixes it by calling kfree_rcu to make asoc be freed after
a grace period.

Note that only the asoc's memory is delayed to free in the patch, it
won't cause sk to linger longer.

Thanks Neil and Marcelo to make this clear.

Fixes: 7fda702f9315 ("sctp: use new rhlist interface on sctp transport rhashtable")
Fixes: cd2b70875058 ("sctp: check duplicate node before inserting a new transport")
Reported-by: syzbot+0b05d8aa7cb185107483@syzkaller.appspotmail.com
Reported-by: syzbot+aad231d51b1923158444@syzkaller.appspotmail.com
Suggested-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h | 2 ++
 net/sctp/associola.c       | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index a11f937904769..feada358d872f 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -2075,6 +2075,8 @@ struct sctp_association {
 
 	__u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1];
 	__u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1];
+
+	struct rcu_head rcu;
 };
 
 
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index dd77ec3892b6a..914750b819b26 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -435,7 +435,7 @@ static void sctp_association_destroy(struct sctp_association *asoc)
 
 	WARN_ON(atomic_read(&asoc->rmem_alloc));
 
-	kfree(asoc);
+	kfree_rcu(asoc, rcu);
 	SCTP_DBG_OBJCNT_DEC(assoc);
 }
 
-- 
GitLab


From 59f997b088d26a774958cb7b17b0763cd82de7ec Mon Sep 17 00:00:00 2001
From: Matteo Croce <mcroce@redhat.com>
Date: Sat, 1 Dec 2018 00:26:27 +0100
Subject: [PATCH 1747/1890] macvlan: return correct error value

A MAC address must be unique among all the macvlan devices with the same
lower device. The only exception is the passthru [sic] mode,
which shares the lower device address.

When duplicate addresses are detected, EBUSY is returned when bringing
the interface up:

    # ip link add macvlan0 link eth0 type macvlan
    # read addr </sys/class/net/eth0/address
    # ip link set macvlan0 address $addr
    # ip link set macvlan0 up
    RTNETLINK answers: Device or resource busy

Use correct error code which is EADDRINUSE, and do the check also
earlier, on address change:

    # ip link set macvlan0 address $addr
    RTNETLINK answers: Address already in use

Signed-off-by: Matteo Croce <mcroce@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvlan.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index fc8d5f1ee1add..0da3d36b283be 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -608,7 +608,7 @@ static int macvlan_open(struct net_device *dev)
 		goto hash_add;
 	}
 
-	err = -EBUSY;
+	err = -EADDRINUSE;
 	if (macvlan_addr_busy(vlan->port, dev->dev_addr))
 		goto out;
 
@@ -706,7 +706,7 @@ static int macvlan_sync_address(struct net_device *dev, unsigned char *addr)
 	} else {
 		/* Rehash and update the device filters */
 		if (macvlan_addr_busy(vlan->port, addr))
-			return -EBUSY;
+			return -EADDRINUSE;
 
 		if (!macvlan_passthru(port)) {
 			err = dev_uc_add(lowerdev, addr);
@@ -747,6 +747,9 @@ static int macvlan_set_mac_address(struct net_device *dev, void *p)
 		return dev_set_mac_address(vlan->lowerdev, addr);
 	}
 
+	if (macvlan_addr_busy(vlan->port, addr->sa_data))
+		return -EADDRINUSE;
+
 	return macvlan_sync_address(dev, addr->sa_data);
 }
 
-- 
GitLab


From a74515604a7b171f2702bdcbd1e231225fb456d0 Mon Sep 17 00:00:00 2001
From: Anderson Luiz Alves <alacn1@gmail.com>
Date: Fri, 30 Nov 2018 21:58:36 -0200
Subject: [PATCH 1748/1890] mv88e6060: disable hardware level MAC learning

Disable hardware level MAC learning because it breaks station roaming.
When enabled it drops all frames that arrive from a MAC address
that is on a different port at learning table.

Signed-off-by: Anderson Luiz Alves <alacn1@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6060.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c
index 65f10fec25b39..0b3e51f248c21 100644
--- a/drivers/net/dsa/mv88e6060.c
+++ b/drivers/net/dsa/mv88e6060.c
@@ -116,8 +116,7 @@ static int mv88e6060_switch_reset(struct dsa_switch *ds)
 	/* Reset the switch. */
 	REG_WRITE(REG_GLOBAL, GLOBAL_ATU_CONTROL,
 		  GLOBAL_ATU_CONTROL_SWRESET |
-		  GLOBAL_ATU_CONTROL_ATUSIZE_1024 |
-		  GLOBAL_ATU_CONTROL_ATE_AGE_5MIN);
+		  GLOBAL_ATU_CONTROL_LEARNDIS);
 
 	/* Wait up to one second for reset to complete. */
 	timeout = jiffies + 1 * HZ;
@@ -142,13 +141,10 @@ static int mv88e6060_setup_global(struct dsa_switch *ds)
 	 */
 	REG_WRITE(REG_GLOBAL, GLOBAL_CONTROL, GLOBAL_CONTROL_MAX_FRAME_1536);
 
-	/* Enable automatic address learning, set the address
-	 * database size to 1024 entries, and set the default aging
-	 * time to 5 minutes.
+	/* Disable automatic address learning.
 	 */
 	REG_WRITE(REG_GLOBAL, GLOBAL_ATU_CONTROL,
-		  GLOBAL_ATU_CONTROL_ATUSIZE_1024 |
-		  GLOBAL_ATU_CONTROL_ATE_AGE_5MIN);
+		  GLOBAL_ATU_CONTROL_LEARNDIS);
 
 	return 0;
 }
-- 
GitLab


From b07b864ee4232b03125992a8f6a490b040adcb6a Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Mon, 3 Dec 2018 18:05:17 -0600
Subject: [PATCH 1749/1890] Revert "PCI/ASPM: Do not initialize link state when
 aspm_disabled is set"

This reverts commit 17c91487364fb33797ed84022564ee7544ac4945.

Rafael found that this commit broke the SD card reader in his
Acer Aspire S5.  Details of the problem are in the bugzilla below.

Fixes: 17c91487364f ("PCI/ASPM: Do not initialize link state when aspm_disabled is set")
Link: https://bugzilla.kernel.org/show_bug.cgi?id=201801
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/pcie/aspm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index dcb29cb76dc69..f78860ce884bc 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -895,7 +895,7 @@ void pcie_aspm_init_link_state(struct pci_dev *pdev)
 	struct pcie_link_state *link;
 	int blacklist = !!pcie_aspm_sanity_check(pdev);
 
-	if (!aspm_support_enabled || aspm_disabled)
+	if (!aspm_support_enabled)
 		return;
 
 	if (pdev->link_state)
-- 
GitLab


From bf29e9e9b6d2f09cdbf39b48d028f0b49e944f85 Mon Sep 17 00:00:00 2001
From: Qian Cai <cai@gmx.us>
Date: Sat, 1 Dec 2018 21:11:19 -0500
Subject: [PATCH 1750/1890] net/core: tidy up an error message

netif_napi_add() could report an error like this below due to it allows
to pass a format string for wildcarding before calling
dev_get_valid_name(),

"netif_napi_add() called with weight 256 on device eth%d"

For example, hns_enet_drv module does this.

hns_nic_try_get_ae
  hns_nic_init_ring_data
    netif_napi_add
  register_netdev
    dev_get_valid_name

Hence, make it a bit more human-readable by using netdev_err_once()
instead.

Signed-off-by: Qian Cai <cai@gmx.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 3470e7fff1f47..e06223b65674e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6209,8 +6209,8 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
 	napi->skb = NULL;
 	napi->poll = poll;
 	if (weight > NAPI_POLL_WEIGHT)
-		pr_err_once("netif_napi_add() called with weight %d on device %s\n",
-			    weight, dev->name);
+		netdev_err_once(dev, "%s() called with weight %d\n", __func__,
+				weight);
 	napi->weight = weight;
 	list_add(&napi->dev_list, &dev->napi_list);
 	napi->dev = dev;
-- 
GitLab


From 24be19e47779d604d1492c114459dca9a92acf78 Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Sun, 2 Dec 2018 14:34:36 +0200
Subject: [PATCH 1751/1890] net/mlx4_en: Change min MTU size to ETH_MIN_MTU

NIC driver minimal MTU size shall be set to ETH_MIN_MTU, as defined in
the RFC791 and in the network stack. Remove old mlx4_en only define for
it, which was set to wrong value.

Fixes: b80f71f5816f ("ethernet/mellanox: use core min/max MTU checking")
Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 4 ++--
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h   | 1 -
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index b744cd49a7856..6b88881b8e358 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -3493,8 +3493,8 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 		dev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM;
 	}
 
-	/* MTU range: 46 - hw-specific max */
-	dev->min_mtu = MLX4_EN_MIN_MTU;
+	/* MTU range: 68 - hw-specific max */
+	dev->min_mtu = ETH_MIN_MTU;
 	dev->max_mtu = priv->max_mtu;
 
 	mdev->pndev[port] = dev;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 485d856546c6c..8137454e25349 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -161,7 +161,6 @@
 #define MLX4_SELFTEST_LB_MIN_MTU (MLX4_LOOPBACK_TEST_PAYLOAD + NET_IP_ALIGN + \
 				  ETH_HLEN + PREAMBLE_LEN)
 
-#define MLX4_EN_MIN_MTU		46
 /* VLAN_HLEN is added twice,to support skb vlan tagged with multiple
  * headers. (For example: ETH_P_8021Q and ETH_P_8021AD).
  */
-- 
GitLab


From 1b603f9e4313348608f256b564ed6e3d9e67f377 Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Sun, 2 Dec 2018 14:34:37 +0200
Subject: [PATCH 1752/1890] net/mlx4_en: Fix build break when CONFIG_INET is
 off
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

MLX4_EN depends on NETDEVICES, ETHERNET and INET Kconfigs.
Make sure they are listed in MLX4_EN Kconfig dependencies.

This fixes the following build break:

drivers/net/ethernet/mellanox/mlx4/en_rx.c:582:18: warning: â€˜struct iphdrâ€™ declared inside parameter list [enabled by default]
struct iphdr *iph)
^
drivers/net/ethernet/mellanox/mlx4/en_rx.c:582:18: warning: its scope is only this definition or declaration, which is probably not what you want [enabled by default]
drivers/net/ethernet/mellanox/mlx4/en_rx.c: In function â€˜get_fixed_ipv4_csumâ€™:
drivers/net/ethernet/mellanox/mlx4/en_rx.c:586:20: error: dereferencing pointer to incomplete type
_u8 ipproto = iph->protocol;

Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/Kconfig b/drivers/net/ethernet/mellanox/mlx4/Kconfig
index 36054e6fb9d34..f200b8c420d57 100644
--- a/drivers/net/ethernet/mellanox/mlx4/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx4/Kconfig
@@ -5,7 +5,7 @@
 config MLX4_EN
 	tristate "Mellanox Technologies 1/10/40Gbit Ethernet support"
 	depends on MAY_USE_DEVLINK
-	depends on PCI
+	depends on PCI && NETDEVICES && ETHERNET && INET
 	select MLX4_CORE
 	imply PTP_1588_CLOCK
 	---help---
-- 
GitLab


From 43a1b0cb4cd6dbfd3cd9c10da663368394d299d8 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Fri, 24 Aug 2018 02:16:12 +0900
Subject: [PATCH 1753/1890] kprobes/x86: Fix instruction patching corruption
 when copying more than one RIP-relative instruction

After copy_optimized_instructions() copies several instructions
to the working buffer it tries to fix up the real RIP address, but it
adjusts the RIP-relative instruction with an incorrect RIP address
for the 2nd and subsequent instructions due to a bug in the logic.

This will break the kernel pretty badly (with likely outcomes such as
a kernel freeze, a crash, or worse) because probed instructions can refer
to the wrong data.

For example putting kprobes on cpumask_next() typically hits this bug.

cpumask_next() is normally like below if CONFIG_CPUMASK_OFFSTACK=y
(in this case nr_cpumask_bits is an alias of nr_cpu_ids):

 <cpumask_next>:
	48 89 f0		mov    %rsi,%rax
	8b 35 7b fb e2 00	mov    0xe2fb7b(%rip),%esi # ffffffff82db9e64 <nr_cpu_ids>
	55			push   %rbp
...

If we put a kprobe on it and it gets jump-optimized, it gets
patched by the kprobes code like this:

 <cpumask_next>:
	e9 95 7d 07 1e		jmpq   0xffffffffa000207a
	7b fb			jnp    0xffffffff81f8a2e2 <cpumask_next+2>
	e2 00			loop   0xffffffff81f8a2e9 <cpumask_next+9>
	55			push   %rbp

This shows that the first two MOV instructions were copied to a
trampoline buffer at 0xffffffffa000207a.

Here is the disassembled result of the trampoline, skipping
the optprobe template instructions:

	# Dump of assembly code from 0xffffffffa000207a to 0xffffffffa00020ea:

	54			push   %rsp
	...
	48 83 c4 08		add    $0x8,%rsp
	9d			popfq
	48 89 f0		mov    %rsi,%rax
	8b 35 82 7d db e2	mov    -0x1d24827e(%rip),%esi # 0xffffffff82db9e67 <nr_cpu_ids+3>

This dump shows that the second MOV accesses *(nr_cpu_ids+3) instead of
the original *nr_cpu_ids. This leads to a kernel freeze because
cpumask_next() always returns 0 and for_each_cpu() never ends.

Fix this by adding 'len' correctly to the real RIP address while
copying.

[ mingo: Improved the changelog. ]

Reported-by: Michael Rodin <michael@rodin.online>
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org # v4.15+
Fixes: 63fef14fc98a ("kprobes/x86: Make insn buffer always ROX and use text_poke()")
Link: http://lkml.kernel.org/r/153504457253.22602.1314289671019919596.stgit@devbox
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/kprobes/opt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 40b16b2706560..6adf6e6c29339 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -189,7 +189,7 @@ static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real)
 	int len = 0, ret;
 
 	while (len < RELATIVEJUMP_SIZE) {
-		ret = __copy_instruction(dest + len, src + len, real, &insn);
+		ret = __copy_instruction(dest + len, src + len, real + len, &insn);
 		if (!ret || !can_boost(&insn, src + len))
 			return -EINVAL;
 		len += ret;
-- 
GitLab


From b31a3ca745a4a47ba63208d37cd50abffe58280f Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Tue, 4 Dec 2018 06:32:15 +0000
Subject: [PATCH 1754/1890] drm/fb-helper: Fix typo in parameter description

Fix typo in parameter description.

Fixes: 4be9bd10e22d ("drm/fb_helper: Allow leaking fbdev smem_start")
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Acked-by: Neil Armstrong <narmstrong@baylibre.com>
Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1543905135-35293-1-git-send-email-weiyongjun1@huawei.com
---
 drivers/gpu/drm/drm_fb_helper.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index dd852a25d3754..9d64f874f965b 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -71,7 +71,7 @@ MODULE_PARM_DESC(drm_fbdev_overalloc,
 #if IS_ENABLED(CONFIG_DRM_FBDEV_LEAK_PHYS_SMEM)
 static bool drm_leak_fbdev_smem = false;
 module_param_unsafe(drm_leak_fbdev_smem, bool, 0600);
-MODULE_PARM_DESC(fbdev_emulation,
+MODULE_PARM_DESC(drm_leak_fbdev_smem,
 		 "Allow unsafe leaking fbdev physical smem address [default=false]");
 #endif
 
-- 
GitLab


From 10950929e994c5ecee149ff0873388d3c98f12b5 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Fri, 23 Nov 2018 09:06:36 +0800
Subject: [PATCH 1755/1890] btrfs: tree-checker: Don't check max block group
 size as current max chunk size limit is unreliable

[BUG]
A completely valid btrfs will refuse to mount, with error message like:
  BTRFS critical (device sdb2): corrupt leaf: root=2 block=239681536 slot=172 \
    bg_start=12018974720 bg_len=10888413184, invalid block group size, \
    have 10888413184 expect (0, 10737418240]

This has been reported several times as the 4.19 kernel is now being
used. The filesystem refuses to mount, but is otherwise ok and booting
4.18 is a workaround.

Btrfs check returns no error, and all kernels used on this fs is later
than 2011, which should all have the 10G size limit commit.

[CAUSE]
For a 12 devices btrfs, we could allocate a chunk larger than 10G due to
stripe stripe bump up.

__btrfs_alloc_chunk()
|- max_stripe_size = 1G
|- max_chunk_size = 10G
|- data_stripe = 11
|- if (1G * 11 > 10G) {
       stripe_size = 976128930;
       stripe_size = round_up(976128930, SZ_16M) = 989855744

However the final stripe_size (989855744) * 11 = 10888413184, which is
still larger than 10G.

[FIX]
For the comprehensive check, we need to do the full check at chunk read
time, and rely on bg <-> chunk mapping to do the check.

We could just skip the length check for now.

Fixes: fce466eab7ac ("btrfs: tree-checker: Verify block_group_item")
Cc: stable@vger.kernel.org # v4.19+
Reported-by: Wang Yugui <wangyugui@e16-tech.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/tree-checker.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index efcf89a8ba44c..1a4e2b101ef24 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -389,13 +389,11 @@ static int check_block_group_item(struct btrfs_fs_info *fs_info,
 
 	/*
 	 * Here we don't really care about alignment since extent allocator can
-	 * handle it.  We care more about the size, as if one block group is
-	 * larger than maximum size, it's must be some obvious corruption.
+	 * handle it.  We care more about the size.
 	 */
-	if (key->offset > BTRFS_MAX_DATA_CHUNK_SIZE || key->offset == 0) {
+	if (key->offset == 0) {
 		block_group_err(fs_info, leaf, slot,
-			"invalid block group size, have %llu expect (0, %llu]",
-				key->offset, BTRFS_MAX_DATA_CHUNK_SIZE);
+				"invalid block group size 0");
 		return -EUCLEAN;
 	}
 
-- 
GitLab


From a72173ecfc6774cf2d55de9fb29421ce69e3428c Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 3 Dec 2018 13:04:18 +0100
Subject: [PATCH 1756/1890] Revert "exec: make de_thread() freezable"

Revert commit c22397888f1e "exec: make de_thread() freezable" as
requested by Ingo Molnar:

"So there's a new regression in v4.20-rc4, my desktop produces this
lockdep splat:

[ 1772.588771] WARNING: pkexec/4633 still has locks held!
[ 1772.588773] 4.20.0-rc4-custom-00213-g93a49841322b #1 Not tainted
[ 1772.588775] ------------------------------------
[ 1772.588776] 1 lock held by pkexec/4633:
[ 1772.588778]  #0: 00000000ed85fbf8 (&sig->cred_guard_mutex){+.+.}, at: prepare_bprm_creds+0x2a/0x70
[ 1772.588786] stack backtrace:
[ 1772.588789] CPU: 7 PID: 4633 Comm: pkexec Not tainted 4.20.0-rc4-custom-00213-g93a49841322b #1
[ 1772.588792] Call Trace:
[ 1772.588800]  dump_stack+0x85/0xcb
[ 1772.588803]  flush_old_exec+0x116/0x890
[ 1772.588807]  ? load_elf_phdrs+0x72/0xb0
[ 1772.588809]  load_elf_binary+0x291/0x1620
[ 1772.588815]  ? sched_clock+0x5/0x10
[ 1772.588817]  ? search_binary_handler+0x6d/0x240
[ 1772.588820]  search_binary_handler+0x80/0x240
[ 1772.588823]  load_script+0x201/0x220
[ 1772.588825]  search_binary_handler+0x80/0x240
[ 1772.588828]  __do_execve_file.isra.32+0x7d2/0xa60
[ 1772.588832]  ? strncpy_from_user+0x40/0x180
[ 1772.588835]  __x64_sys_execve+0x34/0x40
[ 1772.588838]  do_syscall_64+0x60/0x1c0

The warning gets triggered by an ancient lockdep check in the freezer:

(gdb) list *0xffffffff812ece06
0xffffffff812ece06 is in flush_old_exec (./include/linux/freezer.h:57).
52	 * DO NOT ADD ANY NEW CALLERS OF THIS FUNCTION
53	 * If try_to_freeze causes a lockdep warning it means the caller may deadlock
54	 */
55	static inline bool try_to_freeze_unsafe(void)
56	{
57		might_sleep();
58		if (likely(!freezing(current)))
59			return false;
60		return __refrigerator(false);
61	}

I reviewed the ->cred_guard_mutex code, and the mutex is held across all
of exec() - and we always did this.

But there's this recent -rc4 commit:

> Chanho Min (1):
>       exec: make de_thread() freezable

  c22397888f1e: exec: make de_thread() freezable

I believe this commit is bogus, you cannot call try_to_freeze() from
de_thread(), because it's holding the ->cred_guard_mutex."

Reported-by: Ingo Molnar <mingo@kernel.org>
Tested-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 fs/exec.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index acc3a5536384c..fc281b738a982 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -62,7 +62,6 @@
 #include <linux/oom.h>
 #include <linux/compat.h>
 #include <linux/vmalloc.h>
-#include <linux/freezer.h>
 
 #include <linux/uaccess.h>
 #include <asm/mmu_context.h>
@@ -1084,7 +1083,7 @@ static int de_thread(struct task_struct *tsk)
 	while (sig->notify_count) {
 		__set_current_state(TASK_KILLABLE);
 		spin_unlock_irq(lock);
-		freezable_schedule();
+		schedule();
 		if (unlikely(__fatal_signal_pending(tsk)))
 			goto killed;
 		spin_lock_irq(lock);
@@ -1112,7 +1111,7 @@ static int de_thread(struct task_struct *tsk)
 			__set_current_state(TASK_KILLABLE);
 			write_unlock_irq(&tasklist_lock);
 			cgroup_threadgroup_change_end(tsk);
-			freezable_schedule();
+			schedule();
 			if (unlikely(__fatal_signal_pending(tsk)))
 				goto killed;
 		}
-- 
GitLab


From c3494801cd1785e2c25f1a5735fa19ddcf9665da Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Mon, 3 Dec 2018 22:46:04 -0800
Subject: [PATCH 1757/1890] bpf: check pending signals while verifying programs

Malicious user space may try to force the verifier to use as much cpu
time and memory as possible. Hence check for pending signals
while verifying the program.
Note that suspend of sys_bpf(PROG_LOAD) syscall will lead to EAGAIN,
since the kernel has to release the resources used for program verification.

Reported-by: Anatoly Trosinenko <anatoly.trosinenko@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/verifier.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 6dd419550aba4..751bb30b7c5cc 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5148,6 +5148,9 @@ static int do_check(struct bpf_verifier_env *env)
 			goto process_bpf_exit;
 		}
 
+		if (signal_pending(current))
+			return -EAGAIN;
+
 		if (need_resched())
 			cond_resched();
 
-- 
GitLab


From 4f7b3e82589e0de723780198ec7983e427144c0a Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Mon, 3 Dec 2018 22:46:05 -0800
Subject: [PATCH 1758/1890] bpf: improve verifier branch analysis

pathological bpf programs may try to force verifier to explode in
the number of branch states:
  20: (d5) if r1 s<= 0x24000028 goto pc+0
  21: (b5) if r0 <= 0xe1fa20 goto pc+2
  22: (d5) if r1 s<= 0x7e goto pc+0
  23: (b5) if r0 <= 0xe880e000 goto pc+0
  24: (c5) if r0 s< 0x2100ecf4 goto pc+0
  25: (d5) if r1 s<= 0xe880e000 goto pc+1
  26: (c5) if r0 s< 0xf4041810 goto pc+0
  27: (d5) if r1 s<= 0x1e007e goto pc+0
  28: (b5) if r0 <= 0xe86be000 goto pc+0
  29: (07) r0 += 16614
  30: (c5) if r0 s< 0x6d0020da goto pc+0
  31: (35) if r0 >= 0x2100ecf4 goto pc+0

Teach verifier to recognize always taken and always not taken branches.
This analysis is already done for == and != comparison.
Expand it to all other branches.

It also helps real bpf programs to be verified faster:
                       before  after
bpf_lb-DLB_L3.o         2003    1940
bpf_lb-DLB_L4.o         3173    3089
bpf_lb-DUNKNOWN.o       1080    1065
bpf_lxc-DDROP_ALL.o     29584   28052
bpf_lxc-DUNKNOWN.o      36916   35487
bpf_netdev.o            11188   10864
bpf_overlay.o           6679    6643
bpf_lcx_jit.o           39555   38437

Reported-by: Anatoly Trosinenko <anatoly.trosinenko@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/verifier.c                       | 93 ++++++++++++++++++---
 tools/testing/selftests/bpf/test_verifier.c |  4 +-
 2 files changed, 82 insertions(+), 15 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 751bb30b7c5cc..55a49703f423e 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3751,6 +3751,79 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
 	}
 }
 
+/* compute branch direction of the expression "if (reg opcode val) goto target;"
+ * and return:
+ *  1 - branch will be taken and "goto target" will be executed
+ *  0 - branch will not be taken and fall-through to next insn
+ * -1 - unknown. Example: "if (reg < 5)" is unknown when register value range [0,10]
+ */
+static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
+{
+	if (__is_pointer_value(false, reg))
+		return -1;
+
+	switch (opcode) {
+	case BPF_JEQ:
+		if (tnum_is_const(reg->var_off))
+			return !!tnum_equals_const(reg->var_off, val);
+		break;
+	case BPF_JNE:
+		if (tnum_is_const(reg->var_off))
+			return !tnum_equals_const(reg->var_off, val);
+		break;
+	case BPF_JGT:
+		if (reg->umin_value > val)
+			return 1;
+		else if (reg->umax_value <= val)
+			return 0;
+		break;
+	case BPF_JSGT:
+		if (reg->smin_value > (s64)val)
+			return 1;
+		else if (reg->smax_value < (s64)val)
+			return 0;
+		break;
+	case BPF_JLT:
+		if (reg->umax_value < val)
+			return 1;
+		else if (reg->umin_value >= val)
+			return 0;
+		break;
+	case BPF_JSLT:
+		if (reg->smax_value < (s64)val)
+			return 1;
+		else if (reg->smin_value >= (s64)val)
+			return 0;
+		break;
+	case BPF_JGE:
+		if (reg->umin_value >= val)
+			return 1;
+		else if (reg->umax_value < val)
+			return 0;
+		break;
+	case BPF_JSGE:
+		if (reg->smin_value >= (s64)val)
+			return 1;
+		else if (reg->smax_value < (s64)val)
+			return 0;
+		break;
+	case BPF_JLE:
+		if (reg->umax_value <= val)
+			return 1;
+		else if (reg->umin_value > val)
+			return 0;
+		break;
+	case BPF_JSLE:
+		if (reg->smax_value <= (s64)val)
+			return 1;
+		else if (reg->smin_value > (s64)val)
+			return 0;
+		break;
+	}
+
+	return -1;
+}
+
 /* Adjusts the register min/max values in the case that the dst_reg is the
  * variable register that we are working on, and src_reg is a constant or we're
  * simply doing a BPF_K check.
@@ -4152,21 +4225,15 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
 
 	dst_reg = &regs[insn->dst_reg];
 
-	/* detect if R == 0 where R was initialized to zero earlier */
-	if (BPF_SRC(insn->code) == BPF_K &&
-	    (opcode == BPF_JEQ || opcode == BPF_JNE) &&
-	    dst_reg->type == SCALAR_VALUE &&
-	    tnum_is_const(dst_reg->var_off)) {
-		if ((opcode == BPF_JEQ && dst_reg->var_off.value == insn->imm) ||
-		    (opcode == BPF_JNE && dst_reg->var_off.value != insn->imm)) {
-			/* if (imm == imm) goto pc+off;
-			 * only follow the goto, ignore fall-through
-			 */
+	if (BPF_SRC(insn->code) == BPF_K) {
+		int pred = is_branch_taken(dst_reg, insn->imm, opcode);
+
+		if (pred == 1) {
+			 /* only follow the goto, ignore fall-through */
 			*insn_idx += insn->off;
 			return 0;
-		} else {
-			/* if (imm != imm) goto pc+off;
-			 * only follow fall-through branch, since
+		} else if (pred == 0) {
+			/* only follow fall-through branch, since
 			 * that's where the program will go
 			 */
 			return 0;
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 5dd4410a716ce..df6f751cc1e81 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -8576,7 +8576,7 @@ static struct bpf_test tests[] = {
 			BPF_JMP_IMM(BPF_JA, 0, 0, -7),
 		},
 		.fixup_map_hash_8b = { 4 },
-		.errstr = "R0 invalid mem access 'inv'",
+		.errstr = "unbounded min value",
 		.result = REJECT,
 	},
 	{
@@ -10547,7 +10547,7 @@ static struct bpf_test tests[] = {
 		"check deducing bounds from const, 5",
 		.insns = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
-			BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1),
+			BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 1, 1),
 			BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
 			BPF_EXIT_INSN(),
 		},
-- 
GitLab


From ceefbc96fa5c5b975d87bf8e89ba8416f6b764d9 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Mon, 3 Dec 2018 22:46:06 -0800
Subject: [PATCH 1759/1890] bpf: add per-insn complexity limit

malicious bpf program may try to force the verifier to remember
a lot of distinct verifier states.
Put a limit to number of per-insn 'struct bpf_verifier_state'.
Note that hitting the limit doesn't reject the program.
It potentially makes the verifier do more steps to analyze the program.
It means that malicious programs will hit BPF_COMPLEXITY_LIMIT_INSNS sooner
instead of spending cpu time walking long link list.

The limit of BPF_COMPLEXITY_LIMIT_STATES==64 affects cilium progs
with slight increase in number of "steps" it takes to successfully verify
the programs:
                       before    after
bpf_lb-DLB_L3.o         1940      1940
bpf_lb-DLB_L4.o         3089      3089
bpf_lb-DUNKNOWN.o       1065      1065
bpf_lxc-DDROP_ALL.o     28052  |  28162
bpf_lxc-DUNKNOWN.o      35487  |  35541
bpf_netdev.o            10864     10864
bpf_overlay.o           6643      6643
bpf_lcx_jit.o           38437     38437

But it also makes malicious program to be rejected in 0.4 seconds vs 6.5
Hence apply this limit to unprivileged programs only.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/verifier.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 55a49703f423e..fc760d00a38c4 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -175,6 +175,7 @@ struct bpf_verifier_stack_elem {
 
 #define BPF_COMPLEXITY_LIMIT_INSNS	131072
 #define BPF_COMPLEXITY_LIMIT_STACK	1024
+#define BPF_COMPLEXITY_LIMIT_STATES	64
 
 #define BPF_MAP_PTR_UNPRIV	1UL
 #define BPF_MAP_PTR_POISON	((void *)((0xeB9FUL << 1) +	\
@@ -5047,7 +5048,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 	struct bpf_verifier_state_list *new_sl;
 	struct bpf_verifier_state_list *sl;
 	struct bpf_verifier_state *cur = env->cur_state, *new;
-	int i, j, err;
+	int i, j, err, states_cnt = 0;
 
 	sl = env->explored_states[insn_idx];
 	if (!sl)
@@ -5074,8 +5075,12 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 			return 1;
 		}
 		sl = sl->next;
+		states_cnt++;
 	}
 
+	if (!env->allow_ptr_leaks && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
+		return 0;
+
 	/* there were no equivalent states, remember current one.
 	 * technically the current state is not proven to be safe yet,
 	 * but it will either reach outer most bpf_exit (which means it's safe)
-- 
GitLab


From 7b566f70e1bf65b189b66eb3de6f431c30f7dff2 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 4 Dec 2018 08:47:44 -0800
Subject: [PATCH 1760/1890] phy: Revert toggling reset changes.

This reverts:

ef1b5bf506b1 ("net: phy: Fix not to call phy_resume() if PHY is not attached")
8c85f4b81296 ("net: phy: micrel: add toggling phy reset if PHY is not  attached")

Andrew Lunn informs me that there are alternative efforts
underway to fix this more properly.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/micrel.c     |  8 --------
 drivers/net/phy/phy_device.c | 11 +++++------
 2 files changed, 5 insertions(+), 14 deletions(-)

diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 1679a6ea104c2..9265dea794120 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -23,7 +23,6 @@
  *			 ksz9477
  */
 
-#include <linux/delay.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/phy.h>
@@ -836,13 +835,6 @@ static int kszphy_resume(struct phy_device *phydev)
 {
 	int ret;
 
-	if (!phydev->attached_dev) {
-		/* If the PHY is not attached, toggle the reset */
-		phy_device_reset(phydev, 1);
-		udelay(1);
-		phy_device_reset(phydev, 0);
-	}
-
 	genphy_resume(phydev);
 
 	ret = kszphy_config_reset(phydev);
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index c4b9008c52d26..18e92c19c5ab8 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -220,7 +220,7 @@ static LIST_HEAD(phy_fixup_list);
 static DEFINE_MUTEX(phy_fixup_lock);
 
 #ifdef CONFIG_PM
-static bool mdio_bus_phy_may_suspend(struct phy_device *phydev, bool suspend)
+static bool mdio_bus_phy_may_suspend(struct phy_device *phydev)
 {
 	struct device_driver *drv = phydev->mdio.dev.driver;
 	struct phy_driver *phydrv = to_phy_driver(drv);
@@ -232,11 +232,10 @@ static bool mdio_bus_phy_may_suspend(struct phy_device *phydev, bool suspend)
 	/* PHY not attached? May suspend if the PHY has not already been
 	 * suspended as part of a prior call to phy_disconnect() ->
 	 * phy_detach() -> phy_suspend() because the parent netdev might be the
-	 * MDIO bus driver and clock gated at this point. Also may resume if
-	 * PHY is not attached.
+	 * MDIO bus driver and clock gated at this point.
 	 */
 	if (!netdev)
-		return suspend ? !phydev->suspended : phydev->suspended;
+		return !phydev->suspended;
 
 	if (netdev->wol_enabled)
 		return false;
@@ -271,7 +270,7 @@ static int mdio_bus_phy_suspend(struct device *dev)
 	if (phydev->attached_dev && phydev->adjust_link)
 		phy_stop_machine(phydev);
 
-	if (!mdio_bus_phy_may_suspend(phydev, true))
+	if (!mdio_bus_phy_may_suspend(phydev))
 		return 0;
 
 	return phy_suspend(phydev);
@@ -282,7 +281,7 @@ static int mdio_bus_phy_resume(struct device *dev)
 	struct phy_device *phydev = to_phy_device(dev);
 	int ret;
 
-	if (!mdio_bus_phy_may_suspend(phydev, false))
+	if (!mdio_bus_phy_may_suspend(phydev))
 		goto no_resume;
 
 	ret = phy_resume(phydev);
-- 
GitLab


From a579121f94aba4e8bad1a121a0fad050d6925296 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 27 Nov 2018 11:01:43 -0800
Subject: [PATCH 1761/1890] xfs: fix PAGE_MASK usage in xfs_free_file_space

In commit e53c4b598, I *tried* to teach xfs to force writeback when we
fzero/fpunch right up to EOF so that if EOF is in the middle of a page,
the post-EOF part of the page gets zeroed before we return to userspace.
Unfortunately, I missed the part where PAGE_MASK is ~(PAGE_SIZE - 1),
which means that we totally fail to zero if we're fpunching and EOF is
within the first page.  Worse yet, the same PAGE_MASK thinko plagues the
filemap_write_and_wait_range call, so we'd initiate writeback of the
entire file, which (mostly) masked the thinko.

Drop the tricky PAGE_MASK and replace it with correct usage of PAGE_SIZE
and the proper rounding macros.

Fixes: e53c4b598 ("xfs: ensure post-EOF zeroing happens after zeroing part of a file")
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_bmap_util.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 404e581f1ea1e..1ee8c5539fa4f 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1126,9 +1126,9 @@ xfs_free_file_space(
 	 * page could be mmap'd and iomap_zero_range doesn't do that for us.
 	 * Writeback of the eof page will do this, albeit clumsily.
 	 */
-	if (offset + len >= XFS_ISIZE(ip) && ((offset + len) & PAGE_MASK)) {
+	if (offset + len >= XFS_ISIZE(ip) && offset_in_page(offset + len) > 0) {
 		error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
-				(offset + len) & ~PAGE_MASK, LLONG_MAX);
+				round_down(offset + len, PAGE_SIZE), LLONG_MAX);
 	}
 
 	return error;
-- 
GitLab


From 7d048df4e9b05ba89b74d062df59498aa81f3785 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Fri, 30 Nov 2018 07:55:57 -0800
Subject: [PATCH 1762/1890] xfs: fix inverted return from
 xfs_btree_sblock_verify_crc

xfs_btree_sblock_verify_crc is a bool so should not be returning
a failaddr_t; worse, if xfs_log_check_lsn fails it returns
__this_address which looks like a boolean true (i.e. success)
to the caller.

(interestingly xfs_btree_lblock_verify_crc doesn't have the issue)

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/libxfs/xfs_btree.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 34c6d7bd4d180..bbdae2b4559fc 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -330,7 +330,7 @@ xfs_btree_sblock_verify_crc(
 
 	if (xfs_sb_version_hascrc(&mp->m_sb)) {
 		if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn)))
-			return __this_address;
+			return false;
 		return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
 	}
 
-- 
GitLab


From 6744557b53a2b710ebce3736a5c27a0119685fcc Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Fri, 30 Nov 2018 12:32:38 -0800
Subject: [PATCH 1763/1890] vfs: allow some remap flags to be passed to
 vfs_clone_file_range

In overlayfs, ovl_remap_file_range calls vfs_clone_file_range on the
lower filesystem's inode, passing through whatever remap flags it got
from its caller.  Since vfs_copy_file_range first tries a filesystem's
remap function with REMAP_FILE_CAN_SHORTEN, this can get passed through
to the second vfs_copy_file_range call, and this isn't an issue.
Change the WARN_ON to look only for the DEDUP flag.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/read_write.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/read_write.c b/fs/read_write.c
index 4dae0399c75a7..58f30537c47a0 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1956,7 +1956,7 @@ loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
 	struct inode *inode_out = file_inode(file_out);
 	loff_t ret;
 
-	WARN_ON_ONCE(remap_flags);
+	WARN_ON_ONCE(remap_flags & REMAP_FILE_DEDUP);
 
 	if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
 		return -EISDIR;
-- 
GitLab


From 17614445576b6af24e9cf36607c6448164719c96 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Fri, 30 Nov 2018 10:37:49 -0800
Subject: [PATCH 1764/1890] splice: don't read more than available pipe space

In commit 4721a601099, we tried to fix a problem wherein directio reads
into a splice pipe will bounce EFAULT/EAGAIN all the way out to
userspace by simulating a zero-byte short read.  This happens because
some directio read implementations (xfs) will call
bio_iov_iter_get_pages to grab pipe buffer pages and issue asynchronous
reads, but as soon as we run out of pipe buffers that _get_pages call
returns EFAULT, which the splice code translates to EAGAIN and bounces
out to userspace.

In that commit, the iomap code catches the EFAULT and simulates a
zero-byte read, but that causes assertion errors on regular splice reads
because xfs doesn't allow short directio reads.

The brokenness is compounded by splice_direct_to_actor immediately
bailing on do_splice_to returning <= 0 without ever calling ->actor
(which empties out the pipe), so if userspace calls back we'll EFAULT
again on the full pipe, and nothing ever gets copied.

Therefore, teach splice_direct_to_actor to clamp its requests to the
amount of free space in the pipe and remove the simulated short read
from the iomap directio code.

Fixes: 4721a601099 ("iomap: dio data corruption and spurious errors when pipes fill")
Reported-by: Murphy Zhou <jencce.kernel@gmail.com>
Ranted-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/splice.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/fs/splice.c b/fs/splice.c
index 3553f1956508d..de2ede048473c 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -945,11 +945,16 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
 	sd->flags &= ~SPLICE_F_NONBLOCK;
 	more = sd->flags & SPLICE_F_MORE;
 
+	WARN_ON_ONCE(pipe->nrbufs != 0);
+
 	while (len) {
 		size_t read_len;
 		loff_t pos = sd->pos, prev_pos = pos;
 
-		ret = do_splice_to(in, &pos, pipe, len, flags);
+		/* Don't try to read more the pipe has space for. */
+		read_len = min_t(size_t, len,
+				 (pipe->buffers - pipe->nrbufs) << PAGE_SHIFT);
+		ret = do_splice_to(in, &pos, pipe, read_len, flags);
 		if (unlikely(ret <= 0))
 			goto out_release;
 
-- 
GitLab


From 8f67b5adc030553fbc877124306f3f3bdab89aa8 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Sun, 2 Dec 2018 08:38:07 -0800
Subject: [PATCH 1765/1890] iomap: partially revert 4721a601099 (simulated
 directio short read on EFAULT)

In commit 4721a601099, we tried to fix a problem wherein directio reads
into a splice pipe will bounce EFAULT/EAGAIN all the way out to
userspace by simulating a zero-byte short read.  This happens because
some directio read implementations (xfs) will call
bio_iov_iter_get_pages to grab pipe buffer pages and issue asynchronous
reads, but as soon as we run out of pipe buffers that _get_pages call
returns EFAULT, which the splice code translates to EAGAIN and bounces
out to userspace.

In that commit, the iomap code catches the EFAULT and simulates a
zero-byte read, but that causes assertion errors on regular splice reads
because xfs doesn't allow short directio reads.  This causes infinite
splice() loops and assertion failures on generic/095 on overlayfs
because xfs only permit total success or total failure of a directio
operation.  The underlying issue in the pipe splice code has now been
fixed by changing the pipe splice loop to avoid avoid reading more data
than there is space in the pipe.

Therefore, it's no longer necessary to simulate the short directio, so
remove the hack from iomap.

Fixes: 4721a601099 ("iomap: dio data corruption and spurious errors when pipes fill")
Reported-by: Murphy Zhou <jencce.kernel@gmail.com>
Ranted-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/iomap.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/fs/iomap.c b/fs/iomap.c
index 3ffb776fbebe3..d6bc98ae8d350 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -1877,15 +1877,6 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 				dio->wait_for_completion = true;
 				ret = 0;
 			}
-
-			/*
-			 * Splicing to pipes can fail on a full pipe. We have to
-			 * swallow this to make it look like a short IO
-			 * otherwise the higher splice layers will completely
-			 * mishandle the error and stop moving data.
-			 */
-			if (ret == -EFAULT)
-				ret = 0;
 			break;
 		}
 		pos += ret;
-- 
GitLab


From 4944af670bc2c7da77ac17b295656d913e3898f4 Mon Sep 17 00:00:00 2001
From: wentalou <wentalou@amd.com>
Date: Mon, 3 Dec 2018 10:49:50 +0800
Subject: [PATCH 1766/1890] drm/amdgpu: enlarge maximum waiting time of KIQ
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

KIQ in VFâ€™s init delayed by another VFâ€™s reset,
which would cause late_init failed occasionally.
MAX_KIQ_REG_TRY enlarged from 20 to 80 would fix this issue.

Reviewed-by: Christian KÃ¶nig <christian.koenig@amd.com>
Signed-off-by: Wentao Lou <Wentao.Lou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 104b2e0d893bd..b0fc116296cb3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -233,7 +233,7 @@ enum amdgpu_kiq_irq {
 
 #define MAX_KIQ_REG_WAIT       5000 /* in usecs, 5ms */
 #define MAX_KIQ_REG_BAILOUT_INTERVAL   5 /* in msecs, 5ms */
-#define MAX_KIQ_REG_TRY 20
+#define MAX_KIQ_REG_TRY 80 /* 20 -> 80 */
 
 int amdgpu_device_ip_set_clockgating_state(void *dev,
 					   enum amd_ip_block_type block_type,
-- 
GitLab


From 1b3b27b2a1044457d7dbf1b13bfdbd9a0c1c1108 Mon Sep 17 00:00:00 2001
From: tianci yin <tianci.yin@amd.com>
Date: Tue, 4 Dec 2018 16:07:18 +0800
Subject: [PATCH 1767/1890] drm/amd/powerplay: improve OD code robustness

add protection code to avoid lower frequency trigger over drive.

Reviewed-by: Rex Zhu <Rex.Zhu@amd.com>
Signed-off-by: Tianci Yin <tianci.yin@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c   | 12 ++++++++----
 drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c | 12 ++++++++----
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
index 88f6b35ea6fee..b61a01f552840 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
@@ -3589,8 +3589,10 @@ static int smu7_find_dpm_states_clocks_in_dpm_table(struct pp_hwmgr *hwmgr, cons
 	}
 
 	if (i >= sclk_table->count) {
-		data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_SCLK;
-		sclk_table->dpm_levels[i-1].value = sclk;
+		if (sclk > sclk_table->dpm_levels[i-1].value) {
+			data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_SCLK;
+			sclk_table->dpm_levels[i-1].value = sclk;
+		}
 	} else {
 	/* TODO: Check SCLK in DAL's minimum clocks
 	 * in case DeepSleep divider update is required.
@@ -3607,8 +3609,10 @@ static int smu7_find_dpm_states_clocks_in_dpm_table(struct pp_hwmgr *hwmgr, cons
 	}
 
 	if (i >= mclk_table->count) {
-		data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_MCLK;
-		mclk_table->dpm_levels[i-1].value = mclk;
+		if (mclk > mclk_table->dpm_levels[i-1].value) {
+			data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_MCLK;
+			mclk_table->dpm_levels[i-1].value = mclk;
+		}
 	}
 
 	if (data->display_timing.num_existing_displays != hwmgr->display_config->num_display)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index e2bc6e0c229f9..79c86247d0ac0 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -3266,8 +3266,10 @@ static int vega10_find_dpm_states_clocks_in_dpm_table(struct pp_hwmgr *hwmgr, co
 	}
 
 	if (i >= sclk_table->count) {
-		data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_SCLK;
-		sclk_table->dpm_levels[i-1].value = sclk;
+		if (sclk > sclk_table->dpm_levels[i-1].value) {
+			data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_SCLK;
+			sclk_table->dpm_levels[i-1].value = sclk;
+		}
 	}
 
 	for (i = 0; i < mclk_table->count; i++) {
@@ -3276,8 +3278,10 @@ static int vega10_find_dpm_states_clocks_in_dpm_table(struct pp_hwmgr *hwmgr, co
 	}
 
 	if (i >= mclk_table->count) {
-		data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_MCLK;
-		mclk_table->dpm_levels[i-1].value = mclk;
+		if (mclk > mclk_table->dpm_levels[i-1].value) {
+			data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_MCLK;
+			mclk_table->dpm_levels[i-1].value = mclk;
+		}
 	}
 
 	if (data->display_timing.num_existing_displays != hwmgr->display_config->num_display)
-- 
GitLab


From 090afc1e40f411144e112431da279bafe7e734ff Mon Sep 17 00:00:00 2001
From: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Date: Fri, 20 Jul 2018 10:17:29 -0400
Subject: [PATCH 1768/1890] drm/amd/display: Fix overflow/truncation from
 strncpy.

[Why]

New GCC warnings for stringop-truncation and stringop-overflow help
catch common misuse of strncpy. This patch suppresses these warnings
by fixing bugs identified by them.

[How]

Since the parameter passed for name in amdpgu_dm_create_common_mode has
no fixed length, if the string is >= DRM_DISPLAY_MODE_LEN then
mode->name will not be null-terminated.

The truncation in fill_audio_info won't actually occur (and the string
will be null-terminated since the buffer is initialized to zero), but
the warning can be suppressed by using the proper buffer size.

This patch fixes both issues by using the real size for the buffer and
making use of strscpy (which always terminates).

Signed-off-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 33b605b259c21..5a6edf65c9eae 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -2554,9 +2554,9 @@ static void fill_audio_info(struct audio_info *audio_info,
 
 	cea_revision = drm_connector->display_info.cea_rev;
 
-	strncpy(audio_info->display_name,
+	strscpy(audio_info->display_name,
 		edid_caps->display_name,
-		AUDIO_INFO_DISPLAY_NAME_SIZE_IN_CHARS - 1);
+		AUDIO_INFO_DISPLAY_NAME_SIZE_IN_CHARS);
 
 	if (cea_revision >= 3) {
 		audio_info->mode_count = edid_caps->audio_mode_count;
@@ -3652,7 +3652,7 @@ amdgpu_dm_create_common_mode(struct drm_encoder *encoder,
 	mode->hdisplay = hdisplay;
 	mode->vdisplay = vdisplay;
 	mode->type &= ~DRM_MODE_TYPE_PREFERRED;
-	strncpy(mode->name, name, DRM_DISPLAY_MODE_LEN);
+	strscpy(mode->name, name, DRM_DISPLAY_MODE_LEN);
 
 	return mode;
 
-- 
GitLab


From a1208f6a822ac29933e772ef1f637c5d67838da9 Mon Sep 17 00:00:00 2001
From: Chris Cole <chris@sageembedded.com>
Date: Fri, 23 Nov 2018 12:20:45 +0100
Subject: [PATCH 1769/1890] ARM: 8814/1: mm: improve/fix ARM v7_dma_inv_range()
 unaligned address handling

This patch addresses possible memory corruption when
v7_dma_inv_range(start_address, end_address) address parameters are not
aligned to whole cache lines. This function issues "invalidate" cache
management operations to all cache lines from start_address (inclusive)
to end_address (exclusive). When start_address and/or end_address are
not aligned, the start and/or end cache lines are first issued "clean &
invalidate" operation. The assumption is this is done to ensure that any
dirty data addresses outside the address range (but part of the first or
last cache lines) are cleaned/flushed so that data is not lost, which
could happen if just an invalidate is issued.

The problem is that these first/last partial cache lines are issued
"clean & invalidate" and then "invalidate". This second "invalidate" is
not required and worse can cause "lost" writes to addresses outside the
address range but part of the cache line. If another component writes to
its part of the cache line between the "clean & invalidate" and
"invalidate" operations, the write can get lost. This fix is to remove
the extra "invalidate" operation when unaligned addressed are used.

A kernel module is available that has a stress test to reproduce the
issue and a unit test of the updated v7_dma_inv_range(). It can be
downloaded from
http://ftp.sageembedded.com/outgoing/linux/cache-test-20181107.tgz.

v7_dma_inv_range() is call by dmac_[un]map_area(addr, len, direction)
when the direction is DMA_FROM_DEVICE. One can (I believe) successfully
argue that DMA from a device to main memory should use buffers aligned
to cache line size, because the "clean & invalidate" might overwrite
data that the device just wrote using DMA. But if a driver does use
unaligned buffers, at least this fix will prevent memory corruption
outside the buffer.

Signed-off-by: Chris Cole <chris@sageembedded.com>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/mm/cache-v7.S | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
index 215df435bfb98..2149b47a0c5ac 100644
--- a/arch/arm/mm/cache-v7.S
+++ b/arch/arm/mm/cache-v7.S
@@ -360,14 +360,16 @@ v7_dma_inv_range:
 	ALT_UP(W(nop))
 #endif
 	mcrne	p15, 0, r0, c7, c14, 1		@ clean & invalidate D / U line
+	addne	r0, r0, r2
 
 	tst	r1, r3
 	bic	r1, r1, r3
 	mcrne	p15, 0, r1, c7, c14, 1		@ clean & invalidate D / U line
-1:
-	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D / U line
-	add	r0, r0, r2
 	cmp	r0, r1
+1:
+	mcrlo	p15, 0, r0, c7, c6, 1		@ invalidate D / U line
+	addlo	r0, r0, r2
+	cmplo	r0, r1
 	blo	1b
 	dsb	st
 	ret	lr
-- 
GitLab


From 3d0358d0ba048c5afb1385787aaec8fa5ad78fcc Mon Sep 17 00:00:00 2001
From: Vladimir Murzin <vladimir.murzin@arm.com>
Date: Fri, 23 Nov 2018 12:25:21 +0100
Subject: [PATCH 1770/1890] ARM: 8815/1: V7M: align v7m_dma_inv_range() with v7
 counterpart

Chris has discovered and reported that v7_dma_inv_range() may corrupt
memory if address range is not aligned to cache line size.

Since the whole cache-v7m.S was lifted form cache-v7.S the same
observation applies to v7m_dma_inv_range(). So the fix just mirrors
what has been done for v7 with a little specific of M-class.

Cc: Chris Cole <chris@sageembedded.com>
Signed-off-by: Vladimir Murzin <vladimir.murzin@arm.com>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/mm/cache-v7m.S | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/arch/arm/mm/cache-v7m.S b/arch/arm/mm/cache-v7m.S
index 788486e830d3e..32aa2a2aa260c 100644
--- a/arch/arm/mm/cache-v7m.S
+++ b/arch/arm/mm/cache-v7m.S
@@ -73,9 +73,11 @@
 /*
  * dcimvac: Invalidate data cache line by MVA to PoC
  */
-.macro dcimvac, rt, tmp
-	v7m_cacheop \rt, \tmp, V7M_SCB_DCIMVAC
+.irp    c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo
+.macro dcimvac\c, rt, tmp
+	v7m_cacheop \rt, \tmp, V7M_SCB_DCIMVAC, \c
 .endm
+.endr
 
 /*
  * dccmvau: Clean data cache line by MVA to PoU
@@ -369,14 +371,16 @@ v7m_dma_inv_range:
 	tst	r0, r3
 	bic	r0, r0, r3
 	dccimvacne r0, r3
+	addne	r0, r0, r2
 	subne	r3, r2, #1	@ restore r3, corrupted by v7m's dccimvac
 	tst	r1, r3
 	bic	r1, r1, r3
 	dccimvacne r1, r3
-1:
-	dcimvac r0, r3
-	add	r0, r0, r2
 	cmp	r0, r1
+1:
+	dcimvaclo r0, r3
+	addlo	r0, r0, r2
+	cmplo	r0, r1
 	blo	1b
 	dsb	st
 	ret	lr
-- 
GitLab


From c2a3831df6dc164af66d8d86cf356a90c021b86f Mon Sep 17 00:00:00 2001
From: Nathan Jones <nathanj439@gmail.com>
Date: Tue, 4 Dec 2018 10:05:32 +0100
Subject: [PATCH 1771/1890] ARM: 8816/1: dma-mapping: fix potential
 uninitialized return

While trying to use the dma_mmap_*() interface, it was noticed that this
interface returns strange values when passed an incorrect length.

If neither of the if() statements fire then the return value is
uninitialized. In the worst case it returns 0 which means the caller
will think the function succeeded.

Fixes: 1655cf8829d8 ("ARM: dma-mapping: Remove traces of NOMMU code")
Signed-off-by: Nathan Jones <nathanj439@gmail.com>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Acked-by: Vladimir Murzin <vladimir.murzin@arm.com>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/mm/dma-mapping.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 661fe48ab78da..78de138aa66dc 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -829,7 +829,7 @@ static int __arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
 		 void *cpu_addr, dma_addr_t dma_addr, size_t size,
 		 unsigned long attrs)
 {
-	int ret;
+	int ret = -ENXIO;
 	unsigned long nr_vma_pages = vma_pages(vma);
 	unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
 	unsigned long pfn = dma_to_pfn(dev, dma_addr);
-- 
GitLab


From ffe81d45322cc3cb140f0db080a4727ea284661e Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 4 Dec 2018 20:06:48 -0700
Subject: [PATCH 1772/1890] blk-mq: fix corruption with direct issue

If we attempt a direct issue to a SCSI device, and it returns BUSY, then
we queue the request up normally. However, the SCSI layer may have
already setup SG tables etc for this particular command. If we later
merge with this request, then the old tables are no longer valid. Once
we issue the IO, we only read/write the original part of the request,
not the new state of it.

This causes data corruption, and is most often noticed with the file
system complaining about the just read data being invalid:

[  235.934465] EXT4-fs error (device sda1): ext4_iget:4831: inode #7142: comm dpkg-query: bad extra_isize 24937 (inode size 256)

because most of it is garbage...

This doesn't happen from the normal issue path, as we will simply defer
the request to the hardware queue dispatch list if we fail. Once it's on
the dispatch list, we never merge with it.

Fix this from the direct issue path by flagging the request as
REQ_NOMERGE so we don't change the size of it before issue.

See also:
  https://bugzilla.kernel.org/show_bug.cgi?id=201685

Tested-by: Guenter Roeck <linux@roeck-us.net>
Fixes: 6ce3dd6eec1 ("blk-mq: issue directly if hw queue isn't busy in case of 'none'")
Cc: stable@vger.kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 3f91c6e5b17a9..3262d83b9e074 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1715,6 +1715,15 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
 		break;
 	case BLK_STS_RESOURCE:
 	case BLK_STS_DEV_RESOURCE:
+		/*
+		 * If direct dispatch fails, we cannot allow any merging on
+		 * this IO. Drivers (like SCSI) may have set up permanent state
+		 * for this request, like SG tables and mappings, and if we
+		 * merge to it later on then we'll still only do IO to the
+		 * original part.
+		 */
+		rq->cmd_flags |= REQ_NOMERGE;
+
 		blk_mq_update_dispatch_busy(hctx, true);
 		__blk_mq_requeue_request(rq);
 		break;
@@ -1727,6 +1736,18 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
 	return ret;
 }
 
+/*
+ * Don't allow direct dispatch of anything but regular reads/writes,
+ * as some of the other commands can potentially share request space
+ * with data we need for the IO scheduler. If we attempt a direct dispatch
+ * on those and fail, we can't safely add it to the scheduler afterwards
+ * without potentially overwriting data that the driver has already written.
+ */
+static bool blk_rq_can_direct_dispatch(struct request *rq)
+{
+	return req_op(rq) == REQ_OP_READ || req_op(rq) == REQ_OP_WRITE;
+}
+
 static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
 						struct request *rq,
 						blk_qc_t *cookie,
@@ -1748,7 +1769,7 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
 		goto insert;
 	}
 
-	if (q->elevator && !bypass_insert)
+	if (!blk_rq_can_direct_dispatch(rq) || (q->elevator && !bypass_insert))
 		goto insert;
 
 	if (!blk_mq_get_dispatch_budget(hctx))
@@ -1810,6 +1831,9 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
 		struct request *rq = list_first_entry(list, struct request,
 				queuelist);
 
+		if (!blk_rq_can_direct_dispatch(rq))
+			break;
+
 		list_del_init(&rq->queuelist);
 		ret = blk_mq_request_issue_directly(rq);
 		if (ret != BLK_STS_OK) {
-- 
GitLab


From c3b9ab5db11d8098ca7674175f12ab21cdce1bbb Mon Sep 17 00:00:00 2001
From: Fabio Estevam <festevam@gmail.com>
Date: Fri, 30 Nov 2018 08:31:29 -0200
Subject: [PATCH 1773/1890] ARM: dts: imx7d-pico: Describe the Wifi clock

The Wifi chip should be clocked by a 32kHz clock coming from i.MX7D
CLKO2 output pin, so describe the pinmux and clock hierarchy in the
device tree to allow the Wifi chip to be properly clocked.

Managed to successfully test Wifi with such change. Used the standard
nvram.txt file provided by TechNexion, which selects an external 32kHz
clock for the Wifi chip by default.

Fixes: 99a52450c707 ("ARM: dts: imx7d-pico: Add Wifi support")
Suggested-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Tested-by: Otavio Salvador <otavio@ossystems.com.br>
Signed-off-by: Fabio Estevam <festevam@gmail.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm/boot/dts/imx7d-pico.dtsi | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/imx7d-pico.dtsi b/arch/arm/boot/dts/imx7d-pico.dtsi
index 21973eb556719..f27b3849d3ff3 100644
--- a/arch/arm/boot/dts/imx7d-pico.dtsi
+++ b/arch/arm/boot/dts/imx7d-pico.dtsi
@@ -100,6 +100,19 @@ reg_vref_1v8: regulator-vref-1v8 {
 		regulator-min-microvolt = <1800000>;
 		regulator-max-microvolt = <1800000>;
 	};
+
+	usdhc2_pwrseq: usdhc2_pwrseq {
+		compatible = "mmc-pwrseq-simple";
+		clocks = <&clks IMX7D_CLKO2_ROOT_DIV>;
+		clock-names = "ext_clock";
+	};
+};
+
+&clks {
+	assigned-clocks = <&clks IMX7D_CLKO2_ROOT_SRC>,
+			  <&clks IMX7D_CLKO2_ROOT_DIV>;
+	assigned-clock-parents = <&clks IMX7D_CKIL>;
+	assigned-clock-rates = <0>, <32768>;
 };
 
 &i2c4 {
@@ -199,12 +212,13 @@ vgen6_reg: vldo4 {
 
 &usdhc2 { /* Wifi SDIO */
 	pinctrl-names = "default";
-	pinctrl-0 = <&pinctrl_usdhc2>;
+	pinctrl-0 = <&pinctrl_usdhc2 &pinctrl_wifi_clk>;
 	no-1-8-v;
 	non-removable;
 	keep-power-in-suspend;
 	wakeup-source;
 	vmmc-supply = <&reg_ap6212>;
+	mmc-pwrseq = <&usdhc2_pwrseq>;
 	status = "okay";
 };
 
@@ -301,6 +315,12 @@ MX7D_PAD_SD3_DATA7__SD3_DATA7		0x5b
 };
 
 &iomuxc_lpsr {
+	pinctrl_wifi_clk: wificlkgrp {
+		fsl,pins = <
+			MX7D_PAD_LPSR_GPIO1_IO03__CCM_CLKO2	0x7d
+		>;
+	};
+
 	pinctrl_wdog: wdoggrp {
 		fsl,pins = <
 			MX7D_PAD_LPSR_GPIO1_IO00__WDOG1_WDOG_B	0x74
-- 
GitLab


From 688838934c231bb08f46db687e57f6d8bf82709c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 4 Dec 2018 09:40:35 -0800
Subject: [PATCH 1774/1890] rtnetlink: ndo_dflt_fdb_dump() only work for
 ARPHRD_ETHER devices

kmsan was able to trigger a kernel-infoleak using a gre device [1]

nlmsg_populate_fdb_fill() has a hard coded assumption
that dev->addr_len is ETH_ALEN, as normally guaranteed
for ARPHRD_ETHER devices.

A similar issue was fixed recently in commit da71577545a5
("rtnetlink: Disallow FDB configuration for non-Ethernet device")

[1]
BUG: KMSAN: kernel-infoleak in copyout lib/iov_iter.c:143 [inline]
BUG: KMSAN: kernel-infoleak in _copy_to_iter+0x4c0/0x2700 lib/iov_iter.c:576
CPU: 0 PID: 6697 Comm: syz-executor310 Not tainted 4.20.0-rc3+ #95
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x32d/0x480 lib/dump_stack.c:113
 kmsan_report+0x12c/0x290 mm/kmsan/kmsan.c:683
 kmsan_internal_check_memory+0x32a/0xa50 mm/kmsan/kmsan.c:743
 kmsan_copy_to_user+0x78/0xd0 mm/kmsan/kmsan_hooks.c:634
 copyout lib/iov_iter.c:143 [inline]
 _copy_to_iter+0x4c0/0x2700 lib/iov_iter.c:576
 copy_to_iter include/linux/uio.h:143 [inline]
 skb_copy_datagram_iter+0x4e2/0x1070 net/core/datagram.c:431
 skb_copy_datagram_msg include/linux/skbuff.h:3316 [inline]
 netlink_recvmsg+0x6f9/0x19d0 net/netlink/af_netlink.c:1975
 sock_recvmsg_nosec net/socket.c:794 [inline]
 sock_recvmsg+0x1d1/0x230 net/socket.c:801
 ___sys_recvmsg+0x444/0xae0 net/socket.c:2278
 __sys_recvmsg net/socket.c:2327 [inline]
 __do_sys_recvmsg net/socket.c:2337 [inline]
 __se_sys_recvmsg+0x2fa/0x450 net/socket.c:2334
 __x64_sys_recvmsg+0x4a/0x70 net/socket.c:2334
 do_syscall_64+0xcf/0x110 arch/x86/entry/common.c:291
 entry_SYSCALL_64_after_hwframe+0x63/0xe7
RIP: 0033:0x441119
Code: 18 89 d0 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 db 0a fc ff c3 66 2e 0f 1f 84 00 00 00 00
RSP: 002b:00007fffc7f008a8 EFLAGS: 00000207 ORIG_RAX: 000000000000002f
RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 0000000000441119
RDX: 0000000000000040 RSI: 00000000200005c0 RDI: 0000000000000003
RBP: 00000000006cc018 R08: 0000000000000100 R09: 0000000000000100
R10: 0000000000000100 R11: 0000000000000207 R12: 0000000000402080
R13: 0000000000402110 R14: 0000000000000000 R15: 0000000000000000

Uninit was stored to memory at:
 kmsan_save_stack_with_flags mm/kmsan/kmsan.c:246 [inline]
 kmsan_save_stack mm/kmsan/kmsan.c:261 [inline]
 kmsan_internal_chain_origin+0x13d/0x240 mm/kmsan/kmsan.c:469
 kmsan_memcpy_memmove_metadata+0x1a9/0xf70 mm/kmsan/kmsan.c:344
 kmsan_memcpy_metadata+0xb/0x10 mm/kmsan/kmsan.c:362
 __msan_memcpy+0x61/0x70 mm/kmsan/kmsan_instr.c:162
 __nla_put lib/nlattr.c:744 [inline]
 nla_put+0x20a/0x2d0 lib/nlattr.c:802
 nlmsg_populate_fdb_fill+0x444/0x810 net/core/rtnetlink.c:3466
 nlmsg_populate_fdb net/core/rtnetlink.c:3775 [inline]
 ndo_dflt_fdb_dump+0x73a/0x960 net/core/rtnetlink.c:3807
 rtnl_fdb_dump+0x1318/0x1cb0 net/core/rtnetlink.c:3979
 netlink_dump+0xc79/0x1c90 net/netlink/af_netlink.c:2244
 __netlink_dump_start+0x10c4/0x11d0 net/netlink/af_netlink.c:2352
 netlink_dump_start include/linux/netlink.h:216 [inline]
 rtnetlink_rcv_msg+0x141b/0x1540 net/core/rtnetlink.c:4910
 netlink_rcv_skb+0x394/0x640 net/netlink/af_netlink.c:2477
 rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:4965
 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline]
 netlink_unicast+0x1699/0x1740 net/netlink/af_netlink.c:1336
 netlink_sendmsg+0x13c7/0x1440 net/netlink/af_netlink.c:1917
 sock_sendmsg_nosec net/socket.c:621 [inline]
 sock_sendmsg net/socket.c:631 [inline]
 ___sys_sendmsg+0xe3b/0x1240 net/socket.c:2116
 __sys_sendmsg net/socket.c:2154 [inline]
 __do_sys_sendmsg net/socket.c:2163 [inline]
 __se_sys_sendmsg+0x305/0x460 net/socket.c:2161
 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2161
 do_syscall_64+0xcf/0x110 arch/x86/entry/common.c:291
 entry_SYSCALL_64_after_hwframe+0x63/0xe7

Uninit was created at:
 kmsan_save_stack_with_flags mm/kmsan/kmsan.c:246 [inline]
 kmsan_internal_poison_shadow+0x6d/0x130 mm/kmsan/kmsan.c:170
 kmsan_kmalloc+0xa1/0x100 mm/kmsan/kmsan_hooks.c:186
 __kmalloc+0x14c/0x4d0 mm/slub.c:3825
 kmalloc include/linux/slab.h:551 [inline]
 __hw_addr_create_ex net/core/dev_addr_lists.c:34 [inline]
 __hw_addr_add_ex net/core/dev_addr_lists.c:80 [inline]
 __dev_mc_add+0x357/0x8a0 net/core/dev_addr_lists.c:670
 dev_mc_add+0x6d/0x80 net/core/dev_addr_lists.c:687
 ip_mc_filter_add net/ipv4/igmp.c:1128 [inline]
 igmp_group_added+0x4d4/0xb80 net/ipv4/igmp.c:1311
 __ip_mc_inc_group+0xea9/0xf70 net/ipv4/igmp.c:1444
 ip_mc_inc_group net/ipv4/igmp.c:1453 [inline]
 ip_mc_up+0x1c3/0x400 net/ipv4/igmp.c:1775
 inetdev_event+0x1d03/0x1d80 net/ipv4/devinet.c:1522
 notifier_call_chain kernel/notifier.c:93 [inline]
 __raw_notifier_call_chain kernel/notifier.c:394 [inline]
 raw_notifier_call_chain+0x13d/0x240 kernel/notifier.c:401
 __dev_notify_flags+0x3da/0x860 net/core/dev.c:1733
 dev_change_flags+0x1ac/0x230 net/core/dev.c:7569
 do_setlink+0x165f/0x5ea0 net/core/rtnetlink.c:2492
 rtnl_newlink+0x2ad7/0x35a0 net/core/rtnetlink.c:3111
 rtnetlink_rcv_msg+0x1148/0x1540 net/core/rtnetlink.c:4947
 netlink_rcv_skb+0x394/0x640 net/netlink/af_netlink.c:2477
 rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:4965
 netlink_unicast_kernel net/netlink/af_netlink.c:1310 [inline]
 netlink_unicast+0x1699/0x1740 net/netlink/af_netlink.c:1336
 netlink_sendmsg+0x13c7/0x1440 net/netlink/af_netlink.c:1917
 sock_sendmsg_nosec net/socket.c:621 [inline]
 sock_sendmsg net/socket.c:631 [inline]
 ___sys_sendmsg+0xe3b/0x1240 net/socket.c:2116
 __sys_sendmsg net/socket.c:2154 [inline]
 __do_sys_sendmsg net/socket.c:2163 [inline]
 __se_sys_sendmsg+0x305/0x460 net/socket.c:2161
 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2161
 do_syscall_64+0xcf/0x110 arch/x86/entry/common.c:291
 entry_SYSCALL_64_after_hwframe+0x63/0xe7

Bytes 36-37 of 105 are uninitialized
Memory access of size 105 starts at ffff88819686c000
Data copied to user address 0000000020000380

Fixes: d83b06036048 ("net: add fdb generic dump routine")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: Ido Schimmel <idosch@mellanox.com>
Cc: David Ahern <dsahern@gmail.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 33d9227a8b807..7819f7804eeb8 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3800,6 +3800,9 @@ int ndo_dflt_fdb_dump(struct sk_buff *skb,
 {
 	int err;
 
+	if (dev->type != ARPHRD_ETHER)
+		return -EINVAL;
+
 	netif_addr_lock_bh(dev);
 	err = nlmsg_populate_fdb(skb, cb, dev, idx, &dev->uc);
 	if (err)
-- 
GitLab


From ecb239d96d369c23c33d41708646df646de669f4 Mon Sep 17 00:00:00 2001
From: Nicolas Saenz Julienne <nsaenzjulienne@suse.de>
Date: Mon, 3 Dec 2018 13:21:01 +0100
Subject: [PATCH 1775/1890] ethernet: fman: fix wrong of_node_put() in probe
 function

After getting a reference to the platform device's of_node the probe
function ends up calling of_find_matching_node() using the node as an
argument. The function takes care of decreasing the refcount on it. We
are then incorrectly decreasing the refcount on that node again.

This patch removes the unwarranted call to of_node_put().

Fixes: 414fd46e7762 ("fsl/fman: Add FMan support")
Signed-off-by: Nicolas Saenz Julienne <nsaenzjulienne@suse.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/fman/fman.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/freescale/fman/fman.c b/drivers/net/ethernet/freescale/fman/fman.c
index c415ac67cb7be..e80fedb27cee8 100644
--- a/drivers/net/ethernet/freescale/fman/fman.c
+++ b/drivers/net/ethernet/freescale/fman/fman.c
@@ -2786,7 +2786,7 @@ static struct fman *read_dts_node(struct platform_device *of_dev)
 	if (!muram_node) {
 		dev_err(&of_dev->dev, "%s: could not find MURAM node\n",
 			__func__);
-		goto fman_node_put;
+		goto fman_free;
 	}
 
 	err = of_address_to_resource(muram_node, 0,
@@ -2795,11 +2795,10 @@ static struct fman *read_dts_node(struct platform_device *of_dev)
 		of_node_put(muram_node);
 		dev_err(&of_dev->dev, "%s: of_address_to_resource() = %d\n",
 			__func__, err);
-		goto fman_node_put;
+		goto fman_free;
 	}
 
 	of_node_put(muram_node);
-	of_node_put(fm_node);
 
 	err = devm_request_irq(&of_dev->dev, irq, fman_irq, IRQF_SHARED,
 			       "fman", fman);
-- 
GitLab


From 70bb27b79adf63ea39e37371d09c823c7a8f93ce Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Fri, 9 Nov 2018 16:44:14 +0000
Subject: [PATCH 1776/1890] thermal: armada: fix legacy validity test sense

Commit 8c0e64ac4075 ("thermal: armada: get rid of the ->is_valid()
pointer") removed the unnecessary indirection through a function
pointer, but in doing so, also removed the negation operator too:

-       if (priv->data->is_valid && !priv->data->is_valid(priv)) {
+       if (armada_is_valid(priv)) {

which results in:

armada_thermal f06f808c.thermal: Temperature sensor reading not valid
armada_thermal f2400078.thermal: Temperature sensor reading not valid
armada_thermal f4400078.thermal: Temperature sensor reading not valid

at boot, or whenever the "temp" sysfs file is read.  Replace the
negation operator.

Fixes: 8c0e64ac4075 ("thermal: armada: get rid of the ->is_valid() pointer")
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/thermal/armada_thermal.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/thermal/armada_thermal.c b/drivers/thermal/armada_thermal.c
index 92f67d40f2e96..1c9830b2c84da 100644
--- a/drivers/thermal/armada_thermal.c
+++ b/drivers/thermal/armada_thermal.c
@@ -357,7 +357,7 @@ static int armada_get_temp_legacy(struct thermal_zone_device *thermal,
 	int ret;
 
 	/* Valid check */
-	if (armada_is_valid(priv)) {
+	if (!armada_is_valid(priv)) {
 		dev_err(priv->dev,
 			"Temperature sensor reading not valid\n");
 		return -EIO;
-- 
GitLab


From 01b3fd5ac97caffb8e5d5bd85086da33db3b361f Mon Sep 17 00:00:00 2001
From: Baruch Siach <baruch@tkos.co.il>
Date: Tue, 4 Dec 2018 16:03:52 +0200
Subject: [PATCH 1777/1890] net: mvpp2: fix detection of 10G SFP modules

The mvpp2_phylink_validate() relies on the interface field of
phylink_link_state to determine valid link modes. However, when called
from phylink_sfp_module_insert() this field in not initialized. The
default switch case then excludes 10G link modes. This allows 10G SFP
modules that are detected correctly to be configured at max rate of
2.5G.

Catch the uninitialized PHY mode case, and allow 10G rates.

Fixes: d97c9f4ab000b ("net: mvpp2: 1000baseX support")
Cc: Maxime Chevallier <maxime.chevallier@bootlin.com>
Cc: Antoine Tenart <antoine.tenart@bootlin.com>
Acked-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Baruch Siach <baruch@tkos.co.il>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index 7a37a37e3fb34..eb1dc8abc359c 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -4384,6 +4384,7 @@ static void mvpp2_phylink_validate(struct net_device *dev,
 
 	switch (state->interface) {
 	case PHY_INTERFACE_MODE_10GKR:
+	case PHY_INTERFACE_MODE_NA:
 		phylink_set(mask, 10000baseCR_Full);
 		phylink_set(mask, 10000baseSR_Full);
 		phylink_set(mask, 10000baseLR_Full);
-- 
GitLab


From 0fb628f0f250c74b1023edd0ca4a57c8b35b9b2c Mon Sep 17 00:00:00 2001
From: Baruch Siach <baruch@tkos.co.il>
Date: Tue, 4 Dec 2018 16:03:53 +0200
Subject: [PATCH 1778/1890] net: mvpp2: fix phylink handling of invalid PHY
 modes

The .validate phylink callback should empty the supported bitmap when
the interface mode is invalid.

Cc: Maxime Chevallier <maxime.chevallier@bootlin.com>
Cc: Antoine Tenart <antoine.tenart@bootlin.com>
Reported-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Baruch Siach <baruch@tkos.co.il>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/marvell/mvpp2/mvpp2_main.c   | 33 ++++++++++++++++++-
 1 file changed, 32 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index eb1dc8abc359c..125ea99418df6 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -4375,8 +4375,27 @@ static void mvpp2_phylink_validate(struct net_device *dev,
 				   unsigned long *supported,
 				   struct phylink_link_state *state)
 {
+	struct mvpp2_port *port = netdev_priv(dev);
 	__ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
 
+	/* Invalid combinations */
+	switch (state->interface) {
+	case PHY_INTERFACE_MODE_10GKR:
+	case PHY_INTERFACE_MODE_XAUI:
+		if (port->gop_id != 0)
+			goto empty_set;
+		break;
+	case PHY_INTERFACE_MODE_RGMII:
+	case PHY_INTERFACE_MODE_RGMII_ID:
+	case PHY_INTERFACE_MODE_RGMII_RXID:
+	case PHY_INTERFACE_MODE_RGMII_TXID:
+		if (port->gop_id == 0)
+			goto empty_set;
+		break;
+	default:
+		break;
+	}
+
 	phylink_set(mask, Autoneg);
 	phylink_set_port_modes(mask);
 	phylink_set(mask, Pause);
@@ -4384,6 +4403,7 @@ static void mvpp2_phylink_validate(struct net_device *dev,
 
 	switch (state->interface) {
 	case PHY_INTERFACE_MODE_10GKR:
+	case PHY_INTERFACE_MODE_XAUI:
 	case PHY_INTERFACE_MODE_NA:
 		phylink_set(mask, 10000baseCR_Full);
 		phylink_set(mask, 10000baseSR_Full);
@@ -4392,7 +4412,11 @@ static void mvpp2_phylink_validate(struct net_device *dev,
 		phylink_set(mask, 10000baseER_Full);
 		phylink_set(mask, 10000baseKR_Full);
 		/* Fall-through */
-	default:
+	case PHY_INTERFACE_MODE_RGMII:
+	case PHY_INTERFACE_MODE_RGMII_ID:
+	case PHY_INTERFACE_MODE_RGMII_RXID:
+	case PHY_INTERFACE_MODE_RGMII_TXID:
+	case PHY_INTERFACE_MODE_SGMII:
 		phylink_set(mask, 10baseT_Half);
 		phylink_set(mask, 10baseT_Full);
 		phylink_set(mask, 100baseT_Half);
@@ -4404,11 +4428,18 @@ static void mvpp2_phylink_validate(struct net_device *dev,
 		phylink_set(mask, 1000baseT_Full);
 		phylink_set(mask, 1000baseX_Full);
 		phylink_set(mask, 2500baseX_Full);
+		break;
+	default:
+		goto empty_set;
 	}
 
 	bitmap_and(supported, supported, mask, __ETHTOOL_LINK_MODE_MASK_NBITS);
 	bitmap_and(state->advertising, state->advertising, mask,
 		   __ETHTOOL_LINK_MODE_MASK_NBITS);
+	return;
+
+empty_set:
+	bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS);
 }
 
 static void mvpp22_xlg_link_state(struct mvpp2_port *port,
-- 
GitLab


From dc6946cbef38f10b9617250fa851e3cc550d9602 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Fri, 9 Nov 2018 17:01:05 +0000
Subject: [PATCH 1779/1890] thermal: armada: fix legacy resource fixup

When the armada thermal module is inserted, removed and then reinserted,
the system panics as per the messages below.  The reason is that "edit"
a live resource in the resource tree twice, and end up with it pointing
to some other hardware.

Editing live resources (resources that are part of the registered
resource tree) is not permissible - the resource tree is an ordered
set of resources, sorted by start address, and when a new resource is
inserted, it is validated that it (a) fits within its parent resource
and (b) does not overlap a neighbouring resource.

Get rid of this resource editing.  We can instead adjust the return
value from ioremap() as ioremap() deals with the creation of page-
based mappings - provided the adjustment does not cross a page
boundary.

SError Interrupt on CPU1, code 0xbf000000 -- SError
CPU: 1 PID: 2749 Comm: modprobe Not tainted 4.19.0+ #175
Hardware name: Marvell 8040 MACCHIATOBin Double shot (DT)
pstate: 20400085 (nzCv daIf +PAN -UAO)
pc : regmap_mmio_read+0x3c/0x60
lr : regmap_mmio_read+0x3c/0x60
sp : ffffff800d453900
x29: ffffff800d453900 x28: ffffff800096a1d0
x27: 0000000000000100 x26: ffffff80009696d8
x25: ffffff8000969000 x24: ffffffc13a588918
x23: ffffffc13a9a28a8 x22: ffffff800d4539dc
x21: 0000000000000084 x20: ffffff800d4539dc
x19: ffffffc13a5d5480 x18: 0000000000000000
x17: 0000000000000000 x16: 0000000000000000
x15: 0000000000000000 x14: 0000000000000000
x13: 0000000000000000 x12: 0000000000000030
x11: 0101010101010101 x10: 7f7f7f7f7f7f7f7f
x9 : 0000000000000000 x8 : ffffffc13a5d5a80
x7 : 0000000000000000 x6 : 000000000000003f
x5 : 0000000000000000 x4 : 0000000000000000
x3 : ffffff800851be70 x2 : ffffff800851bd60
x1 : ffffff800d492ff8 x0 : 0000000000000000
Kernel panic - not syncing: Asynchronous SError Interrupt
CPU: 1 PID: 2749 Comm: modprobe Not tainted 4.19.0+ #175
Hardware name: Marvell 8040 MACCHIATOBin Double shot (DT)
Call trace:
 dump_backtrace+0x0/0x158
 show_stack+0x14/0x1c
 dump_stack+0x90/0xb0
 panic+0x128/0x298
 print_tainted+0x0/0xa8
 arm64_serror_panic+0x74/0x80
 do_serror+0x5c/0xb8
 el1_error+0xb4/0x144
 regmap_mmio_read+0x3c/0x60
 _regmap_bus_reg_read+0x18/0x20
 _regmap_read+0x64/0x180
 regmap_read+0x44/0x6c
 armada_ap806_init+0x24/0x5c [armada_thermal]
 armada_thermal_probe+0x2c8/0x37c [armada_thermal]
 platform_drv_probe+0x4c/0xb0
 really_probe+0x21c/0x2b4
 driver_probe_device+0x58/0xfc
 __driver_attach+0xd4/0xd8
 bus_for_each_dev+0x50/0xa0
 driver_attach+0x20/0x28
 bus_add_driver+0x1c4/0x228
 driver_register+0x6c/0x124
 __platform_driver_register+0x4c/0x54
 armada_thermal_driver_init+0x20/0x1000 [armada_thermal]
 do_one_initcall+0x30/0x204
 do_init_module+0x5c/0x1d4
 load_module+0x1a88/0x212c
 __se_sys_finit_module+0xa0/0xac
 __arm64_sys_finit_module+0x1c/0x24
 el0_svc_common+0x94/0xf0
 el0_svc_handler+0x24/0x80
 el0_svc+0x8/0x3c0
SMP: stopping secondary CPUs
Kernel Offset: disabled
CPU features: 0x0,21806000
Memory Limit: none

Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Tested-by: Miquel Raynal <miquel.raynal@bootlin.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/thermal/armada_thermal.c | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/drivers/thermal/armada_thermal.c b/drivers/thermal/armada_thermal.c
index 1c9830b2c84da..a6d3ee6077f8b 100644
--- a/drivers/thermal/armada_thermal.c
+++ b/drivers/thermal/armada_thermal.c
@@ -526,23 +526,21 @@ static int armada_thermal_probe_legacy(struct platform_device *pdev,
 
 	/* First memory region points towards the status register */
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res)
-		return -EIO;
-
-	/*
-	 * Edit the resource start address and length to map over all the
-	 * registers, instead of pointing at them one by one.
-	 */
-	res->start -= data->syscon_status_off;
-	res->end = res->start + max(data->syscon_status_off,
-				    max(data->syscon_control0_off,
-					data->syscon_control1_off)) +
-		   sizeof(unsigned int) - 1;
-
 	base = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(base))
 		return PTR_ERR(base);
 
+	/*
+	 * Fix up from the old individual DT register specification to
+	 * cover all the registers.  We do this by adjusting the ioremap()
+	 * result, which should be fine as ioremap() deals with pages.
+	 * However, validate that we do not cross a page boundary while
+	 * making this adjustment.
+	 */
+	if (((unsigned long)base & ~PAGE_MASK) < data->syscon_status_off)
+		return -EINVAL;
+	base -= data->syscon_status_off;
+
 	priv->syscon = devm_regmap_init_mmio(&pdev->dev, base,
 					     &armada_thermal_regmap_config);
 	if (IS_ERR(priv->syscon))
-- 
GitLab


From 27359fd6e5f3c5db8fe544b63238b6170e8806d8 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Fri, 30 Nov 2018 11:05:06 -0500
Subject: [PATCH 1780/1890] dax: Fix unlock mismatch with updated API

Internal to dax_unlock_mapping_entry(), dax_unlock_entry() is used to
store a replacement entry in the Xarray at the given xas-index with the
DAX_LOCKED bit clear. When called, dax_unlock_entry() expects the unlocked
value of the entry relative to the current Xarray state to be specified.

In most contexts dax_unlock_entry() is operating in the same scope as
the matched dax_lock_entry(). However, in the dax_unlock_mapping_entry()
case the implementation needs to recall the original entry. In the case
where the original entry is a 'pmd' entry it is possible that the pfn
performed to do the lookup is misaligned to the value retrieved in the
Xarray.

Change the api to return the unlock cookie from dax_lock_page() and pass
it to dax_unlock_page(). This fixes a bug where dax_unlock_page() was
assuming that the page was PMD-aligned if the entry was a PMD entry with
signatures like:

 WARNING: CPU: 38 PID: 1396 at fs/dax.c:340 dax_insert_entry+0x2b2/0x2d0
 RIP: 0010:dax_insert_entry+0x2b2/0x2d0
 [..]
 Call Trace:
  dax_iomap_pte_fault.isra.41+0x791/0xde0
  ext4_dax_huge_fault+0x16f/0x1f0
  ? up_read+0x1c/0xa0
  __do_fault+0x1f/0x160
  __handle_mm_fault+0x1033/0x1490
  handle_mm_fault+0x18b/0x3d0

Link: https://lkml.kernel.org/r/20181130154902.GL10377@bombadil.infradead.org
Fixes: 9f32d221301c ("dax: Convert dax_lock_mapping_entry to XArray")
Reported-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Matthew Wilcox <willy@infradead.org>
Tested-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 fs/dax.c            | 21 ++++++++-------------
 include/linux/dax.h | 14 ++++++++------
 mm/memory-failure.c |  6 ++++--
 3 files changed, 20 insertions(+), 21 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 3f592dc18d67c..48132eca3761d 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -379,20 +379,20 @@ static struct page *dax_busy_page(void *entry)
  * @page: The page whose entry we want to lock
  *
  * Context: Process context.
- * Return: %true if the entry was locked or does not need to be locked.
+ * Return: A cookie to pass to dax_unlock_page() or 0 if the entry could
+ * not be locked.
  */
-bool dax_lock_mapping_entry(struct page *page)
+dax_entry_t dax_lock_page(struct page *page)
 {
 	XA_STATE(xas, NULL, 0);
 	void *entry;
-	bool locked;
 
 	/* Ensure page->mapping isn't freed while we look at it */
 	rcu_read_lock();
 	for (;;) {
 		struct address_space *mapping = READ_ONCE(page->mapping);
 
-		locked = false;
+		entry = NULL;
 		if (!mapping || !dax_mapping(mapping))
 			break;
 
@@ -403,7 +403,7 @@ bool dax_lock_mapping_entry(struct page *page)
 		 * otherwise we would not have a valid pfn_to_page()
 		 * translation.
 		 */
-		locked = true;
+		entry = (void *)~0UL;
 		if (S_ISCHR(mapping->host->i_mode))
 			break;
 
@@ -426,23 +426,18 @@ bool dax_lock_mapping_entry(struct page *page)
 		break;
 	}
 	rcu_read_unlock();
-	return locked;
+	return (dax_entry_t)entry;
 }
 
-void dax_unlock_mapping_entry(struct page *page)
+void dax_unlock_page(struct page *page, dax_entry_t cookie)
 {
 	struct address_space *mapping = page->mapping;
 	XA_STATE(xas, &mapping->i_pages, page->index);
-	void *entry;
 
 	if (S_ISCHR(mapping->host->i_mode))
 		return;
 
-	rcu_read_lock();
-	entry = xas_load(&xas);
-	rcu_read_unlock();
-	entry = dax_make_entry(page_to_pfn_t(page), dax_is_pmd_entry(entry));
-	dax_unlock_entry(&xas, entry);
+	dax_unlock_entry(&xas, (void *)cookie);
 }
 
 /*
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 450b28db95331..0dd316a74a295 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -7,6 +7,8 @@
 #include <linux/radix-tree.h>
 #include <asm/pgtable.h>
 
+typedef unsigned long dax_entry_t;
+
 struct iomap_ops;
 struct dax_device;
 struct dax_operations {
@@ -88,8 +90,8 @@ int dax_writeback_mapping_range(struct address_space *mapping,
 		struct block_device *bdev, struct writeback_control *wbc);
 
 struct page *dax_layout_busy_page(struct address_space *mapping);
-bool dax_lock_mapping_entry(struct page *page);
-void dax_unlock_mapping_entry(struct page *page);
+dax_entry_t dax_lock_page(struct page *page);
+void dax_unlock_page(struct page *page, dax_entry_t cookie);
 #else
 static inline bool bdev_dax_supported(struct block_device *bdev,
 		int blocksize)
@@ -122,14 +124,14 @@ static inline int dax_writeback_mapping_range(struct address_space *mapping,
 	return -EOPNOTSUPP;
 }
 
-static inline bool dax_lock_mapping_entry(struct page *page)
+static inline dax_entry_t dax_lock_page(struct page *page)
 {
 	if (IS_DAX(page->mapping->host))
-		return true;
-	return false;
+		return ~0UL;
+	return 0;
 }
 
-static inline void dax_unlock_mapping_entry(struct page *page)
+static inline void dax_unlock_page(struct page *page, dax_entry_t cookie)
 {
 }
 #endif
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 0cd3de3550f08..7c72f2a95785e 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1161,6 +1161,7 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
 	LIST_HEAD(tokill);
 	int rc = -EBUSY;
 	loff_t start;
+	dax_entry_t cookie;
 
 	/*
 	 * Prevent the inode from being freed while we are interrogating
@@ -1169,7 +1170,8 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
 	 * also prevents changes to the mapping of this pfn until
 	 * poison signaling is complete.
 	 */
-	if (!dax_lock_mapping_entry(page))
+	cookie = dax_lock_page(page);
+	if (!cookie)
 		goto out;
 
 	if (hwpoison_filter(page)) {
@@ -1220,7 +1222,7 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
 	kill_procs(&tokill, flags & MF_MUST_KILL, !unmap_success, pfn, flags);
 	rc = 0;
 unlock:
-	dax_unlock_mapping_entry(page);
+	dax_unlock_page(page, cookie);
 out:
 	/* drop pgmap ref acquired in caller */
 	put_dev_pagemap(pgmap);
-- 
GitLab


From 6085c13351ebab4a427215082d11e2f02bd3fb46 Mon Sep 17 00:00:00 2001
From: Stefan Wahren <stefan.wahren@i2se.com>
Date: Sat, 10 Nov 2018 17:55:27 +0100
Subject: [PATCH 1781/1890] thermal: bcm2835: Switch to SPDX identifier

Adopt the SPDX license identifier headers to ease license compliance
management.

Cc: Martin Sperl <kernel@martin.sperl.org>
Signed-off-by: Stefan Wahren <stefan.wahren@i2se.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/thermal/broadcom/bcm2835_thermal.c | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/drivers/thermal/broadcom/bcm2835_thermal.c b/drivers/thermal/broadcom/bcm2835_thermal.c
index 23ad4f9f21438..b9d90f0ed504d 100644
--- a/drivers/thermal/broadcom/bcm2835_thermal.c
+++ b/drivers/thermal/broadcom/bcm2835_thermal.c
@@ -1,17 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * Driver for Broadcom BCM2835 SoC temperature sensor
  *
  * Copyright (C) 2016 Martin Sperl
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/clk.h>
-- 
GitLab


From 25896d073d8a0403b07e6dec56f58e6c33678207 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Wed, 5 Dec 2018 15:27:19 +0900
Subject: [PATCH 1782/1890] x86/build: Fix compiler support check for
 CONFIG_RETPOLINE

It is troublesome to add a diagnostic like this to the Makefile
parse stage because the top-level Makefile could be parsed with
a stale include/config/auto.conf.

Once you are hit by the error about non-retpoline compiler, the
compilation still breaks even after disabling CONFIG_RETPOLINE.

The easiest fix is to move this check to the "archprepare" like
this commit did:

  829fe4aa9ac1 ("x86: Allow generating user-space headers without a compiler")

Reported-by: Meelis Roos <mroos@linux.ee>
Tested-by: Meelis Roos <mroos@linux.ee>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Acked-by: Zhenzhong Duan <zhenzhong.duan@oracle.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Zhenzhong Duan <zhenzhong.duan@oracle.com>
Fixes: 4cd24de3a098 ("x86/retpoline: Make CONFIG_RETPOLINE depend on compiler support")
Link: http://lkml.kernel.org/r/1543991239-18476-1-git-send-email-yamada.masahiro@socionext.com
Link: https://lkml.org/lkml/2018/12/4/206
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/Makefile | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index f5d7f41345249..75ef499a66e2b 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -220,9 +220,6 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
 
 # Avoid indirect branches in kernel to deal with Spectre
 ifdef CONFIG_RETPOLINE
-ifeq ($(RETPOLINE_CFLAGS),)
-  $(error You are building kernel with non-retpoline compiler, please update your compiler.)
-endif
   KBUILD_CFLAGS += $(RETPOLINE_CFLAGS)
 endif
 
@@ -307,6 +304,13 @@ ifndef CC_HAVE_ASM_GOTO
 	@echo Compiler lacks asm-goto support.
 	@exit 1
 endif
+ifdef CONFIG_RETPOLINE
+ifeq ($(RETPOLINE_CFLAGS),)
+	@echo "You are building kernel with non-retpoline compiler." >&2
+	@echo "Please update your compiler." >&2
+	@false
+endif
+endif
 
 archclean:
 	$(Q)rm -rf $(objtree)/arch/i386
-- 
GitLab


From c06abca69218ac42fa58d1ba7a7b0d9bab5f1b18 Mon Sep 17 00:00:00 2001
From: Lucas Stach <l.stach@pengutronix.de>
Date: Tue, 6 Nov 2018 03:40:25 +0000
Subject: [PATCH 1783/1890] Revert "dmaengine: imx-sdma: Use GFP_NOWAIT for dma
 allocations"

This reverts commit c1199875d327, as this depends on another commit
that is going to be reverted.

Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
Signed-off-by: Robin Gong <yibin.gong@nxp.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/imx-sdma.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index b4ec2d20e6616..3bca5e0c715fc 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -1194,8 +1194,8 @@ static int sdma_alloc_bd(struct sdma_desc *desc)
 {
 	int ret = 0;
 
-	desc->bd = dma_pool_alloc(desc->sdmac->bd_pool, GFP_NOWAIT,
-				  &desc->bd_phys);
+	desc->bd = dma_pool_alloc(desc->sdmac->bd_pool, GFP_ATOMIC,
+					&desc->bd_phys);
 	if (!desc->bd) {
 		ret = -ENOMEM;
 		goto out;
-- 
GitLab


From ebb853b1bd5f659b92c71dc6a9de44cfc37c78c0 Mon Sep 17 00:00:00 2001
From: Lucas Stach <l.stach@pengutronix.de>
Date: Tue, 6 Nov 2018 03:40:28 +0000
Subject: [PATCH 1784/1890] Revert "dmaengine: imx-sdma: alloclate bd memory
 from dma pool"

This reverts commit fe5b85c656bc. The SDMA engine needs the descriptors to
be contiguous in memory. As the dma pool API is only able to provide a
single descriptor per alloc invocation there is no guarantee that multiple
descriptors satisfy this requirement. Also the code in question is broken
as it only allocates memory for a single descriptor, without looking at the
number of descriptors required for the transfer, leading to out-of-bounds
accesses when the descriptors are written.

Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
Signed-off-by: Robin Gong <yibin.gong@nxp.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/imx-sdma.c | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index 3bca5e0c715fc..8d2fec8b16cc1 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -24,7 +24,6 @@
 #include <linux/spinlock.h>
 #include <linux/device.h>
 #include <linux/dma-mapping.h>
-#include <linux/dmapool.h>
 #include <linux/firmware.h>
 #include <linux/slab.h>
 #include <linux/platform_device.h>
@@ -376,7 +375,6 @@ struct sdma_channel {
 	u32				shp_addr, per_addr;
 	enum dma_status			status;
 	struct imx_dma_data		data;
-	struct dma_pool			*bd_pool;
 };
 
 #define IMX_DMA_SG_LOOP		BIT(0)
@@ -1192,10 +1190,11 @@ static int sdma_request_channel0(struct sdma_engine *sdma)
 
 static int sdma_alloc_bd(struct sdma_desc *desc)
 {
+	u32 bd_size = desc->num_bd * sizeof(struct sdma_buffer_descriptor);
 	int ret = 0;
 
-	desc->bd = dma_pool_alloc(desc->sdmac->bd_pool, GFP_ATOMIC,
-					&desc->bd_phys);
+	desc->bd = dma_zalloc_coherent(NULL, bd_size, &desc->bd_phys,
+					GFP_ATOMIC);
 	if (!desc->bd) {
 		ret = -ENOMEM;
 		goto out;
@@ -1206,7 +1205,9 @@ static int sdma_alloc_bd(struct sdma_desc *desc)
 
 static void sdma_free_bd(struct sdma_desc *desc)
 {
-	dma_pool_free(desc->sdmac->bd_pool, desc->bd, desc->bd_phys);
+	u32 bd_size = desc->num_bd * sizeof(struct sdma_buffer_descriptor);
+
+	dma_free_coherent(NULL, bd_size, desc->bd, desc->bd_phys);
 }
 
 static void sdma_desc_free(struct virt_dma_desc *vd)
@@ -1272,10 +1273,6 @@ static int sdma_alloc_chan_resources(struct dma_chan *chan)
 	if (ret)
 		goto disable_clk_ahb;
 
-	sdmac->bd_pool = dma_pool_create("bd_pool", chan->device->dev,
-				sizeof(struct sdma_buffer_descriptor),
-				32, 0);
-
 	return 0;
 
 disable_clk_ahb:
@@ -1304,9 +1301,6 @@ static void sdma_free_chan_resources(struct dma_chan *chan)
 
 	clk_disable(sdma->clk_ipg);
 	clk_disable(sdma->clk_ahb);
-
-	dma_pool_destroy(sdmac->bd_pool);
-	sdmac->bd_pool = NULL;
 }
 
 static struct sdma_desc *sdma_transfer_init(struct sdma_channel *sdmac,
-- 
GitLab


From b8603d2a5795c42f78998e70dc792336e0dc20c9 Mon Sep 17 00:00:00 2001
From: Lucas Stach <l.stach@pengutronix.de>
Date: Tue, 6 Nov 2018 03:40:33 +0000
Subject: [PATCH 1785/1890] dmaengine: imx-sdma: implement channel termination
 via worker

The dmaengine documentation states that device_terminate_all may be
asynchronous and need not wait for the active transfers to stop.

This allows us to move most of the functionality currently implemented
in the sdma channel termination function to run in a worker, outside
of any atomic context. Moving this out of atomic context has two
benefits: we can now sleep while waiting for the channel to terminate,
instead of busy waiting and the freeing of the dma descriptors happens
with IRQs enabled, getting rid of a warning in the dma mapping code.

As the termination is now async, we need to implement the
device_synchronize dma engine function which simply waits for the
worker to finish its execution.

Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
Signed-off-by: Robin Gong <yibin.gong@nxp.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/imx-sdma.c | 51 +++++++++++++++++++++++++++++++-----------
 1 file changed, 38 insertions(+), 13 deletions(-)

diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index 8d2fec8b16cc1..03d46f1b08263 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -32,6 +32,7 @@
 #include <linux/of_address.h>
 #include <linux/of_device.h>
 #include <linux/of_dma.h>
+#include <linux/workqueue.h>
 
 #include <asm/irq.h>
 #include <linux/platform_data/dma-imx-sdma.h>
@@ -375,6 +376,7 @@ struct sdma_channel {
 	u32				shp_addr, per_addr;
 	enum dma_status			status;
 	struct imx_dma_data		data;
+	struct work_struct		terminate_worker;
 };
 
 #define IMX_DMA_SG_LOOP		BIT(0)
@@ -1025,31 +1027,49 @@ static int sdma_disable_channel(struct dma_chan *chan)
 
 	return 0;
 }
-
-static int sdma_disable_channel_with_delay(struct dma_chan *chan)
+static void sdma_channel_terminate_work(struct work_struct *work)
 {
-	struct sdma_channel *sdmac = to_sdma_chan(chan);
+	struct sdma_channel *sdmac = container_of(work, struct sdma_channel,
+						  terminate_worker);
 	unsigned long flags;
 	LIST_HEAD(head);
 
-	sdma_disable_channel(chan);
-	spin_lock_irqsave(&sdmac->vc.lock, flags);
-	vchan_get_all_descriptors(&sdmac->vc, &head);
-	sdmac->desc = NULL;
-	spin_unlock_irqrestore(&sdmac->vc.lock, flags);
-	vchan_dma_desc_free_list(&sdmac->vc, &head);
-
 	/*
 	 * According to NXP R&D team a delay of one BD SDMA cost time
 	 * (maximum is 1ms) should be added after disable of the channel
 	 * bit, to ensure SDMA core has really been stopped after SDMA
 	 * clients call .device_terminate_all.
 	 */
-	mdelay(1);
+	usleep_range(1000, 2000);
+
+	spin_lock_irqsave(&sdmac->vc.lock, flags);
+	vchan_get_all_descriptors(&sdmac->vc, &head);
+	sdmac->desc = NULL;
+	spin_unlock_irqrestore(&sdmac->vc.lock, flags);
+	vchan_dma_desc_free_list(&sdmac->vc, &head);
+}
+
+static int sdma_disable_channel_async(struct dma_chan *chan)
+{
+	struct sdma_channel *sdmac = to_sdma_chan(chan);
+
+	sdma_disable_channel(chan);
+
+	if (sdmac->desc)
+		schedule_work(&sdmac->terminate_worker);
 
 	return 0;
 }
 
+static void sdma_channel_synchronize(struct dma_chan *chan)
+{
+	struct sdma_channel *sdmac = to_sdma_chan(chan);
+
+	vchan_synchronize(&sdmac->vc);
+
+	flush_work(&sdmac->terminate_worker);
+}
+
 static void sdma_set_watermarklevel_for_p2p(struct sdma_channel *sdmac)
 {
 	struct sdma_engine *sdma = sdmac->sdma;
@@ -1287,7 +1307,9 @@ static void sdma_free_chan_resources(struct dma_chan *chan)
 	struct sdma_channel *sdmac = to_sdma_chan(chan);
 	struct sdma_engine *sdma = sdmac->sdma;
 
-	sdma_disable_channel_with_delay(chan);
+	sdma_disable_channel_async(chan);
+
+	sdma_channel_synchronize(chan);
 
 	if (sdmac->event_id0)
 		sdma_event_disable(sdmac, sdmac->event_id0);
@@ -1993,6 +2015,8 @@ static int sdma_probe(struct platform_device *pdev)
 
 		sdmac->channel = i;
 		sdmac->vc.desc_free = sdma_desc_free;
+		INIT_WORK(&sdmac->terminate_worker,
+				sdma_channel_terminate_work);
 		/*
 		 * Add the channel to the DMAC list. Do not add channel 0 though
 		 * because we need it internally in the SDMA driver. This also means
@@ -2044,7 +2068,8 @@ static int sdma_probe(struct platform_device *pdev)
 	sdma->dma_device.device_prep_slave_sg = sdma_prep_slave_sg;
 	sdma->dma_device.device_prep_dma_cyclic = sdma_prep_dma_cyclic;
 	sdma->dma_device.device_config = sdma_config;
-	sdma->dma_device.device_terminate_all = sdma_disable_channel_with_delay;
+	sdma->dma_device.device_terminate_all = sdma_disable_channel_async;
+	sdma->dma_device.device_synchronize = sdma_channel_synchronize;
 	sdma->dma_device.src_addr_widths = SDMA_DMA_BUSWIDTHS;
 	sdma->dma_device.dst_addr_widths = SDMA_DMA_BUSWIDTHS;
 	sdma->dma_device.directions = SDMA_DMA_DIRECTIONS;
-- 
GitLab


From 64068853bc77786d1a28abb4087d6a3e93aedbe2 Mon Sep 17 00:00:00 2001
From: Lucas Stach <l.stach@pengutronix.de>
Date: Tue, 6 Nov 2018 03:40:37 +0000
Subject: [PATCH 1786/1890] dmaengine: imx-sdma: use GFP_NOWAIT for dma
 descriptor allocations

DMA buffer descriptors aren't allocated from atomic context, so they
can use the less heavyweigth GFP_NOWAIT.

Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
Signed-off-by: Robin Gong <yibin.gong@nxp.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/imx-sdma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index 03d46f1b08263..cb1b44d78a1f2 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -1214,7 +1214,7 @@ static int sdma_alloc_bd(struct sdma_desc *desc)
 	int ret = 0;
 
 	desc->bd = dma_zalloc_coherent(NULL, bd_size, &desc->bd_phys,
-					GFP_ATOMIC);
+					GFP_NOWAIT);
 	if (!desc->bd) {
 		ret = -ENOMEM;
 		goto out;
-- 
GitLab


From dae522045094ebfa9a10cf7951a8f79b02da8e15 Mon Sep 17 00:00:00 2001
From: Baruch Siach <baruch@tkos.co.il>
Date: Tue, 4 Dec 2018 17:46:02 +0200
Subject: [PATCH 1787/1890] Revert "arm64: dts: marvell: add CPU Idle power
 state support on Armada 7K/8K"

This reverts commit 8ed46368776b3bc93d74c1f8f2bfb9fd8a9ad805.

This commit breaks boot on Armada 8K based systems. Reverting it makes
affected systems boot again.

Reported-by: Sergey Matyukevich <geomatsi@gmail.com>
Signed-off-by: Baruch Siach <baruch@tkos.co.il>
Signed-off-by: Gregory CLEMENT <gregory.clement@bootlin.com>
---
 .../boot/dts/marvell/armada-ap806-quad.dtsi   |  4 ---
 arch/arm64/boot/dts/marvell/armada-ap806.dtsi | 27 -------------------
 2 files changed, 31 deletions(-)

diff --git a/arch/arm64/boot/dts/marvell/armada-ap806-quad.dtsi b/arch/arm64/boot/dts/marvell/armada-ap806-quad.dtsi
index 64632c8738887..01ea662afba87 100644
--- a/arch/arm64/boot/dts/marvell/armada-ap806-quad.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-ap806-quad.dtsi
@@ -20,28 +20,24 @@ cpu0: cpu@0 {
 			compatible = "arm,cortex-a72", "arm,armv8";
 			reg = <0x000>;
 			enable-method = "psci";
-			cpu-idle-states = <&CPU_SLEEP_0>;
 		};
 		cpu1: cpu@1 {
 			device_type = "cpu";
 			compatible = "arm,cortex-a72", "arm,armv8";
 			reg = <0x001>;
 			enable-method = "psci";
-			cpu-idle-states = <&CPU_SLEEP_0>;
 		};
 		cpu2: cpu@100 {
 			device_type = "cpu";
 			compatible = "arm,cortex-a72", "arm,armv8";
 			reg = <0x100>;
 			enable-method = "psci";
-			cpu-idle-states = <&CPU_SLEEP_0>;
 		};
 		cpu3: cpu@101 {
 			device_type = "cpu";
 			compatible = "arm,cortex-a72", "arm,armv8";
 			reg = <0x101>;
 			enable-method = "psci";
-			cpu-idle-states = <&CPU_SLEEP_0>;
 		};
 	};
 };
diff --git a/arch/arm64/boot/dts/marvell/armada-ap806.dtsi b/arch/arm64/boot/dts/marvell/armada-ap806.dtsi
index 073610ac0a53e..7d94c1fa592a0 100644
--- a/arch/arm64/boot/dts/marvell/armada-ap806.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-ap806.dtsi
@@ -28,33 +28,6 @@ psci {
 		method = "smc";
 	};
 
-	cpus {
-		#address-cells = <1>;
-		#size-cells = <0>;
-
-		idle_states {
-			entry_method = "arm,pcsi";
-
-			CPU_SLEEP_0: cpu-sleep-0 {
-				compatible = "arm,idle-state";
-				local-timer-stop;
-				arm,psci-suspend-param = <0x0010000>;
-				entry-latency-us = <80>;
-				exit-latency-us  = <160>;
-				min-residency-us = <320>;
-			};
-
-			CLUSTER_SLEEP_0: cluster-sleep-0 {
-				compatible = "arm,idle-state";
-				local-timer-stop;
-				arm,psci-suspend-param = <0x1010000>;
-				entry-latency-us = <500>;
-				exit-latency-us = <1000>;
-				min-residency-us = <2500>;
-			};
-		};
-	};
-
 	ap806 {
 		#address-cells = <2>;
 		#size-cells = <2>;
-- 
GitLab


From 59861547ec9a9736e7882f6fb0c096a720ff811a Mon Sep 17 00:00:00 2001
From: Bin Liu <b-liu@ti.com>
Date: Mon, 12 Nov 2018 09:43:22 -0600
Subject: [PATCH 1788/1890] dmaengine: cppi41: delete channel from pending list
 when stop channel

The driver defines three states for a cppi channel.
- idle: .chan_busy == 0 && not in .pending list
- pending: .chan_busy == 0 && in .pending list
- busy: .chan_busy == 1 && not in .pending list

There are cases in which the cppi channel could be in the pending state
when cppi41_dma_issue_pending() is called after cppi41_runtime_suspend()
is called.

cppi41_stop_chan() has a bug for these cases to set channels to idle state.
It only checks the .chan_busy flag, but not the .pending list, then later
when cppi41_runtime_resume() is called the channels in .pending list will
be transitioned to busy state.

Removing channels from the .pending list solves the problem.

Fixes: 975faaeb9985 ("dma: cppi41: start tear down only if channel is busy")
Cc: stable@vger.kernel.org # v3.15+
Signed-off-by: Bin Liu <b-liu@ti.com>
Reviewed-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/ti/cppi41.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/dma/ti/cppi41.c b/drivers/dma/ti/cppi41.c
index 1497da3677109..e507ec36c0d3d 100644
--- a/drivers/dma/ti/cppi41.c
+++ b/drivers/dma/ti/cppi41.c
@@ -723,8 +723,22 @@ static int cppi41_stop_chan(struct dma_chan *chan)
 
 	desc_phys = lower_32_bits(c->desc_phys);
 	desc_num = (desc_phys - cdd->descs_phys) / sizeof(struct cppi41_desc);
-	if (!cdd->chan_busy[desc_num])
+	if (!cdd->chan_busy[desc_num]) {
+		struct cppi41_channel *cc, *_ct;
+
+		/*
+		 * channels might still be in the pendling list if
+		 * cppi41_dma_issue_pending() is called after
+		 * cppi41_runtime_suspend() is called
+		 */
+		list_for_each_entry_safe(cc, _ct, &cdd->pending, node) {
+			if (cc != c)
+				continue;
+			list_del(&cc->node);
+			break;
+		}
 		return 0;
+	}
 
 	ret = cppi41_tear_down_chan(c);
 	if (ret)
-- 
GitLab


From 300cd664865bed5d50ae0a42fb4e3a6f415e8a10 Mon Sep 17 00:00:00 2001
From: Young Xiao <YangX92@hotmail.com>
Date: Wed, 28 Nov 2018 08:06:53 +0000
Subject: [PATCH 1789/1890] staging: rtl8712: Fix possible buffer overrun

In commit 8b7a13c3f404 ("staging: r8712u: Fix possible buffer
overrun") we fix a potential off by one by making the limit smaller.
The better fix is to make the buffer larger.  This makes it match up
with the similar code in other drivers.

Fixes: 8b7a13c3f404 ("staging: r8712u: Fix possible buffer overrun")
Signed-off-by: Young Xiao <YangX92@hotmail.com>
Cc: stable <stable@vger.kernel.org>
Reviewed-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8712/mlme_linux.c   | 2 +-
 drivers/staging/rtl8712/rtl871x_mlme.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/rtl8712/mlme_linux.c b/drivers/staging/rtl8712/mlme_linux.c
index 9d156efbc9edf..4d473f008aa48 100644
--- a/drivers/staging/rtl8712/mlme_linux.c
+++ b/drivers/staging/rtl8712/mlme_linux.c
@@ -146,7 +146,7 @@ void r8712_report_sec_ie(struct _adapter *adapter, u8 authmode, u8 *sec_ie)
 		p = buff;
 		p += sprintf(p, "ASSOCINFO(ReqIEs=");
 		len = sec_ie[1] + 2;
-		len =  (len < IW_CUSTOM_MAX) ? len : IW_CUSTOM_MAX - 1;
+		len =  (len < IW_CUSTOM_MAX) ? len : IW_CUSTOM_MAX;
 		for (i = 0; i < len; i++)
 			p += sprintf(p, "%02x", sec_ie[i]);
 		p += sprintf(p, ")");
diff --git a/drivers/staging/rtl8712/rtl871x_mlme.c b/drivers/staging/rtl8712/rtl871x_mlme.c
index a7374006a9fbe..986a1d5269180 100644
--- a/drivers/staging/rtl8712/rtl871x_mlme.c
+++ b/drivers/staging/rtl8712/rtl871x_mlme.c
@@ -1346,7 +1346,7 @@ sint r8712_restruct_sec_ie(struct _adapter *adapter, u8 *in_ie,
 		     u8 *out_ie, uint in_len)
 {
 	u8 authmode = 0, match;
-	u8 sec_ie[255], uncst_oui[4], bkup_ie[255];
+	u8 sec_ie[IW_CUSTOM_MAX], uncst_oui[4], bkup_ie[255];
 	u8 wpa_oui[4] = {0x0, 0x50, 0xf2, 0x01};
 	uint ielength, cnt, remove_cnt;
 	int iEntry;
-- 
GitLab


From 87e4a5405f087427fbf8b437d2796283dce2b38f Mon Sep 17 00:00:00 2001
From: Young Xiao <YangX92@hotmail.com>
Date: Tue, 27 Nov 2018 09:12:20 +0000
Subject: [PATCH 1790/1890] Revert commit ef9209b642f "staging: rtl8723bs: Fix
 indenting errors and an off-by-one mistake in core/rtw_mlme_ext.c"

pstapriv->max_num_sta is always <= NUM_STA, since max_num_sta is either
set in _rtw_init_sta_priv() or rtw_set_beacon().

Fixes: ef9209b642f1 ("staging: rtl8723bs: Fix indenting errors and an off-by-one mistake in core/rtw_mlme_ext.c")
Signed-off-by: Young Xiao <YangX92@hotmail.com>
Reviewed-by: Dan Carpenter <dan.carpenter@oracle.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8723bs/core/rtw_mlme_ext.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c b/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c
index 69c7abc0e3a55..8445d516c93d3 100644
--- a/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c
+++ b/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c
@@ -1565,7 +1565,7 @@ unsigned int OnAssocReq(struct adapter *padapter, union recv_frame *precv_frame)
 	if (pstat->aid > 0) {
 		DBG_871X("  old AID %d\n", pstat->aid);
 	} else {
-		for (pstat->aid = 1; pstat->aid < NUM_STA; pstat->aid++)
+		for (pstat->aid = 1; pstat->aid <= NUM_STA; pstat->aid++)
 			if (pstapriv->sta_aid[pstat->aid - 1] == NULL)
 				break;
 
-- 
GitLab


From d7859905301880ad3e16272399d26900af3ac496 Mon Sep 17 00:00:00 2001
From: Alexander Theissen <alex.theissen@me.com>
Date: Tue, 4 Dec 2018 23:43:35 +0100
Subject: [PATCH 1791/1890] usb: appledisplay: Add 27" Apple Cinema Display

Add another Apple Cinema Display to the list of supported displays.

Signed-off-by: Alexander Theissen <alex.theissen@me.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/misc/appledisplay.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/misc/appledisplay.c b/drivers/usb/misc/appledisplay.c
index 85b48c6ddc7e6..39ca31b4de466 100644
--- a/drivers/usb/misc/appledisplay.c
+++ b/drivers/usb/misc/appledisplay.c
@@ -51,6 +51,7 @@ static const struct usb_device_id appledisplay_table[] = {
 	{ APPLEDISPLAY_DEVICE(0x921c) },
 	{ APPLEDISPLAY_DEVICE(0x921d) },
 	{ APPLEDISPLAY_DEVICE(0x9222) },
+	{ APPLEDISPLAY_DEVICE(0x9226) },
 	{ APPLEDISPLAY_DEVICE(0x9236) },
 
 	/* Terminating entry */
-- 
GitLab


From d81bb019d7bb30091e3c796102c00935d6dd7ca9 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Wed, 28 Nov 2018 11:25:58 -0500
Subject: [PATCH 1792/1890] USB: Fix invalid-free bug in
 port_over_current_notify()

Syzbot and KASAN found the following invalid-free bug in
port_over_current_notify():

--------------------------------------------------------------------------
BUG: KASAN: double-free or invalid-free in port_over_current_notify
drivers/usb/core/hub.c:5192 [inline]
BUG: KASAN: double-free or invalid-free in port_event
drivers/usb/core/hub.c:5241 [inline]
BUG: KASAN: double-free or invalid-free in hub_event+0xd97/0x4140
drivers/usb/core/hub.c:5384

CPU: 1 PID: 32710 Comm: kworker/1:3 Not tainted 4.20.0-rc3+ #129
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
Google 01/01/2011
Workqueue: usb_hub_wq hub_event
Call Trace:
  __dump_stack lib/dump_stack.c:77 [inline]
  dump_stack+0x244/0x39d lib/dump_stack.c:113
  print_address_description.cold.7+0x9/0x1ff mm/kasan/report.c:256
  kasan_report_invalid_free+0x64/0xa0 mm/kasan/report.c:336
  __kasan_slab_free+0x13a/0x150 mm/kasan/kasan.c:501
  kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528
  __cache_free mm/slab.c:3498 [inline]
  kfree+0xcf/0x230 mm/slab.c:3817
  port_over_current_notify drivers/usb/core/hub.c:5192 [inline]
  port_event drivers/usb/core/hub.c:5241 [inline]
  hub_event+0xd97/0x4140 drivers/usb/core/hub.c:5384
  process_one_work+0xc90/0x1c40 kernel/workqueue.c:2153
  worker_thread+0x17f/0x1390 kernel/workqueue.c:2296
  kthread+0x35a/0x440 kernel/kthread.c:246
  ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:352
--------------------------------------------------------------------------

The problem is caused by use of a static array to store
environment-string pointers.  When the routine is called by multiple
threads concurrently, the pointers from one thread can overwrite those
from another.

The solution is to use an ordinary automatic array instead of a static
array.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Reported-by: syzbot+98881958e1410ec7e53c@syzkaller.appspotmail.com
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hub.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 0f9381b69a3b5..528664059a126 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -5163,7 +5163,7 @@ static void hub_port_connect_change(struct usb_hub *hub, int port1,
 /* Handle notifying userspace about hub over-current events */
 static void port_over_current_notify(struct usb_port *port_dev)
 {
-	static char *envp[] = { NULL, NULL, NULL };
+	char *envp[3];
 	struct device *hub_dev;
 	char *port_dev_path;
 
@@ -5187,6 +5187,7 @@ static void port_over_current_notify(struct usb_port *port_dev)
 	if (!envp[1])
 		goto exit;
 
+	envp[2] = NULL;
 	kobject_uevent_env(&hub_dev->kobj, KOBJ_CHANGE, envp);
 
 	kfree(envp[1]);
-- 
GitLab


From 2f2dde6ba89b1ef1fe23c1138131b315d9aa4019 Mon Sep 17 00:00:00 2001
From: Harry Pan <harry.pan@intel.com>
Date: Thu, 29 Nov 2018 00:40:41 +0800
Subject: [PATCH 1793/1890] usb: quirk: add no-LPM quirk on SanDisk Ultra Flair
 device

Some lower volume SanDisk Ultra Flair in 16GB, which the VID:PID is
in 0781:5591, will aggressively request LPM of U1/U2 during runtime,
when using this thumb drive as the OS installation key we found the
device will generate failure during U1 exit path making it dropped
from the USB bus, this causes a corrupted installation in system at
the end.

i.e.,
[  166.918296] hub 2-0:1.0: state 7 ports 7 chg 0000 evt 0004
[  166.918327] usb usb2-port2: link state change
[  166.918337] usb usb2-port2: do warm reset
[  166.970039] usb usb2-port2: not warm reset yet, waiting 50ms
[  167.022040] usb usb2-port2: not warm reset yet, waiting 200ms
[  167.276043] usb usb2-port2: status 02c0, change 0041, 5.0 Gb/s
[  167.276050] usb 2-2: USB disconnect, device number 2
[  167.276058] usb 2-2: unregistering device
[  167.276060] usb 2-2: unregistering interface 2-2:1.0
[  167.276170] xhci_hcd 0000:00:15.0: shutdown urb ffffa3c7cc695cc0 ep1in-bulk
[  167.284055] sd 0:0:0:0: [sda] tag#0 FAILED Result: hostbyte=DID_NO_CONNECT driverbyte=DRIVER_OK
[  167.284064] sd 0:0:0:0: [sda] tag#0 CDB: Read(10) 28 00 00 33 04 90 00 01 00 00
...

Analyzed the USB trace in the link layer we realized it is because
of the 6-ms timer of tRecoveryConfigurationTimeout which documented
on the USB 3.2 Revision 1.0, the section 7.5.10.4.2 of "Exit from
Recovery.Configuration"; device initiates U1 exit -> Recovery.Active
-> Recovery.Configuration, then the host timer timeout makes the link
transits to eSS.Inactive -> Rx.Detect follows by a Warm Reset.

Interestingly, the other higher volume of SanDisk Ultra Flair sharing
the same VID:PID, such as 64GB, would not request LPM during runtime,
it sticks at U0 always, thus disabling LPM does not affect those thumb
drives at all.

The same odd occures in SanDisk Ultra Fit 16GB, VID:PID in 0781:5583.

Signed-off-by: Harry Pan <harry.pan@intel.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/quirks.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index 0690fcff0ea23..514c5214ddb24 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -333,6 +333,10 @@ static const struct usb_device_id usb_quirk_list[] = {
 	/* Midiman M-Audio Keystation 88es */
 	{ USB_DEVICE(0x0763, 0x0192), .driver_info = USB_QUIRK_RESET_RESUME },
 
+	/* SanDisk Ultra Fit and Ultra Flair */
+	{ USB_DEVICE(0x0781, 0x5583), .driver_info = USB_QUIRK_NO_LPM },
+	{ USB_DEVICE(0x0781, 0x5591), .driver_info = USB_QUIRK_NO_LPM },
+
 	/* M-Systems Flash Disk Pioneers */
 	{ USB_DEVICE(0x08ec, 0x1000), .driver_info = USB_QUIRK_RESET_RESUME },
 
-- 
GitLab


From 48a2ca0ee3994df53da230c7079a18a70ec914f9 Mon Sep 17 00:00:00 2001
From: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Date: Tue, 4 Dec 2018 16:58:43 +0100
Subject: [PATCH 1794/1890] Revert "mfd: cros_ec: Use devm_kzalloc for private
 data"

This reverts commit 3aa2177e47878f7e7616da8a2050c44f22301b6e.

That commit triggered a new WARN when unloading the module (see at the
end of the commit message). When a class_dev is embedded in a structure
then that class_dev is the thing that controls the lifetime of that
structure, for that reason device managed allocations can't be used here.
See Documentation/kobject.txt.

Revert the above patch, so the struct is allocated using kzalloc and we
have a release function for it that frees the allocated memory, otherwise
it is broken.

 ------------[ cut here ]------------
 Device 'cros_ec' does not have a release() function, it is broken and must be fixed.
 WARNING: CPU: 3 PID: 3675 at drivers/base/core.c:895 device_release+0x80/0x90
 Modules linked in: btusb btrtl btintel btbcm bluetooth ...
 CPU: 3 PID: 3675 Comm: rmmod Not tainted 4.20.0-rc4 #76
 Hardware name: Google Kevin (DT)
 pstate: 40000005 (nZcv daif -PAN -UAO)
 pc : device_release+0x80/0x90
 lr : device_release+0x80/0x90
 sp : ffff00000c47bc70
 x29: ffff00000c47bc70 x28: ffff8000e86b0d40
 x27: 0000000000000000 x26: 0000000000000000
 x25: 0000000056000000 x24: 0000000000000015
 x23: ffff8000f0bbf860 x22: ffff000000d320a0
 x21: ffff8000ee93e100 x20: ffff8000ed931428
 x19: ffff8000ed931418 x18: 0000000000000020
 x17: 0000000000000000 x16: 0000000000000000
 x15: 0000000000000400 x14: 0000000000000143
 x13: 0000000000000000 x12: 0000000000000400
 x11: 0000000000000157 x10: 0000000000000960
 x9 : ffff00000c47b9b0 x8 : ffff8000e86b1700
 x7 : 0000000000000000 x6 : ffff8000f7d520b8
 x5 : ffff8000f7d520b8 x4 : 0000000000000000
 x3 : ffff8000f7d58e68 x2 : ffff8000e86b0d40
 x1 : 37d859939c964800 x0 : 0000000000000000
 Call trace:
  device_release+0x80/0x90
  kobject_put+0x74/0xe8
  device_unregister+0x20/0x30
  ec_device_remove+0x34/0x48 [cros_ec_dev]
  platform_drv_remove+0x28/0x48
  device_release_driver_internal+0x1a8/0x240
  driver_detach+0x40/0x80
  bus_remove_driver+0x54/0xa8
  driver_unregister+0x2c/0x58
  platform_driver_unregister+0x10/0x18
  cros_ec_dev_exit+0x1c/0x2d8 [cros_ec_dev]
  __arm64_sys_delete_module+0x16c/0x1f8
  el0_svc_common+0x84/0xd8
  el0_svc_handler+0x2c/0x80
  el0_svc+0x8/0xc
 ---[ end trace a57c4625f3c60ae8 ]---

Cc: stable@vger.kernel.org
Fixes: 3aa2177e4787 ("mfd: cros_ec: Use devm_kzalloc for private data")
Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Reviewed-by: Guenter Roeck <groeck@chromium.org>
Reviewed-by: Dmitry Torokhov <dtor@chromium.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/cros_ec_dev.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/mfd/cros_ec_dev.c b/drivers/mfd/cros_ec_dev.c
index 8f9d6964173ec..b99a194ce5a4a 100644
--- a/drivers/mfd/cros_ec_dev.c
+++ b/drivers/mfd/cros_ec_dev.c
@@ -263,6 +263,11 @@ static const struct file_operations fops = {
 #endif
 };
 
+static void cros_ec_class_release(struct device *dev)
+{
+	kfree(to_cros_ec_dev(dev));
+}
+
 static void cros_ec_sensors_register(struct cros_ec_dev *ec)
 {
 	/*
@@ -395,7 +400,7 @@ static int ec_device_probe(struct platform_device *pdev)
 	int retval = -ENOMEM;
 	struct device *dev = &pdev->dev;
 	struct cros_ec_platform *ec_platform = dev_get_platdata(dev);
-	struct cros_ec_dev *ec = devm_kzalloc(dev, sizeof(*ec), GFP_KERNEL);
+	struct cros_ec_dev *ec = kzalloc(sizeof(*ec), GFP_KERNEL);
 
 	if (!ec)
 		return retval;
@@ -417,6 +422,7 @@ static int ec_device_probe(struct platform_device *pdev)
 	ec->class_dev.devt = MKDEV(ec_major, pdev->id);
 	ec->class_dev.class = &cros_class;
 	ec->class_dev.parent = dev;
+	ec->class_dev.release = cros_ec_class_release;
 
 	retval = dev_set_name(&ec->class_dev, "%s", ec_platform->ec_name);
 	if (retval) {
-- 
GitLab


From f51ccf46217c28758b1f3b5bc0ccfc00eca658b2 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Tue, 4 Dec 2018 17:00:36 +0100
Subject: [PATCH 1795/1890] USB: serial: console: fix reported terminal
 settings

The USB-serial console implementation has never reported the actual
terminal settings used. Despite storing the corresponding cflags in its
struct console, these were never honoured on later tty open() where the
tty termios would be left initialised to the driver defaults.

Unlike the serial console implementation, the USB-serial code calls
subdriver open() already at console setup. While calling set_termios()
and write() before open() looks like it could work for some USB-serial
drivers, others definitely do not expect this, so modelling this after
serial core is going to be intrusive, if at all possible.

Instead, use a (renamed) tty helper to save the termios data used at
console setup so that the tty termios reflects the actual terminal
settings after a subsequent tty open().

Note that the calls to tty_init_termios() (tty_driver_install()) and
tty_save_termios() are serialised using the disconnect mutex.

This specifically fixes a regression that was triggered by a recent
change adding software flow control to the pl2303 driver: a getty trying
to disable flow control while leaving the baud rate unchanged would now
also set the baud rate to the driver default (prior to the flow-control
change this had been a noop).

Fixes: 7041d9c3f01b ("USB: serial: pl2303: add support for tx xon/xoff flow control")
Cc: stable <stable@vger.kernel.org>	# 4.18
Cc: Florian Zumbiehl <florz@florz.de>
Reported-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Tested-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/tty/tty_io.c         | 11 +++++++++--
 drivers/usb/serial/console.c |  2 +-
 include/linux/tty.h          |  1 +
 3 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index ee80dfbd5442b..687250ec80323 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -1373,7 +1373,13 @@ struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx)
 	return ERR_PTR(retval);
 }
 
-static void tty_free_termios(struct tty_struct *tty)
+/**
+ * tty_save_termios() - save tty termios data in driver table
+ * @tty: tty whose termios data to save
+ *
+ * Locking: Caller guarantees serialisation with tty_init_termios().
+ */
+void tty_save_termios(struct tty_struct *tty)
 {
 	struct ktermios *tp;
 	int idx = tty->index;
@@ -1392,6 +1398,7 @@ static void tty_free_termios(struct tty_struct *tty)
 	}
 	*tp = tty->termios;
 }
+EXPORT_SYMBOL_GPL(tty_save_termios);
 
 /**
  *	tty_flush_works		-	flush all works of a tty/pty pair
@@ -1491,7 +1498,7 @@ static void release_tty(struct tty_struct *tty, int idx)
 	WARN_ON(!mutex_is_locked(&tty_mutex));
 	if (tty->ops->shutdown)
 		tty->ops->shutdown(tty);
-	tty_free_termios(tty);
+	tty_save_termios(tty);
 	tty_driver_remove_tty(tty->driver, tty);
 	tty->port->itty = NULL;
 	if (tty->link)
diff --git a/drivers/usb/serial/console.c b/drivers/usb/serial/console.c
index 17940589c647c..7d289302ff6cf 100644
--- a/drivers/usb/serial/console.c
+++ b/drivers/usb/serial/console.c
@@ -101,7 +101,6 @@ static int usb_console_setup(struct console *co, char *options)
 		cflag |= PARENB;
 		break;
 	}
-	co->cflag = cflag;
 
 	/*
 	 * no need to check the index here: if the index is wrong, console
@@ -164,6 +163,7 @@ static int usb_console_setup(struct console *co, char *options)
 			serial->type->set_termios(tty, port, &dummy);
 
 			tty_port_tty_set(&port->port, NULL);
+			tty_save_termios(tty);
 			tty_kref_put(tty);
 		}
 		tty_port_set_initialized(&port->port, 1);
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 414db2bce7150..392138fe59b69 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -556,6 +556,7 @@ extern struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx);
 extern void tty_release_struct(struct tty_struct *tty, int idx);
 extern int tty_release(struct inode *inode, struct file *filp);
 extern void tty_init_termios(struct tty_struct *tty);
+extern void tty_save_termios(struct tty_struct *tty);
 extern int tty_standard_install(struct tty_driver *driver,
 		struct tty_struct *tty);
 
-- 
GitLab


From 100bc3e2bebf95506da57cbdf5f26b25f6da4c81 Mon Sep 17 00:00:00 2001
From: Peter Shih <pihsun@chromium.org>
Date: Tue, 27 Nov 2018 12:49:50 +0800
Subject: [PATCH 1796/1890] tty: serial: 8250_mtk: always resume the device in
 probe.

serial8250_register_8250_port calls uart_config_port, which calls
config_port on the port before it tries to power on the port. So we need
the port to be on before calling serial8250_register_8250_port. Change
the code to always do a runtime resume in probe before registering port,
and always do a runtime suspend in remove.

This basically reverts the change in commit 68e5fc4a255a ("tty: serial:
8250_mtk: use pm_runtime callbacks for enabling"), but still use
pm_runtime callbacks.

Fixes: 68e5fc4a255a ("tty: serial: 8250_mtk: use pm_runtime callbacks for enabling")
Signed-off-by: Peter Shih <pihsun@chromium.org>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_mtk.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c
index dd5e1cede2b58..c3f933d10295e 100644
--- a/drivers/tty/serial/8250/8250_mtk.c
+++ b/drivers/tty/serial/8250/8250_mtk.c
@@ -213,17 +213,17 @@ static int mtk8250_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, data);
 
-	pm_runtime_enable(&pdev->dev);
-	if (!pm_runtime_enabled(&pdev->dev)) {
-		err = mtk8250_runtime_resume(&pdev->dev);
-		if (err)
-			return err;
-	}
+	err = mtk8250_runtime_resume(&pdev->dev);
+	if (err)
+		return err;
 
 	data->line = serial8250_register_8250_port(&uart);
 	if (data->line < 0)
 		return data->line;
 
+	pm_runtime_set_active(&pdev->dev);
+	pm_runtime_enable(&pdev->dev);
+
 	return 0;
 }
 
@@ -234,13 +234,11 @@ static int mtk8250_remove(struct platform_device *pdev)
 	pm_runtime_get_sync(&pdev->dev);
 
 	serial8250_unregister_port(data->line);
+	mtk8250_runtime_suspend(&pdev->dev);
 
 	pm_runtime_disable(&pdev->dev);
 	pm_runtime_put_noidle(&pdev->dev);
 
-	if (!pm_runtime_status_suspended(&pdev->dev))
-		mtk8250_runtime_suspend(&pdev->dev);
-
 	return 0;
 }
 
-- 
GitLab


From a317e65face482371de30246b6494feb093ff7f9 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Tue, 13 Nov 2018 20:32:13 +0100
Subject: [PATCH 1797/1890] mac80211: ignore tx status for PS stations in
 ieee80211_tx_status_ext

Make it behave like regular ieee80211_tx_status calls, except for the lack of
filtered frame processing.
This fixes spurious low-ack triggered disconnections with powersave clients
connected to an AP.

Fixes: f027c2aca0cf4 ("mac80211: add ieee80211_tx_status_noskb")
Cc: stable@vger.kernel.org
Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/status.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index aa4afbf0abaf1..a794ca7290001 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -964,6 +964,8 @@ void ieee80211_tx_status_ext(struct ieee80211_hw *hw,
 			/* Track when last TDLS packet was ACKed */
 			if (test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH))
 				sta->status_stats.last_tdls_pkt_time = jiffies;
+		} else if (test_sta_flag(sta, WLAN_STA_PS_STA)) {
+			return;
 		} else {
 			ieee80211_lost_packet(sta, info);
 		}
-- 
GitLab


From 9ec1190d065998650fd9260dea8cf3e1f56c0e8c Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 28 Nov 2018 22:39:16 +0100
Subject: [PATCH 1798/1890] mac80211: fix reordering of buffered broadcast
 packets

If the buffered broadcast queue contains packets, letting new packets bypass
that queue can lead to heavy reordering, since the driver is probably throttling
transmission of buffered multicast packets after beacons.

Keep buffering packets until the buffer has been cleared (and no client
is in powersave mode).

Cc: stable@vger.kernel.org
Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/tx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index e0ccee23fbcdb..1f536ba573b48 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -439,8 +439,8 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx)
 	if (ieee80211_hw_check(&tx->local->hw, QUEUE_CONTROL))
 		info->hw_queue = tx->sdata->vif.cab_queue;
 
-	/* no stations in PS mode */
-	if (!atomic_read(&ps->num_sta_ps))
+	/* no stations in PS mode and no buffered packets */
+	if (!atomic_read(&ps->num_sta_ps) && skb_queue_empty(&ps->bc_buf))
 		return TX_CONTINUE;
 
 	info->flags |= IEEE80211_TX_CTL_SEND_AFTER_DTIM;
-- 
GitLab


From 990d71846a0b7281bd933c34d734e6afc7408e7e Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Mon, 3 Dec 2018 21:16:07 +0200
Subject: [PATCH 1799/1890] mac80211: ignore NullFunc frames in the duplicate
 detection

NullFunc packets should never be duplicate just like
QoS-NullFunc packets.

We saw a client that enters / exits power save with
NullFunc frames (and not with QoS-NullFunc) despite the
fact that the association supports HT.
This specific client also re-uses a non-zero sequence number
for different NullFunc frames.
At some point, the client had to send a retransmission of
the NullFunc frame and we dropped it, leading to a
misalignment in the power save state.
Fix this by never consider a NullFunc frame as duplicate,
just like we do for QoS NullFunc frames.

This fixes https://bugzilla.kernel.org/show_bug.cgi?id=201449

CC: <stable@vger.kernel.org>
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/rx.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index a69ecfb212ed7..428f7ad5f9b59 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1403,6 +1403,7 @@ ieee80211_rx_h_check_dup(struct ieee80211_rx_data *rx)
 		return RX_CONTINUE;
 
 	if (ieee80211_is_ctl(hdr->frame_control) ||
+	    ieee80211_is_nullfunc(hdr->frame_control) ||
 	    ieee80211_is_qos_nullfunc(hdr->frame_control) ||
 	    is_multicast_ether_addr(hdr->addr1))
 		return RX_CONTINUE;
-- 
GitLab


From 312ca38ddda64bac6513ec68e0ac3789b4eb44dc Mon Sep 17 00:00:00 2001
From: Jouni Malinen <jouni@codeaurora.org>
Date: Wed, 5 Dec 2018 12:55:54 +0200
Subject: [PATCH 1800/1890] cfg80211: Fix busy loop regression in
 ieee80211_ie_split_ric()

This function was modified to support the information element extension
case (WLAN_EID_EXTENSION) in a manner that would result in an infinite
loop when going through set of IEs that include WLAN_EID_RIC_DATA and
contain an IE that is in the after_ric array. The only place where this
can currently happen is in mac80211 ieee80211_send_assoc() where
ieee80211_ie_split_ric() is called with after_ric[].

This can be triggered by valid data from user space nl80211
association/connect request (i.e., requiring GENL_UNS_ADMIN_PERM). The
only known application having an option to include WLAN_EID_RIC_DATA in
these requests is wpa_supplicant and it had a bug that prevented this
specific contents from being used (and because of that, not triggering
this kernel bug in an automated test case ap_ft_ric) and now that this
bug is fixed, it has a workaround to avoid this kernel issue.
WLAN_EID_RIC_DATA is currently used only for testing purposes, so this
does not cause significant harm for production use cases.

Fixes: 2512b1b18d07 ("mac80211: extend ieee80211_ie_split to support EXTENSION")
Cc: stable@vger.kernel.org
Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/util.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/wireless/util.c b/net/wireless/util.c
index ef14d80ca03ee..d473bd135da8b 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -1421,6 +1421,8 @@ size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen,
 							  ies[pos + ext],
 							  ext == 2))
 					pos = skip_ie(ies, ielen, pos);
+				else
+					break;
 			}
 		} else {
 			pos = skip_ie(ies, ielen, pos);
-- 
GitLab


From c443305529d1d3d3bee0d68fdd14ae89835e091f Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Tue, 4 Dec 2018 07:52:11 -0500
Subject: [PATCH 1801/1890] SUNRPC: Fix RPC receive hangs

The RPC code is occasionally hanging when the receive code fails to
empty the socket buffer due to a partial read of the data. When we
convert that to an EAGAIN, it appears we occasionally leave data in the
socket. The fix is to just keep reading until the socket returns
EAGAIN/EWOULDBLOCK.

Reported-by: Catalin Marinas <catalin.marinas@arm.com>
Reported-by: Cristian Marussi <cristian.marussi@arm.com>
Reported-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Tested-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Cristian Marussi <cristian.marussi@arm.com>
---
 net/sunrpc/xprtsock.c | 39 +++++++++++++++++++--------------------
 1 file changed, 19 insertions(+), 20 deletions(-)

diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index ae77c71c1f640..0898752cecfeb 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -398,7 +398,7 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
 		if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
 			goto out;
 		if (ret != want)
-			goto eagain;
+			goto out;
 		seek = 0;
 	} else {
 		seek -= buf->head[0].iov_len;
@@ -418,7 +418,7 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
 		if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
 			goto out;
 		if (ret != want)
-			goto eagain;
+			goto out;
 		seek = 0;
 	} else {
 		seek -= buf->page_len;
@@ -433,7 +433,7 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
 		if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
 			goto out;
 		if (ret != want)
-			goto eagain;
+			goto out;
 	} else
 		offset += buf->tail[0].iov_len;
 	ret = -EMSGSIZE;
@@ -441,9 +441,6 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
 out:
 	*read = offset - seek_init;
 	return ret;
-eagain:
-	ret = -EAGAIN;
-	goto out;
 sock_err:
 	offset += seek;
 	goto out;
@@ -486,19 +483,18 @@ xs_read_stream_request(struct sock_xprt *transport, struct msghdr *msg,
 	if (transport->recv.offset == transport->recv.len) {
 		if (xs_read_stream_request_done(transport))
 			msg->msg_flags |= MSG_EOR;
-		return transport->recv.copied;
+		return read;
 	}
 
 	switch (ret) {
+	default:
+		break;
 	case -EMSGSIZE:
-		return transport->recv.copied;
+		return read;
 	case 0:
 		return -ESHUTDOWN;
-	default:
-		if (ret < 0)
-			return ret;
 	}
-	return -EAGAIN;
+	return ret < 0 ? ret : read;
 }
 
 static size_t
@@ -537,7 +533,7 @@ xs_read_stream_call(struct sock_xprt *transport, struct msghdr *msg, int flags)
 
 	ret = xs_read_stream_request(transport, msg, flags, req);
 	if (msg->msg_flags & (MSG_EOR|MSG_TRUNC))
-		xprt_complete_bc_request(req, ret);
+		xprt_complete_bc_request(req, transport->recv.copied);
 
 	return ret;
 }
@@ -570,7 +566,7 @@ xs_read_stream_reply(struct sock_xprt *transport, struct msghdr *msg, int flags)
 
 	spin_lock(&xprt->queue_lock);
 	if (msg->msg_flags & (MSG_EOR|MSG_TRUNC))
-		xprt_complete_rqst(req->rq_task, ret);
+		xprt_complete_rqst(req->rq_task, transport->recv.copied);
 	xprt_unpin_rqst(req);
 out:
 	spin_unlock(&xprt->queue_lock);
@@ -591,10 +587,8 @@ xs_read_stream(struct sock_xprt *transport, int flags)
 		if (ret <= 0)
 			goto out_err;
 		transport->recv.offset = ret;
-		if (ret != want) {
-			ret = -EAGAIN;
-			goto out_err;
-		}
+		if (transport->recv.offset != want)
+			return transport->recv.offset;
 		transport->recv.len = be32_to_cpu(transport->recv.fraghdr) &
 			RPC_FRAGMENT_SIZE_MASK;
 		transport->recv.offset -= sizeof(transport->recv.fraghdr);
@@ -602,6 +596,9 @@ xs_read_stream(struct sock_xprt *transport, int flags)
 	}
 
 	switch (be32_to_cpu(transport->recv.calldir)) {
+	default:
+		msg.msg_flags |= MSG_TRUNC;
+		break;
 	case RPC_CALL:
 		ret = xs_read_stream_call(transport, &msg, flags);
 		break;
@@ -616,6 +613,8 @@ xs_read_stream(struct sock_xprt *transport, int flags)
 		goto out_err;
 	read += ret;
 	if (transport->recv.offset < transport->recv.len) {
+		if (!(msg.msg_flags & MSG_TRUNC))
+			return read;
 		ret = xs_read_discard(transport->sock, &msg, flags,
 				transport->recv.len - transport->recv.offset);
 		if (ret <= 0)
@@ -623,7 +622,7 @@ xs_read_stream(struct sock_xprt *transport, int flags)
 		transport->recv.offset += ret;
 		read += ret;
 		if (transport->recv.offset != transport->recv.len)
-			return -EAGAIN;
+			return read;
 	}
 	if (xs_read_stream_request_done(transport)) {
 		trace_xs_stream_read_request(transport);
@@ -653,7 +652,7 @@ static void xs_stream_data_receive(struct sock_xprt *transport)
 	clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
 	for (;;) {
 		ret = xs_read_stream(transport, MSG_DONTWAIT);
-		if (ret <= 0)
+		if (ret < 0)
 			break;
 		read += ret;
 		cond_resched();
-- 
GitLab


From 16e5e90f0e4f9b7b2e4d08558a2f695e2fa1fb0d Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Sun, 2 Dec 2018 15:22:48 -0500
Subject: [PATCH 1802/1890] SUNRPC: Fix up handling of the XDRBUF_SPARSE_PAGES
 flag

If the allocator fails before it has reached the target number of pages,
then we need to recheck that we're not seeking past the page buffer.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 net/sunrpc/xprtsock.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 0898752cecfeb..cd85c492c267b 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -330,18 +330,16 @@ xs_alloc_sparse_pages(struct xdr_buf *buf, size_t want, gfp_t gfp)
 {
 	size_t i,n;
 
-	if (!(buf->flags & XDRBUF_SPARSE_PAGES))
+	if (!want || !(buf->flags & XDRBUF_SPARSE_PAGES))
 		return want;
-	if (want > buf->page_len)
-		want = buf->page_len;
 	n = (buf->page_base + want + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	for (i = 0; i < n; i++) {
 		if (buf->pages[i])
 			continue;
 		buf->bvec[i].bv_page = buf->pages[i] = alloc_page(gfp);
 		if (!buf->pages[i]) {
-			buf->page_len = (i * PAGE_SIZE) - buf->page_base;
-			return buf->page_len;
+			i *= PAGE_SIZE;
+			return i > buf->page_base ? i - buf->page_base : 0;
 		}
 	}
 	return want;
@@ -404,10 +402,11 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
 		seek -= buf->head[0].iov_len;
 		offset += buf->head[0].iov_len;
 	}
-	if (seek < buf->page_len) {
-		want = xs_alloc_sparse_pages(buf,
-				min_t(size_t, count - offset, buf->page_len),
-				GFP_NOWAIT);
+
+	want = xs_alloc_sparse_pages(buf,
+			min_t(size_t, count - offset, buf->page_len),
+			GFP_NOWAIT);
+	if (seek < want) {
 		ret = xs_read_bvec(sock, msg, flags, buf->bvec,
 				xdr_buf_pagecount(buf),
 				want + buf->page_base,
@@ -421,9 +420,10 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
 			goto out;
 		seek = 0;
 	} else {
-		seek -= buf->page_len;
-		offset += buf->page_len;
+		seek -= want;
+		offset += want;
 	}
+
 	if (seek < buf->tail[0].iov_len) {
 		want = min_t(size_t, count - offset, buf->tail[0].iov_len);
 		ret = xs_read_kvec(sock, msg, flags, &buf->tail[0], want, seek);
-- 
GitLab


From 26781eab48ece79000ffc4e69be402f2524e1137 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Mon, 3 Dec 2018 15:41:38 -0500
Subject: [PATCH 1803/1890] SUNRPC: Treat EFAULT as a truncated message in
 xs_read_stream_request()

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 net/sunrpc/xprtsock.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index cd85c492c267b..86bb502e538a2 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -437,7 +437,6 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
 	} else
 		offset += buf->tail[0].iov_len;
 	ret = -EMSGSIZE;
-	msg->msg_flags |= MSG_TRUNC;
 out:
 	*read = offset - seek_init;
 	return ret;
@@ -489,7 +488,9 @@ xs_read_stream_request(struct sock_xprt *transport, struct msghdr *msg,
 	switch (ret) {
 	default:
 		break;
+	case -EFAULT:
 	case -EMSGSIZE:
+		msg->msg_flags |= MSG_TRUNC;
 		return read;
 	case 0:
 		return -ESHUTDOWN;
-- 
GitLab


From b76a5afdce6c6dacfbd51863b31b3d7cc61ca21e Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Mon, 3 Dec 2018 15:58:58 -0500
Subject: [PATCH 1804/1890] SUNRPC: Use the discard iterator rather than
 MSG_TRUNC

When discarding message data from the stream, we're better off using
the discard iterator, since that will work with non-TCP streams.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 net/sunrpc/xprtsock.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 86bb502e538a2..fc6d129401ba3 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -376,8 +376,8 @@ static ssize_t
 xs_read_discard(struct socket *sock, struct msghdr *msg, int flags,
 		size_t count)
 {
-	struct kvec kvec = { 0 };
-	return xs_read_kvec(sock, msg, flags | MSG_TRUNC, &kvec, count, 0);
+	iov_iter_discard(&msg->msg_iter, READ, count);
+	return sock_recvmsg(sock, msg, flags);
 }
 
 static ssize_t
@@ -616,6 +616,7 @@ xs_read_stream(struct sock_xprt *transport, int flags)
 	if (transport->recv.offset < transport->recv.len) {
 		if (!(msg.msg_flags & MSG_TRUNC))
 			return read;
+		msg.msg_flags = 0;
 		ret = xs_read_discard(transport->sock, &msg, flags,
 				transport->recv.len - transport->recv.offset);
 		if (ret <= 0)
-- 
GitLab


From dfcf0380858b4e760ae02665649d884d1baa50c9 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Tue, 4 Dec 2018 07:50:06 -0500
Subject: [PATCH 1805/1890] SUNRPC: Fix up socket polling

Ensure that we do not exit the socket read callback without clearing
XPRT_SOCK_DATA_READY.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 net/sunrpc/xprtsock.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index fc6d129401ba3..92d08be2384d2 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -649,9 +649,9 @@ static void xs_stream_data_receive(struct sock_xprt *transport)
 	ssize_t ret = 0;
 
 	mutex_lock(&transport->recv_mutex);
+	clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
 	if (transport->sock == NULL)
 		goto out;
-	clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
 	for (;;) {
 		ret = xs_read_stream(transport, MSG_DONTWAIT);
 		if (ret < 0)
@@ -1346,10 +1346,10 @@ static void xs_udp_data_receive(struct sock_xprt *transport)
 	int err;
 
 	mutex_lock(&transport->recv_mutex);
+	clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
 	sk = transport->inet;
 	if (sk == NULL)
 		goto out;
-	clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
 	for (;;) {
 		skb = skb_recv_udp(sk, 0, 1, &err);
 		if (skb == NULL)
-- 
GitLab


From 79462857eb547e5d17fc8445b9768615e02dc1cf Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Mon, 3 Dec 2018 18:49:00 -0500
Subject: [PATCH 1806/1890] SUNRPC: Don't force a redundant disconnection in
 xs_read_stream()

If the connection is broken, then xs_tcp_state_change() will take care
of scheduling the socket close as soon as appropriate. xs_read_stream()
just needs to report the error.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 net/sunrpc/xprtsock.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 92d08be2384d2..8a5e823e0b339 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -634,13 +634,7 @@ xs_read_stream(struct sock_xprt *transport, int flags)
 	transport->recv.len = 0;
 	return read;
 out_err:
-	switch (ret) {
-	case 0:
-	case -ESHUTDOWN:
-		xprt_force_disconnect(&transport->xprt);
-		return -ESHUTDOWN;
-	}
-	return ret;
+	return ret != 0 ? ret : -ESHUTDOWN;
 }
 
 static void xs_stream_data_receive(struct sock_xprt *transport)
-- 
GitLab


From 2fc00c1e0f9d2abe0df74c33cf9f40d12b9b892f Mon Sep 17 00:00:00 2001
From: Chris Chiu <chiu@endlessm.com>
Date: Mon, 3 Dec 2018 14:46:20 +0800
Subject: [PATCH 1807/1890] HID: use macros in IS_INPUT_APPLICATION

Add missing definition for HID_DG_WHITEBOARD then replace the hid
usage hex with macros for better readibility.

Signed-off-by: Chris Chiu <chiu@endlessm.com>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
---
 include/linux/hid.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/include/linux/hid.h b/include/linux/hid.h
index a355d61940f28..ce5f996c8d3d1 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -238,6 +238,7 @@ struct hid_item {
 #define HID_DG_LIGHTPEN		0x000d0003
 #define HID_DG_TOUCHSCREEN	0x000d0004
 #define HID_DG_TOUCHPAD		0x000d0005
+#define HID_DG_WHITEBOARD	0x000d0006
 #define HID_DG_STYLUS		0x000d0020
 #define HID_DG_PUCK		0x000d0021
 #define HID_DG_FINGER		0x000d0022
@@ -836,7 +837,10 @@ static inline bool hid_is_using_ll_driver(struct hid_device *hdev,
 
 /* Applications from HID Usage Tables 4/8/99 Version 1.1 */
 /* We ignore a few input applications that are not widely used */
-#define IS_INPUT_APPLICATION(a) (((a >= 0x00010000) && (a <= 0x00010008)) || (a == 0x00010080) || (a == 0x000c0001) || ((a >= 0x000d0002) && (a <= 0x000d0006)))
+#define IS_INPUT_APPLICATION(a) \
+		(((a >= HID_UP_GENDESK) && (a <= HID_GD_MULTIAXIS)) \
+		|| ((a >= HID_DG_PEN) && (a <= HID_DG_WHITEBOARD)) \
+		|| (a == HID_GD_SYSTEM_CONTROL) || (a == HID_CP_CONSUMER_CONTROL))
 
 /* HID core API */
 
-- 
GitLab


From 7f5592742a429b4de770fc5b796d18de43a15fdc Mon Sep 17 00:00:00 2001
From: Chris Chiu <chiu@endlessm.com>
Date: Mon, 3 Dec 2018 14:46:21 +0800
Subject: [PATCH 1808/1890] HID: input: support Microsoft wireless radio
 control hotkey

The ASUS laptops start to support the airplane mode radio management
to replace the original mechanism of airplane mode toggle hotkey.
On the ASUS P5440FF, it presents as a HID device connecting via
I2C, named i2c-AMPD0001. When pressing it, the Embedded Controller
send hid report via I2C and switch the airplane mode indicator LED
based on the status.

However, it's not working because it fails to be identified as a
hidinput device. It fails in hidinput_connect() due to the macro
IS_INPUT_APPLICATION doesn't have HID_GD_WIRELESS_RADIO_CTLS as
a legit application code.

It's easy to add the HID I2C vendor and product id to the quirk
list and apply HID_QUIRK_HIDINPUT_FORCE to make it work. But it
makes more sense to support it as a generic input application.

Signed-off-by: Chris Chiu <chiu@endlessm.com>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
---
 include/linux/hid.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/linux/hid.h b/include/linux/hid.h
index ce5f996c8d3d1..42079116fb615 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -840,7 +840,8 @@ static inline bool hid_is_using_ll_driver(struct hid_device *hdev,
 #define IS_INPUT_APPLICATION(a) \
 		(((a >= HID_UP_GENDESK) && (a <= HID_GD_MULTIAXIS)) \
 		|| ((a >= HID_DG_PEN) && (a <= HID_DG_WHITEBOARD)) \
-		|| (a == HID_GD_SYSTEM_CONTROL) || (a == HID_CP_CONSUMER_CONTROL))
+		|| (a == HID_GD_SYSTEM_CONTROL) || (a == HID_CP_CONSUMER_CONTROL) \
+		|| (a == HID_GD_WIRELESS_RADIO_CTLS))
 
 /* HID core API */
 
-- 
GitLab


From 13cfb713c8c2035c058af3233c3432165c97ceeb Mon Sep 17 00:00:00 2001
From: Julia Lawall <Julia.Lawall@lip6.fr>
Date: Tue, 30 Oct 2018 16:14:59 +0100
Subject: [PATCH 1809/1890] thermal: armada: constify
 thermal_zone_of_device_ops structure

The thermal_zone_of_device_ops structure can be const as it is only
passed as the last argument of devm_thermal_zone_of_sensor_register
and the corresponding parameter is declared as const.

Done with the help of Coccinelle.

Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>
Reviewed-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/thermal/armada_thermal.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/thermal/armada_thermal.c b/drivers/thermal/armada_thermal.c
index a6d3ee6077f8b..d7105d01859ab 100644
--- a/drivers/thermal/armada_thermal.c
+++ b/drivers/thermal/armada_thermal.c
@@ -395,7 +395,7 @@ static int armada_get_temp(void *_sensor, int *temp)
 	return ret;
 }
 
-static struct thermal_zone_of_device_ops of_ops = {
+static const struct thermal_zone_of_device_ops of_ops = {
 	.get_temp = armada_get_temp,
 };
 
-- 
GitLab


From 1aea7aee805e3d1288f9f1fa4484964b51664960 Mon Sep 17 00:00:00 2001
From: Julia Lawall <Julia.Lawall@lip6.fr>
Date: Tue, 30 Oct 2018 16:15:00 +0100
Subject: [PATCH 1810/1890] thermal: broadcom: constify
 thermal_zone_of_device_ops structure

The thermal_zone_of_device_ops structure can be const as it is only
passed as the last argument of thermal_zone_of_sensor_register
and the corresponding parameter is declared as const.

Done with the help of Coccinelle.

Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>
Reviewed-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/thermal/broadcom/brcmstb_thermal.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/thermal/broadcom/brcmstb_thermal.c b/drivers/thermal/broadcom/brcmstb_thermal.c
index 1919f91fa7565..e8b1570cc3888 100644
--- a/drivers/thermal/broadcom/brcmstb_thermal.c
+++ b/drivers/thermal/broadcom/brcmstb_thermal.c
@@ -299,7 +299,7 @@ static int brcmstb_set_trips(void *data, int low, int high)
 	return 0;
 }
 
-static struct thermal_zone_of_device_ops of_ops = {
+static const struct thermal_zone_of_device_ops of_ops = {
 	.get_temp	= brcmstb_get_temp,
 	.set_trips	= brcmstb_set_trips,
 };
-- 
GitLab


From 33aaebd48ae2d2c78fef5063a0381e17db19b060 Mon Sep 17 00:00:00 2001
From: Chris Chiu <chiu@endlessm.com>
Date: Wed, 5 Dec 2018 14:48:53 +0800
Subject: [PATCH 1811/1890] ALSA: hda/realtek: ALC286 mic and headset-mode
 fixups for Acer Aspire U27-880

Acer Aspire U27-880(AIO) with ALC286 codec can not detect headset mic
and internal mic not working either. It needs the similar quirk like
Sony laptops to fix headphone jack sensing and enables use of the
internal microphone.

Unfortunately jack sensing for the headset mic is still not working.

Signed-off-by: Jian-Hong Pan <jian-hong@endlessm.com>
Signed-off-by: Daniel Drake <drake@endlessm.com>
Signed-off-by: Chris Chiu <chiu@endlessm.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 802f1f1b3a193..0693dbb2c1674 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -5519,6 +5519,7 @@ enum {
 	ALC221_FIXUP_HP_HEADSET_MIC,
 	ALC285_FIXUP_LENOVO_HEADPHONE_NOISE,
 	ALC295_FIXUP_HP_AUTO_MUTE,
+	ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -6396,6 +6397,15 @@ static const struct hda_fixup alc269_fixups[] = {
 		.type = HDA_FIXUP_FUNC,
 		.v.func = alc_fixup_auto_mute_via_amp,
 	},
+	[ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE] = {
+		.type = HDA_FIXUP_PINS,
+		.v.pins = (const struct hda_pintbl[]) {
+			{ 0x18, 0x01a1913c }, /* use as headset mic, without its own jack detect */
+			{ }
+		},
+		.chained = true,
+		.chain_id = ALC269_FIXUP_HEADSET_MIC
+	},
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -7074,6 +7084,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
 		{0x14, 0x90170110},
 		{0x19, 0x04a11040},
 		{0x21, 0x04211020}),
+	SND_HDA_PIN_QUIRK(0x10ec0286, 0x1025, "Acer", ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE,
+		{0x12, 0x90a60130},
+		{0x17, 0x90170110},
+		{0x21, 0x02211020}),
 	SND_HDA_PIN_QUIRK(0x10ec0288, 0x1028, "Dell", ALC288_FIXUP_DELL1_MIC_NO_PRESENCE,
 		{0x12, 0x90a60120},
 		{0x14, 0x90170110},
-- 
GitLab


From 705b65f107470499442240ff7afee5021a7002a6 Mon Sep 17 00:00:00 2001
From: Chris Chiu <chiu@endlessm.com>
Date: Wed, 5 Dec 2018 14:48:54 +0800
Subject: [PATCH 1812/1890] ALSA: hda/realtek - Add support for Acer Aspire
 C24-860 headset mic

The Acer AIO Aspire C24-860 with ALC286 can't detect the headset
microphone. Just like another Acer AIO U27-880, it needs a different
pin value for 0x18 and the headset fixup to make headset mic work.

Signed-off-by: Jian-Hong Pan <jian-hong@endlessm.com>
Signed-off-by: Daniel Drake <drake@endlessm.com>
Signed-off-by: Chris Chiu <chiu@endlessm.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 0693dbb2c1674..91e1487b25e29 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -6420,6 +6420,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x1025, 0x0762, "Acer Aspire E1-472", ALC271_FIXUP_HP_GATE_MIC_JACK_E1_572),
 	SND_PCI_QUIRK(0x1025, 0x0775, "Acer Aspire E1-572", ALC271_FIXUP_HP_GATE_MIC_JACK_E1_572),
 	SND_PCI_QUIRK(0x1025, 0x079b, "Acer Aspire V5-573G", ALC282_FIXUP_ASPIRE_V5_PINS),
+	SND_PCI_QUIRK(0x1025, 0x102b, "Acer Aspire C24-860", ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1025, 0x106d, "Acer Cloudbook 14", ALC283_FIXUP_CHROME_BOOK),
 	SND_PCI_QUIRK(0x1028, 0x0470, "Dell M101z", ALC269_FIXUP_DELL_M101Z),
 	SND_PCI_QUIRK(0x1028, 0x054b, "Dell XPS one 2710", ALC275_FIXUP_DELL_XPS),
-- 
GitLab


From 9f8aefed9623a91dec54eab8908f3810b7f8d73a Mon Sep 17 00:00:00 2001
From: Chris Chiu <chiu@endlessm.com>
Date: Wed, 5 Dec 2018 14:48:55 +0800
Subject: [PATCH 1813/1890] ALSA: hda/realtek: Fix mic issue on Acer AIO
 Veriton Z4660G

Acer AIO Veriton Z4660G with ALC286 codec has issue with the input
from external microphones connecting via 'Front Mic' jack. The fixup
ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE enables the jack sensing of
the headset and fix the audio input issue of external microphone.

Signed-off-by: Jian-Hong Pan <jian-hong@endlessm.com>
Signed-off-by: Daniel Drake <drake@endlessm.com>
Signed-off-by: Chris Chiu <chiu@endlessm.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 91e1487b25e29..2a5ecdf261484 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -6422,6 +6422,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x1025, 0x079b, "Acer Aspire V5-573G", ALC282_FIXUP_ASPIRE_V5_PINS),
 	SND_PCI_QUIRK(0x1025, 0x102b, "Acer Aspire C24-860", ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1025, 0x106d, "Acer Cloudbook 14", ALC283_FIXUP_CHROME_BOOK),
+	SND_PCI_QUIRK(0x1025, 0x1291, "Acer Veriton Z4660G", ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x0470, "Dell M101z", ALC269_FIXUP_DELL_M101Z),
 	SND_PCI_QUIRK(0x1028, 0x054b, "Dell XPS one 2710", ALC275_FIXUP_DELL_XPS),
 	SND_PCI_QUIRK(0x1028, 0x05bd, "Dell Latitude E6440", ALC292_FIXUP_DELL_E7X),
-- 
GitLab


From b72f936f6b325f4fde06b02e4b6ab682f6f2e73f Mon Sep 17 00:00:00 2001
From: Chris Chiu <chiu@endlessm.com>
Date: Wed, 5 Dec 2018 14:48:56 +0800
Subject: [PATCH 1814/1890] ALSA: hda/realtek: Fix mic issue on Acer AIO
 Veriton Z4860G/Z6860G

Acer AIO Veriton Z4860G/Z6860G with the same ALC286 codec has issues
with the input from external microphone. The issue can be fixed by
the fixup ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE for Veriton Z4660G.

Signed-off-by: Jian-Hong Pan <jian-hong@endlessm.com>
Signed-off-by: Daniel Drake <drake@endlessm.com>
Signed-off-by: Chris Chiu <chiu@endlessm.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 2a5ecdf261484..8d75597028eeb 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -6422,6 +6422,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x1025, 0x079b, "Acer Aspire V5-573G", ALC282_FIXUP_ASPIRE_V5_PINS),
 	SND_PCI_QUIRK(0x1025, 0x102b, "Acer Aspire C24-860", ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1025, 0x106d, "Acer Cloudbook 14", ALC283_FIXUP_CHROME_BOOK),
+	SND_PCI_QUIRK(0x1025, 0x128f, "Acer Veriton Z6860G", ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1025, 0x1290, "Acer Veriton Z4860G", ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1025, 0x1291, "Acer Veriton Z4660G", ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x0470, "Dell M101z", ALC269_FIXUP_DELL_M101Z),
 	SND_PCI_QUIRK(0x1028, 0x054b, "Dell XPS one 2710", ALC275_FIXUP_DELL_XPS),
-- 
GitLab


From 0a9b89b2e2e7b6d90f81ddc47e489be1043e01b1 Mon Sep 17 00:00:00 2001
From: James Zhu <James.Zhu@amd.com>
Date: Mon, 3 Dec 2018 22:04:28 -0500
Subject: [PATCH 1815/1890] drm/amdgpu/vcn: Update vcn.cur_state during suspend

Replace vcn_v1_0_stop with vcn_v1_0_set_powergating_state during suspend,
to keep adev->vcn.cur_state update. It will fix VCN S3 hung issue.

Signed-off-by: James Zhu <James.Zhu@amd.com>
Reviewed-by: Leo Liu <leo.liu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index eae90922fdbe0..322e09b5b4489 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -48,6 +48,7 @@ static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev);
 static void vcn_v1_0_set_jpeg_ring_funcs(struct amdgpu_device *adev);
 static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev);
 static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr);
+static int vcn_v1_0_set_powergating_state(void *handle, enum amd_powergating_state state);
 
 /**
  * vcn_v1_0_early_init - set function pointers
@@ -222,7 +223,7 @@ static int vcn_v1_0_hw_fini(void *handle)
 	struct amdgpu_ring *ring = &adev->vcn.ring_dec;
 
 	if (RREG32_SOC15(VCN, 0, mmUVD_STATUS))
-		vcn_v1_0_stop(adev);
+		vcn_v1_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
 
 	ring->ready = false;
 
-- 
GitLab


From 704620afc70cf47abb9d6a1a57f3825d2bca49cf Mon Sep 17 00:00:00 2001
From: Mathias Payer <mathias.payer@nebelwelt.net>
Date: Wed, 5 Dec 2018 21:19:59 +0100
Subject: [PATCH 1816/1890] USB: check usb_get_extra_descriptor for proper size

When reading an extra descriptor, we need to properly check the minimum
and maximum size allowed, to prevent from invalid data being sent by a
device.

Reported-by: Hui Peng <benquike@gmail.com>
Reported-by: Mathias Payer <mathias.payer@nebelwelt.net>
Co-developed-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Hui Peng <benquike@gmail.com>
Signed-off-by: Mathias Payer <mathias.payer@nebelwelt.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: stable <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hub.c    | 2 +-
 drivers/usb/core/usb.c    | 6 +++---
 drivers/usb/host/hwa-hc.c | 2 +-
 include/linux/usb.h       | 4 ++--
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 528664059a126..f76b2e0aba9d5 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -2251,7 +2251,7 @@ static int usb_enumerate_device_otg(struct usb_device *udev)
 		/* descriptor may appear anywhere in config */
 		err = __usb_get_extra_descriptor(udev->rawdescriptors[0],
 				le16_to_cpu(udev->config[0].desc.wTotalLength),
-				USB_DT_OTG, (void **) &desc);
+				USB_DT_OTG, (void **) &desc, sizeof(*desc));
 		if (err || !(desc->bmAttributes & USB_OTG_HNP))
 			return 0;
 
diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index 79d8bd7a612e6..4ebfbd7379051 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -832,14 +832,14 @@ EXPORT_SYMBOL_GPL(usb_get_current_frame_number);
  */
 
 int __usb_get_extra_descriptor(char *buffer, unsigned size,
-			       unsigned char type, void **ptr)
+			       unsigned char type, void **ptr, size_t minsize)
 {
 	struct usb_descriptor_header *header;
 
 	while (size >= sizeof(struct usb_descriptor_header)) {
 		header = (struct usb_descriptor_header *)buffer;
 
-		if (header->bLength < 2) {
+		if (header->bLength < 2 || header->bLength > size) {
 			printk(KERN_ERR
 				"%s: bogus descriptor, type %d length %d\n",
 				usbcore_name,
@@ -848,7 +848,7 @@ int __usb_get_extra_descriptor(char *buffer, unsigned size,
 			return -1;
 		}
 
-		if (header->bDescriptorType == type) {
+		if (header->bDescriptorType == type && header->bLength >= minsize) {
 			*ptr = header;
 			return 0;
 		}
diff --git a/drivers/usb/host/hwa-hc.c b/drivers/usb/host/hwa-hc.c
index 684d6f074c3a4..09a8ebd955888 100644
--- a/drivers/usb/host/hwa-hc.c
+++ b/drivers/usb/host/hwa-hc.c
@@ -640,7 +640,7 @@ static int hwahc_security_create(struct hwahc *hwahc)
 	top = itr + itr_size;
 	result = __usb_get_extra_descriptor(usb_dev->rawdescriptors[index],
 			le16_to_cpu(usb_dev->actconfig->desc.wTotalLength),
-			USB_DT_SECURITY, (void **) &secd);
+			USB_DT_SECURITY, (void **) &secd, sizeof(*secd));
 	if (result == -1) {
 		dev_warn(dev, "BUG? WUSB host has no security descriptors\n");
 		return 0;
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 4cdd515a4385f..5e49e82c43684 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -407,11 +407,11 @@ struct usb_host_bos {
 };
 
 int __usb_get_extra_descriptor(char *buffer, unsigned size,
-	unsigned char type, void **ptr);
+	unsigned char type, void **ptr, size_t min);
 #define usb_get_extra_descriptor(ifpoint, type, ptr) \
 				__usb_get_extra_descriptor((ifpoint)->extra, \
 				(ifpoint)->extralen, \
-				type, (void **)ptr)
+				type, (void **)ptr, sizeof(**(ptr)))
 
 /* ----------------------------------------------------------------------- */
 
-- 
GitLab


From e3f5df762d4a6ef6326c3c09bc9f89ea8a2eab2c Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 5 Dec 2018 14:11:48 -0800
Subject: [PATCH 1817/1890] tools/testing/nvdimm: Align test resources to 128M

In preparation for libnvdimm growing new restrictions to detect section
conflicts between persistent memory regions, enable nfit_test to
allocate aligned resources. Use a gen_pool to allocate nfit_test's fake
resources in a separate address space from the virtual translation of
the same.

Reviewed-by: Vishal Verma <vishal.l.verma@intel.com>
Tested-by: Vishal Verma <vishal.l.verma@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 tools/testing/nvdimm/test/nfit.c | 35 ++++++++++++++++++++++++++++++--
 1 file changed, 33 insertions(+), 2 deletions(-)

diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index 01ec04bf91b59..6c16ac36d482c 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -15,6 +15,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/workqueue.h>
 #include <linux/libnvdimm.h>
+#include <linux/genalloc.h>
 #include <linux/vmalloc.h>
 #include <linux/device.h>
 #include <linux/module.h>
@@ -215,6 +216,8 @@ struct nfit_test {
 
 static struct workqueue_struct *nfit_wq;
 
+static struct gen_pool *nfit_pool;
+
 static struct nfit_test *to_nfit_test(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
@@ -1132,6 +1135,9 @@ static void release_nfit_res(void *data)
 	list_del(&nfit_res->list);
 	spin_unlock(&nfit_test_lock);
 
+	if (resource_size(&nfit_res->res) >= DIMM_SIZE)
+		gen_pool_free(nfit_pool, nfit_res->res.start,
+				resource_size(&nfit_res->res));
 	vfree(nfit_res->buf);
 	kfree(nfit_res);
 }
@@ -1144,7 +1150,7 @@ static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma,
 			GFP_KERNEL);
 	int rc;
 
-	if (!buf || !nfit_res)
+	if (!buf || !nfit_res || !*dma)
 		goto err;
 	rc = devm_add_action(dev, release_nfit_res, nfit_res);
 	if (rc)
@@ -1164,6 +1170,8 @@ static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma,
 
 	return nfit_res->buf;
  err:
+	if (*dma && size >= DIMM_SIZE)
+		gen_pool_free(nfit_pool, *dma, size);
 	if (buf)
 		vfree(buf);
 	kfree(nfit_res);
@@ -1172,9 +1180,16 @@ static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma,
 
 static void *test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma)
 {
+	struct genpool_data_align data = {
+		.align = SZ_128M,
+	};
 	void *buf = vmalloc(size);
 
-	*dma = (unsigned long) buf;
+	if (size >= DIMM_SIZE)
+		*dma = gen_pool_alloc_algo(nfit_pool, size,
+				gen_pool_first_fit_align, &data);
+	else
+		*dma = (unsigned long) buf;
 	return __test_alloc(t, size, dma, buf);
 }
 
@@ -2839,6 +2854,17 @@ static __init int nfit_test_init(void)
 		goto err_register;
 	}
 
+	nfit_pool = gen_pool_create(ilog2(SZ_4M), NUMA_NO_NODE);
+	if (!nfit_pool) {
+		rc = -ENOMEM;
+		goto err_register;
+	}
+
+	if (gen_pool_add(nfit_pool, SZ_4G, SZ_4G, NUMA_NO_NODE)) {
+		rc = -ENOMEM;
+		goto err_register;
+	}
+
 	for (i = 0; i < NUM_NFITS; i++) {
 		struct nfit_test *nfit_test;
 		struct platform_device *pdev;
@@ -2894,6 +2920,9 @@ static __init int nfit_test_init(void)
 	return 0;
 
  err_register:
+	if (nfit_pool)
+		gen_pool_destroy(nfit_pool);
+
 	destroy_workqueue(nfit_wq);
 	for (i = 0; i < NUM_NFITS; i++)
 		if (instances[i])
@@ -2917,6 +2946,8 @@ static __exit void nfit_test_exit(void)
 	platform_driver_unregister(&nfit_test_driver);
 	nfit_test_teardown();
 
+	gen_pool_destroy(nfit_pool);
+
 	for (i = 0; i < NUM_NFITS; i++)
 		put_device(&instances[i]->pdev.dev);
 	class_destroy(nfit_test_dimm);
-- 
GitLab


From ae86cbfef3818300f1972e52f67a93211acb0e24 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Sat, 24 Nov 2018 10:47:04 -0800
Subject: [PATCH 1818/1890] libnvdimm, pfn: Pad pfn namespaces relative to
 other regions

Commit cfe30b872058 "libnvdimm, pmem: adjust for section collisions with
'System RAM'" enabled Linux to workaround occasions where platform
firmware arranges for "System RAM" and "Persistent Memory" to collide
within a single section boundary. Unfortunately, as reported in this
issue [1], platform firmware can inflict the same collision between
persistent memory regions.

The approach of interrogating iomem_resource does not work in this
case because platform firmware may merge multiple regions into a single
iomem_resource range. Instead provide a method to interrogate regions
that share the same parent bus.

This is a stop-gap until the core-MM can grow support for hotplug on
sub-section boundaries.

[1]: https://github.com/pmem/ndctl/issues/76

Fixes: cfe30b872058 ("libnvdimm, pmem: adjust for section collisions with...")
Cc: <stable@vger.kernel.org>
Reported-by: Patrick Geary <patrickg@supermicro.com>
Tested-by: Patrick Geary <patrickg@supermicro.com>
Reviewed-by: Vishal Verma <vishal.l.verma@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/nvdimm/nd-core.h     |  2 ++
 drivers/nvdimm/pfn_devs.c    | 64 +++++++++++++++++++++---------------
 drivers/nvdimm/region_devs.c | 41 +++++++++++++++++++++++
 3 files changed, 80 insertions(+), 27 deletions(-)

diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h
index 182258f64417b..d0c621b32f723 100644
--- a/drivers/nvdimm/nd-core.h
+++ b/drivers/nvdimm/nd-core.h
@@ -111,6 +111,8 @@ resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region,
 		struct nd_mapping *nd_mapping, resource_size_t *overlap);
 resource_size_t nd_blk_available_dpa(struct nd_region *nd_region);
 resource_size_t nd_region_available_dpa(struct nd_region *nd_region);
+int nd_region_conflict(struct nd_region *nd_region, resource_size_t start,
+		resource_size_t size);
 resource_size_t nvdimm_allocated_dpa(struct nvdimm_drvdata *ndd,
 		struct nd_label_id *label_id);
 int alias_dpa_busy(struct device *dev, void *data);
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index 24c64090169e6..6f22272e8d801 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -649,14 +649,47 @@ static u64 phys_pmem_align_down(struct nd_pfn *nd_pfn, u64 phys)
 			ALIGN_DOWN(phys, nd_pfn->align));
 }
 
+/*
+ * Check if pmem collides with 'System RAM', or other regions when
+ * section aligned.  Trim it accordingly.
+ */
+static void trim_pfn_device(struct nd_pfn *nd_pfn, u32 *start_pad, u32 *end_trunc)
+{
+	struct nd_namespace_common *ndns = nd_pfn->ndns;
+	struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
+	struct nd_region *nd_region = to_nd_region(nd_pfn->dev.parent);
+	const resource_size_t start = nsio->res.start;
+	const resource_size_t end = start + resource_size(&nsio->res);
+	resource_size_t adjust, size;
+
+	*start_pad = 0;
+	*end_trunc = 0;
+
+	adjust = start - PHYS_SECTION_ALIGN_DOWN(start);
+	size = resource_size(&nsio->res) + adjust;
+	if (region_intersects(start - adjust, size, IORESOURCE_SYSTEM_RAM,
+				IORES_DESC_NONE) == REGION_MIXED
+			|| nd_region_conflict(nd_region, start - adjust, size))
+		*start_pad = PHYS_SECTION_ALIGN_UP(start) - start;
+
+	/* Now check that end of the range does not collide. */
+	adjust = PHYS_SECTION_ALIGN_UP(end) - end;
+	size = resource_size(&nsio->res) + adjust;
+	if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM,
+				IORES_DESC_NONE) == REGION_MIXED
+			|| !IS_ALIGNED(end, nd_pfn->align)
+			|| nd_region_conflict(nd_region, start, size + adjust))
+		*end_trunc = end - phys_pmem_align_down(nd_pfn, end);
+}
+
 static int nd_pfn_init(struct nd_pfn *nd_pfn)
 {
 	u32 dax_label_reserve = is_nd_dax(&nd_pfn->dev) ? SZ_128K : 0;
 	struct nd_namespace_common *ndns = nd_pfn->ndns;
-	u32 start_pad = 0, end_trunc = 0;
+	struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
 	resource_size_t start, size;
-	struct nd_namespace_io *nsio;
 	struct nd_region *nd_region;
+	u32 start_pad, end_trunc;
 	struct nd_pfn_sb *pfn_sb;
 	unsigned long npfns;
 	phys_addr_t offset;
@@ -688,30 +721,7 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
 
 	memset(pfn_sb, 0, sizeof(*pfn_sb));
 
-	/*
-	 * Check if pmem collides with 'System RAM' when section aligned and
-	 * trim it accordingly
-	 */
-	nsio = to_nd_namespace_io(&ndns->dev);
-	start = PHYS_SECTION_ALIGN_DOWN(nsio->res.start);
-	size = resource_size(&nsio->res);
-	if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM,
-				IORES_DESC_NONE) == REGION_MIXED) {
-		start = nsio->res.start;
-		start_pad = PHYS_SECTION_ALIGN_UP(start) - start;
-	}
-
-	start = nsio->res.start;
-	size = PHYS_SECTION_ALIGN_UP(start + size) - start;
-	if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM,
-				IORES_DESC_NONE) == REGION_MIXED
-			|| !IS_ALIGNED(start + resource_size(&nsio->res),
-				nd_pfn->align)) {
-		size = resource_size(&nsio->res);
-		end_trunc = start + size - phys_pmem_align_down(nd_pfn,
-				start + size);
-	}
-
+	trim_pfn_device(nd_pfn, &start_pad, &end_trunc);
 	if (start_pad + end_trunc)
 		dev_info(&nd_pfn->dev, "%s alignment collision, truncate %d bytes\n",
 				dev_name(&ndns->dev), start_pad + end_trunc);
@@ -722,7 +732,7 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
 	 * implementation will limit the pfns advertised through
 	 * ->direct_access() to those that are included in the memmap.
 	 */
-	start += start_pad;
+	start = nsio->res.start + start_pad;
 	size = resource_size(&nsio->res);
 	npfns = PFN_SECTION_ALIGN_UP((size - start_pad - end_trunc - SZ_8K)
 			/ PAGE_SIZE);
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index 174a418cb1715..e7377f1028ef6 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -1184,6 +1184,47 @@ int nvdimm_has_cache(struct nd_region *nd_region)
 }
 EXPORT_SYMBOL_GPL(nvdimm_has_cache);
 
+struct conflict_context {
+	struct nd_region *nd_region;
+	resource_size_t start, size;
+};
+
+static int region_conflict(struct device *dev, void *data)
+{
+	struct nd_region *nd_region;
+	struct conflict_context *ctx = data;
+	resource_size_t res_end, region_end, region_start;
+
+	if (!is_memory(dev))
+		return 0;
+
+	nd_region = to_nd_region(dev);
+	if (nd_region == ctx->nd_region)
+		return 0;
+
+	res_end = ctx->start + ctx->size;
+	region_start = nd_region->ndr_start;
+	region_end = region_start + nd_region->ndr_size;
+	if (ctx->start >= region_start && ctx->start < region_end)
+		return -EBUSY;
+	if (res_end > region_start && res_end <= region_end)
+		return -EBUSY;
+	return 0;
+}
+
+int nd_region_conflict(struct nd_region *nd_region, resource_size_t start,
+		resource_size_t size)
+{
+	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev);
+	struct conflict_context ctx = {
+		.nd_region = nd_region,
+		.start = start,
+		.size = size,
+	};
+
+	return device_for_each_child(&nvdimm_bus->dev, &ctx, region_conflict);
+}
+
 void __exit nd_region_devs_exit(void)
 {
 	ida_destroy(&region_ida);
-- 
GitLab


From b5fd2e00a60248902315fb32210550ac3cb9f44c Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 3 Dec 2018 10:30:25 -0800
Subject: [PATCH 1819/1890] acpi/nfit: Fix user-initiated ARS to be "ARS-long"
 rather than "ARS-short"

A "short" ARS (address range scrub) instructs the platform firmware to
return known errors. In contrast, a "long" ARS instructs platform
firmware to arrange every data address on the DIMM to be read / checked
for poisoned data.

The conversion of the flags in commit d3abaf43bab8 "acpi, nfit: Fix
Address Range Scrub completion tracking", changed the meaning of passing
'0' to acpi_nfit_ars_rescan(). Previously '0' meant "not short", now '0'
is ARS_REQ_SHORT. Pass ARS_REQ_LONG to restore the expected scrub-type
behavior of user-initiated ARS sessions.

Fixes: d3abaf43bab8 ("acpi, nfit: Fix Address Range Scrub completion tracking")
Reported-by: Jacek Zloch <jacek.zloch@intel.com>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Vishal Verma <vishal.l.verma@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/acpi/nfit/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 14d9f5bea0151..5912d30020c71 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -1308,7 +1308,7 @@ static ssize_t scrub_store(struct device *dev,
 	if (nd_desc) {
 		struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
 
-		rc = acpi_nfit_ars_rescan(acpi_desc, 0);
+		rc = acpi_nfit_ars_rescan(acpi_desc, ARS_REQ_LONG);
 	}
 	device_unlock(dev);
 	if (rc)
-- 
GitLab


From 2f0799a0ffc033bf3cc82d5032acc3ec633464c2 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Wed, 5 Dec 2018 15:45:54 -0800
Subject: [PATCH 1820/1890] mm, thp: restore node-local hugepage allocations

This is a full revert of ac5b2c18911f ("mm: thp: relax __GFP_THISNODE for
MADV_HUGEPAGE mappings") and a partial revert of 89c83fb539f9 ("mm, thp:
consolidate THP gfp handling into alloc_hugepage_direct_gfpmask").

By not setting __GFP_THISNODE, applications can allocate remote hugepages
when the local node is fragmented or low on memory when either the thp
defrag setting is "always" or the vma has been madvised with
MADV_HUGEPAGE.

Remote access to hugepages often has much higher latency than local pages
of the native page size.  On Haswell, ac5b2c18911f was shown to have a
13.9% access regression after this commit for binaries that remap their
text segment to be backed by transparent hugepages.

The intent of ac5b2c18911f is to address an issue where a local node is
low on memory or fragmented such that a hugepage cannot be allocated.  In
every scenario where this was described as a fix, there is abundant and
unfragmented remote memory available to allocate from, even with a greater
access latency.

If remote memory is also low or fragmented, not setting __GFP_THISNODE was
also measured on Haswell to have a 40% regression in allocation latency.

Restore __GFP_THISNODE for thp allocations.

Fixes: ac5b2c18911f ("mm: thp: relax __GFP_THISNODE for MADV_HUGEPAGE mappings")
Fixes: 89c83fb539f9 ("mm, thp: consolidate THP gfp handling into alloc_hugepage_direct_gfpmask")
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mempolicy.h |  2 --
 mm/huge_memory.c          | 42 +++++++++++++++------------------------
 mm/mempolicy.c            |  2 +-
 3 files changed, 17 insertions(+), 29 deletions(-)

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index bac395f1d00a0..5228c62af4165 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -139,8 +139,6 @@ struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp,
 struct mempolicy *get_task_policy(struct task_struct *p);
 struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
 		unsigned long addr);
-struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
-						unsigned long addr);
 bool vma_policy_mof(struct vm_area_struct *vma);
 
 extern void numa_default_policy(void);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 622cced74fd90..f2d19e4fe854f 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -632,37 +632,27 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
 static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma, unsigned long addr)
 {
 	const bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE);
-	gfp_t this_node = 0;
-
-#ifdef CONFIG_NUMA
-	struct mempolicy *pol;
-	/*
-	 * __GFP_THISNODE is used only when __GFP_DIRECT_RECLAIM is not
-	 * specified, to express a general desire to stay on the current
-	 * node for optimistic allocation attempts. If the defrag mode
-	 * and/or madvise hint requires the direct reclaim then we prefer
-	 * to fallback to other node rather than node reclaim because that
-	 * can lead to excessive reclaim even though there is free memory
-	 * on other nodes. We expect that NUMA preferences are specified
-	 * by memory policies.
-	 */
-	pol = get_vma_policy(vma, addr);
-	if (pol->mode != MPOL_BIND)
-		this_node = __GFP_THISNODE;
-	mpol_cond_put(pol);
-#endif
+	const gfp_t gfp_mask = GFP_TRANSHUGE_LIGHT | __GFP_THISNODE;
 
+	/* Always do synchronous compaction */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
-		return GFP_TRANSHUGE | (vma_madvised ? 0 : __GFP_NORETRY);
+		return GFP_TRANSHUGE | __GFP_THISNODE |
+		       (vma_madvised ? 0 : __GFP_NORETRY);
+
+	/* Kick kcompactd and fail quickly */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags))
-		return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM | this_node;
+		return gfp_mask | __GFP_KSWAPD_RECLAIM;
+
+	/* Synchronous compaction if madvised, otherwise kick kcompactd */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags))
-		return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM :
-							     __GFP_KSWAPD_RECLAIM | this_node);
+		return gfp_mask | (vma_madvised ? __GFP_DIRECT_RECLAIM :
+						  __GFP_KSWAPD_RECLAIM);
+
+	/* Only do synchronous compaction if madvised */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags))
-		return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM :
-							     this_node);
-	return GFP_TRANSHUGE_LIGHT | this_node;
+		return gfp_mask | (vma_madvised ? __GFP_DIRECT_RECLAIM : 0);
+
+	return gfp_mask;
 }
 
 /* Caller must hold page table lock. */
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 5837a067124d8..69e278b469efd 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1662,7 +1662,7 @@ struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
  * freeing by another task.  It is the caller's responsibility to free the
  * extra reference for shared policies.
  */
-struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
+static struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
 						unsigned long addr)
 {
 	struct mempolicy *pol = __get_vma_policy(vma, addr);
-- 
GitLab


From 22f6bbb7bcfcef0b373b0502a7ff390275c575dd Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Tue, 4 Dec 2018 17:37:57 +0000
Subject: [PATCH 1821/1890] net: use skb_list_del_init() to remove from RX
 sublists

list_del() leaves the skb->next pointer poisoned, which can then lead to
 a crash in e.g. OVS forwarding.  For example, setting up an OVS VXLAN
 forwarding bridge on sfc as per:

========
$ ovs-vsctl show
5dfd9c47-f04b-4aaa-aa96-4fbb0a522a30
    Bridge "br0"
        Port "br0"
            Interface "br0"
                type: internal
        Port "enp6s0f0"
            Interface "enp6s0f0"
        Port "vxlan0"
            Interface "vxlan0"
                type: vxlan
                options: {key="1", local_ip="10.0.0.5", remote_ip="10.0.0.4"}
    ovs_version: "2.5.0"
========
(where 10.0.0.5 is an address on enp6s0f1)
and sending traffic across it will lead to the following panic:
========
general protection fault: 0000 [#1] SMP PTI
CPU: 5 PID: 0 Comm: swapper/5 Not tainted 4.20.0-rc3-ehc+ #701
Hardware name: Dell Inc. PowerEdge R710/0M233H, BIOS 6.4.0 07/23/2013
RIP: 0010:dev_hard_start_xmit+0x38/0x200
Code: 53 48 89 fb 48 83 ec 20 48 85 ff 48 89 54 24 08 48 89 4c 24 18 0f 84 ab 01 00 00 48 8d 86 90 00 00 00 48 89 f5 48 89 44 24 10 <4c> 8b 33 48 c7 03 00 00 00 00 48 8b 05 c7 d1 b3 00 4d 85 f6 0f 95
RSP: 0018:ffff888627b437e0 EFLAGS: 00010202
RAX: 0000000000000000 RBX: dead000000000100 RCX: ffff88862279c000
RDX: ffff888614a342c0 RSI: 0000000000000000 RDI: 0000000000000000
RBP: ffff888618a88000 R08: 0000000000000001 R09: 00000000000003e8
R10: 0000000000000000 R11: ffff888614a34140 R12: 0000000000000000
R13: 0000000000000062 R14: dead000000000100 R15: ffff888616430000
FS:  0000000000000000(0000) GS:ffff888627b40000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f6d2bc6d000 CR3: 000000000200a000 CR4: 00000000000006e0
Call Trace:
 <IRQ>
 __dev_queue_xmit+0x623/0x870
 ? masked_flow_lookup+0xf7/0x220 [openvswitch]
 ? ep_poll_callback+0x101/0x310
 do_execute_actions+0xaba/0xaf0 [openvswitch]
 ? __wake_up_common+0x8a/0x150
 ? __wake_up_common_lock+0x87/0xc0
 ? queue_userspace_packet+0x31c/0x5b0 [openvswitch]
 ovs_execute_actions+0x47/0x120 [openvswitch]
 ovs_dp_process_packet+0x7d/0x110 [openvswitch]
 ovs_vport_receive+0x6e/0xd0 [openvswitch]
 ? dst_alloc+0x64/0x90
 ? rt_dst_alloc+0x50/0xd0
 ? ip_route_input_slow+0x19a/0x9a0
 ? __udp_enqueue_schedule_skb+0x198/0x1b0
 ? __udp4_lib_rcv+0x856/0xa30
 ? __udp4_lib_rcv+0x856/0xa30
 ? cpumask_next_and+0x19/0x20
 ? find_busiest_group+0x12d/0xcd0
 netdev_frame_hook+0xce/0x150 [openvswitch]
 __netif_receive_skb_core+0x205/0xae0
 __netif_receive_skb_list_core+0x11e/0x220
 netif_receive_skb_list+0x203/0x460
 ? __efx_rx_packet+0x335/0x5e0 [sfc]
 efx_poll+0x182/0x320 [sfc]
 net_rx_action+0x294/0x3c0
 __do_softirq+0xca/0x297
 irq_exit+0xa6/0xb0
 do_IRQ+0x54/0xd0
 common_interrupt+0xf/0xf
 </IRQ>
========
So, in all listified-receive handling, instead pull skbs off the lists with
 skb_list_del_init().

Fixes: 9af86f933894 ("net: core: fix use-after-free in __netif_receive_skb_list_core")
Fixes: 7da517a3bc52 ("net: core: Another step of skb receive list processing")
Fixes: a4ca8b7df73c ("net: ipv4: fix drop handling in ip_list_rcv() and ip_list_rcv_finish()")
Fixes: d8269e2cbf90 ("net: ipv6: listify ipv6_rcv() and ip6_rcv_finish()")
Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c       | 8 ++++----
 net/ipv4/ip_input.c  | 4 ++--
 net/ipv6/ip6_input.c | 4 ++--
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index e06223b65674e..722d50dbf8a45 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5014,7 +5014,7 @@ static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemallo
 		struct net_device *orig_dev = skb->dev;
 		struct packet_type *pt_prev = NULL;
 
-		list_del(&skb->list);
+		skb_list_del_init(skb);
 		__netif_receive_skb_core(skb, pfmemalloc, &pt_prev);
 		if (!pt_prev)
 			continue;
@@ -5170,7 +5170,7 @@ static void netif_receive_skb_list_internal(struct list_head *head)
 	INIT_LIST_HEAD(&sublist);
 	list_for_each_entry_safe(skb, next, head, list) {
 		net_timestamp_check(netdev_tstamp_prequeue, skb);
-		list_del(&skb->list);
+		skb_list_del_init(skb);
 		if (!skb_defer_rx_timestamp(skb))
 			list_add_tail(&skb->list, &sublist);
 	}
@@ -5181,7 +5181,7 @@ static void netif_receive_skb_list_internal(struct list_head *head)
 		rcu_read_lock();
 		list_for_each_entry_safe(skb, next, head, list) {
 			xdp_prog = rcu_dereference(skb->dev->xdp_prog);
-			list_del(&skb->list);
+			skb_list_del_init(skb);
 			if (do_xdp_generic(xdp_prog, skb) == XDP_PASS)
 				list_add_tail(&skb->list, &sublist);
 		}
@@ -5200,7 +5200,7 @@ static void netif_receive_skb_list_internal(struct list_head *head)
 
 			if (cpu >= 0) {
 				/* Will be handled, remove from list */
-				list_del(&skb->list);
+				skb_list_del_init(skb);
 				enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
 			}
 		}
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 35a786c0aaa06..e609b08c9df4f 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -547,7 +547,7 @@ static void ip_list_rcv_finish(struct net *net, struct sock *sk,
 	list_for_each_entry_safe(skb, next, head, list) {
 		struct dst_entry *dst;
 
-		list_del(&skb->list);
+		skb_list_del_init(skb);
 		/* if ingress device is enslaved to an L3 master device pass the
 		 * skb to its handler for processing
 		 */
@@ -594,7 +594,7 @@ void ip_list_rcv(struct list_head *head, struct packet_type *pt,
 		struct net_device *dev = skb->dev;
 		struct net *net = dev_net(dev);
 
-		list_del(&skb->list);
+		skb_list_del_init(skb);
 		skb = ip_rcv_core(skb, net);
 		if (skb == NULL)
 			continue;
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 96577e742afd4..c1d85830c906f 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -95,7 +95,7 @@ static void ip6_list_rcv_finish(struct net *net, struct sock *sk,
 	list_for_each_entry_safe(skb, next, head, list) {
 		struct dst_entry *dst;
 
-		list_del(&skb->list);
+		skb_list_del_init(skb);
 		/* if ingress device is enslaved to an L3 master device pass the
 		 * skb to its handler for processing
 		 */
@@ -296,7 +296,7 @@ void ipv6_list_rcv(struct list_head *head, struct packet_type *pt,
 		struct net_device *dev = skb->dev;
 		struct net *net = dev_net(dev);
 
-		list_del(&skb->list);
+		skb_list_del_init(skb);
 		skb = ip6_rcv_core(skb, dev, net);
 		if (skb == NULL)
 			continue;
-- 
GitLab


From 41727549de3e7281feb174d568c6e46823db8684 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 5 Dec 2018 14:24:31 -0800
Subject: [PATCH 1822/1890] tcp: Do not underestimate rwnd_limited

If available rwnd is too small, tcp_tso_should_defer()
can decide it is worth waiting before splitting a TSO packet.

This really means we are rwnd limited.

Fixes: 5615f88614a4 ("tcp: instrument how long TCP is limited by receive window")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Reviewed-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 68b5326f73212..3186902347584 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2356,8 +2356,11 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 		} else {
 			if (!push_one &&
 			    tcp_tso_should_defer(sk, skb, &is_cwnd_limited,
-						 max_segs))
+						 max_segs)) {
+				if (!is_cwnd_limited)
+					is_rwnd_limited = true;
 				break;
+			}
 		}
 
 		limit = mss_now;
-- 
GitLab


From b2b7af861122a0c0f6260155c29a1b2e594cd5b5 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Wed, 5 Dec 2018 14:38:38 -0800
Subject: [PATCH 1823/1890] tcp: fix NULL ref in tail loss probe

TCP loss probe timer may fire when the retranmission queue is empty but
has a non-zero tp->packets_out counter. tcp_send_loss_probe will call
tcp_rearm_rto which triggers NULL pointer reference by fetching the
retranmission queue head in its sub-routines.

Add a more detailed warning to help catch the root cause of the inflight
accounting inconsistency.

Reported-by: Rafael Tinoco <rafael.tinoco@linaro.org>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 3186902347584..5aa6009006956 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2497,15 +2497,18 @@ void tcp_send_loss_probe(struct sock *sk)
 		goto rearm_timer;
 	}
 	skb = skb_rb_last(&sk->tcp_rtx_queue);
+	if (unlikely(!skb)) {
+		WARN_ONCE(tp->packets_out,
+			  "invalid inflight: %u state %u cwnd %u mss %d\n",
+			  tp->packets_out, sk->sk_state, tp->snd_cwnd, mss);
+		inet_csk(sk)->icsk_pending = 0;
+		return;
+	}
 
 	/* At most one outstanding TLP retransmission. */
 	if (tp->tlp_high_seq)
 		goto rearm_timer;
 
-	/* Retransmit last segment. */
-	if (WARN_ON(!skb))
-		goto rearm_timer;
-
 	if (skb_still_in_host_queue(sk, skb))
 		goto rearm_timer;
 
-- 
GitLab


From 1e434b703248580b7aaaf8a115d93e682f57d29f Mon Sep 17 00:00:00 2001
From: Anson Huang <anson.huang@nxp.com>
Date: Tue, 4 Dec 2018 03:17:45 +0000
Subject: [PATCH 1824/1890] ARM: imx: update the cpu power up timing setting on
 i.mx6sx

The sw2iso count should cover ARM LDO ramp-up time,
the MAX ARM LDO ramp-up time may be up to more than
100us on some boards, this patch sets sw2iso to 0xf
(~384us) which is the reset value, and it is much
more safe to cover different boards, since we have
observed that some customer boards failed with current
setting of 0x2.

Fixes: 05136f0897b5 ("ARM: imx: support arm power off in cpuidle for i.mx6sx")
Signed-off-by: Anson Huang <Anson.Huang@nxp.com>
Reviewed-by: Fabio Estevam <festevam@gmail.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm/mach-imx/cpuidle-imx6sx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/mach-imx/cpuidle-imx6sx.c b/arch/arm/mach-imx/cpuidle-imx6sx.c
index 243a108a940b4..fd0053e47a151 100644
--- a/arch/arm/mach-imx/cpuidle-imx6sx.c
+++ b/arch/arm/mach-imx/cpuidle-imx6sx.c
@@ -110,7 +110,7 @@ int __init imx6sx_cpuidle_init(void)
 	 * except for power up sw2iso which need to be
 	 * larger than LDO ramp up time.
 	 */
-	imx_gpc_set_arm_power_up_timing(2, 1);
+	imx_gpc_set_arm_power_up_timing(0xf, 1);
 	imx_gpc_set_arm_power_down_timing(1, 1);
 
 	return cpuidle_register(&imx6sx_cpuidle_driver, NULL);
-- 
GitLab


From e9c7d656610e3fff7c96b199d418add2528aea6e Mon Sep 17 00:00:00 2001
From: Anders Roxell <anders.roxell@linaro.org>
Date: Fri, 30 Nov 2018 16:08:59 +0100
Subject: [PATCH 1825/1890] stackleak: Mark stackleak_track_stack() as notrace

Function graph tracing recurses into itself when stackleak is enabled,
causing the ftrace graph selftest to run for up to 90 seconds and
trigger the softlockup watchdog.

Breakpoint 2, ftrace_graph_caller () at ../arch/arm64/kernel/entry-ftrace.S:200
200             mcount_get_lr_addr        x0    //     pointer to function's saved lr
(gdb) bt
\#0  ftrace_graph_caller () at ../arch/arm64/kernel/entry-ftrace.S:200
\#1  0xffffff80081d5280 in ftrace_caller () at ../arch/arm64/kernel/entry-ftrace.S:153
\#2  0xffffff8008555484 in stackleak_track_stack () at ../kernel/stackleak.c:106
\#3  0xffffff8008421ff8 in ftrace_ops_test (ops=0xffffff8009eaa840 <graph_ops>, ip=18446743524091297036, regs=<optimized out>) at ../kernel/trace/ftrace.c:1507
\#4  0xffffff8008428770 in __ftrace_ops_list_func (regs=<optimized out>, ignored=<optimized out>, parent_ip=<optimized out>, ip=<optimized out>) at ../kernel/trace/ftrace.c:6286
\#5  ftrace_ops_no_ops (ip=18446743524091297036, parent_ip=18446743524091242824) at ../kernel/trace/ftrace.c:6321
\#6  0xffffff80081d5280 in ftrace_caller () at ../arch/arm64/kernel/entry-ftrace.S:153
\#7  0xffffff800832fd10 in irq_find_mapping (domain=0xffffffc03fc4bc80, hwirq=27) at ../kernel/irq/irqdomain.c:876
\#8  0xffffff800832294c in __handle_domain_irq (domain=0xffffffc03fc4bc80, hwirq=27, lookup=true, regs=0xffffff800814b840) at ../kernel/irq/irqdesc.c:650
\#9  0xffffff80081d52b4 in ftrace_graph_caller () at ../arch/arm64/kernel/entry-ftrace.S:205

Rework so we mark stackleak_track_stack as notrace

Co-developed-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 kernel/stackleak.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/stackleak.c b/kernel/stackleak.c
index 08cb57eed3893..b193a59fc05b0 100644
--- a/kernel/stackleak.c
+++ b/kernel/stackleak.c
@@ -104,7 +104,7 @@ asmlinkage void notrace stackleak_erase(void)
 }
 NOKPROBE_SYMBOL(stackleak_erase);
 
-void __used stackleak_track_stack(void)
+void __used notrace stackleak_track_stack(void)
 {
 	/*
 	 * N.B. stackleak_erase() fills the kernel stack with the poison value,
-- 
GitLab


From 1aed58e67a6ec1e7a18bfabe8ba6ec2d27c15636 Mon Sep 17 00:00:00 2001
From: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Date: Wed, 5 Dec 2018 09:04:23 +0530
Subject: [PATCH 1826/1890] Uprobes: Fix kernel oops with
 delayed_uprobe_remove()

There could be a race between task exit and probe unregister:

  exit_mm()
  mmput()
  __mmput()                     uprobe_unregister()
  uprobe_clear_state()          put_uprobe()
  delayed_uprobe_remove()       delayed_uprobe_remove()

put_uprobe() is calling delayed_uprobe_remove() without taking
delayed_uprobe_lock and thus the race sometimes results in a
kernel crash. Fix this by taking delayed_uprobe_lock before
calling delayed_uprobe_remove() from put_uprobe().

Detailed crash log can be found at:
  Link: http://lkml.kernel.org/r/000000000000140c370577db5ece@google.com

Link: http://lkml.kernel.org/r/20181205033423.26242-1-ravi.bangoria@linux.ibm.com

Acked-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Reported-by: syzbot+cb1fb754b771caca0a88@syzkaller.appspotmail.com
Fixes: 1cc33161a83d ("uprobes: Support SDT markers having reference count (semaphore)")
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/events/uprobes.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 96d4bee83489b..98b9312ce6b29 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -572,7 +572,9 @@ static void put_uprobe(struct uprobe *uprobe)
 		 * gets called, we don't get a chance to remove uprobe from
 		 * delayed_uprobe_list from remove_breakpoint(). Do it here.
 		 */
+		mutex_lock(&delayed_uprobe_lock);
 		delayed_uprobe_remove(uprobe, NULL);
+		mutex_unlock(&delayed_uprobe_lock);
 		kfree(uprobe);
 	}
 }
-- 
GitLab


From e594a5e349ddbfdaca1951bb3f8d72f3f1660d73 Mon Sep 17 00:00:00 2001
From: Sam Bobroff <sbobroff@linux.ibm.com>
Date: Mon, 3 Dec 2018 11:53:21 +1100
Subject: [PATCH 1827/1890] drm/ast: Fix connector leak during driver unload

When unloading the ast driver, a warning message is printed by
drm_mode_config_cleanup() because a reference is still held to one of
the drm_connector structs.

Correct this by calling drm_crtc_force_disable_all() in
ast_fbdev_destroy().

Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1e613f3c630c7bbc72e04a44b178259b9164d2f6.1543798395.git.sbobroff@linux.ibm.com
---
 drivers/gpu/drm/ast/ast_fb.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/ast/ast_fb.c b/drivers/gpu/drm/ast/ast_fb.c
index 0cd827e11fa20..de26df0c6044d 100644
--- a/drivers/gpu/drm/ast/ast_fb.c
+++ b/drivers/gpu/drm/ast/ast_fb.c
@@ -263,6 +263,7 @@ static void ast_fbdev_destroy(struct drm_device *dev,
 {
 	struct ast_framebuffer *afb = &afbdev->afb;
 
+	drm_crtc_force_disable_all(dev);
 	drm_fb_helper_unregister_fbi(&afbdev->helper);
 
 	if (afb->obj) {
-- 
GitLab


From afd0a8006e98b1890908f81746c94ca5dae29d7c Mon Sep 17 00:00:00 2001
From: Jakub Audykowicz <jakub.audykowicz@gmail.com>
Date: Tue, 4 Dec 2018 20:27:41 +0100
Subject: [PATCH 1828/1890] sctp: frag_point sanity check

If for some reason an association's fragmentation point is zero,
sctp_datamsg_from_user will try to endlessly try to divide a message
into zero-sized chunks. This eventually causes kernel panic due to
running out of memory.

Although this situation is quite unlikely, it has occurred before as
reported. I propose to add this simple last-ditch sanity check due to
the severity of the potential consequences.

Signed-off-by: Jakub Audykowicz <jakub.audykowicz@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/sctp.h | 5 +++++
 net/sctp/chunk.c        | 6 ++++++
 net/sctp/socket.c       | 3 +--
 3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index ab9242e51d9e0..2abbc15824af9 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -620,4 +620,9 @@ static inline bool sctp_transport_pmtu_check(struct sctp_transport *t)
 	return false;
 }
 
+static inline __u32 sctp_min_frag_point(struct sctp_sock *sp, __u16 datasize)
+{
+	return sctp_mtu_payload(sp, SCTP_DEFAULT_MINSEGMENT, datasize);
+}
+
 #endif /* __net_sctp_h__ */
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index ce8087846f059..d2048de86e7c2 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -191,6 +191,12 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
 	 * the packet
 	 */
 	max_data = asoc->frag_point;
+	if (unlikely(!max_data)) {
+		max_data = sctp_min_frag_point(sctp_sk(asoc->base.sk),
+					       sctp_datachk_len(&asoc->stream));
+		pr_warn_ratelimited("%s: asoc:%p frag_point is zero, forcing max_data to default minimum (%Zu)",
+				    __func__, asoc, max_data);
+	}
 
 	/* If the the peer requested that we authenticate DATA chunks
 	 * we need to account for bundling of the AUTH chunks along with
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index bf618d1b41fd2..b8cebd5a87e5c 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -3324,8 +3324,7 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned
 		__u16 datasize = asoc ? sctp_datachk_len(&asoc->stream) :
 				 sizeof(struct sctp_data_chunk);
 
-		min_len = sctp_mtu_payload(sp, SCTP_DEFAULT_MINSEGMENT,
-					   datasize);
+		min_len = sctp_min_frag_point(sp, datasize);
 		max_len = SCTP_MAX_CHUNK_LEN - datasize;
 
 		if (val < min_len || val > max_len)
-- 
GitLab


From ebaf39e6032faf77218220707fc3fa22487784e0 Mon Sep 17 00:00:00 2001
From: Jiri Wiesner <jwiesner@suse.com>
Date: Wed, 5 Dec 2018 16:55:29 +0100
Subject: [PATCH 1829/1890] ipv4: ipv6: netfilter: Adjust the frag mem limit
 when truesize changes

The *_frag_reasm() functions are susceptible to miscalculating the byte
count of packet fragments in case the truesize of a head buffer changes.
The truesize member may be changed by the call to skb_unclone(), leaving
the fragment memory limit counter unbalanced even if all fragments are
processed. This miscalculation goes unnoticed as long as the network
namespace which holds the counter is not destroyed.

Should an attempt be made to destroy a network namespace that holds an
unbalanced fragment memory limit counter the cleanup of the namespace
never finishes. The thread handling the cleanup gets stuck in
inet_frags_exit_net() waiting for the percpu counter to reach zero. The
thread is usually in running state with a stacktrace similar to:

 PID: 1073   TASK: ffff880626711440  CPU: 1   COMMAND: "kworker/u48:4"
  #5 [ffff880621563d48] _raw_spin_lock at ffffffff815f5480
  #6 [ffff880621563d48] inet_evict_bucket at ffffffff8158020b
  #7 [ffff880621563d80] inet_frags_exit_net at ffffffff8158051c
  #8 [ffff880621563db0] ops_exit_list at ffffffff814f5856
  #9 [ffff880621563dd8] cleanup_net at ffffffff814f67c0
 #10 [ffff880621563e38] process_one_work at ffffffff81096f14

It is not possible to create new network namespaces, and processes
that call unshare() end up being stuck in uninterruptible sleep state
waiting to acquire the net_mutex.

The bug was observed in the IPv6 netfilter code by Per Sundstrom.
I thank him for his analysis of the problem. The parts of this patch
that apply to IPv4 and IPv6 fragment reassembly are preemptive measures.

Signed-off-by: Jiri Wiesner <jwiesner@suse.com>
Reported-by: Per Sundstrom <per.sundstrom@redqube.se>
Acked-by: Peter Oskolkov <posk@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_fragment.c                  | 7 +++++++
 net/ipv6/netfilter/nf_conntrack_reasm.c | 8 +++++++-
 net/ipv6/reassembly.c                   | 8 +++++++-
 3 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index d6ee343fdb864..aa0b22697998a 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -515,6 +515,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
 	struct rb_node *rbn;
 	int len;
 	int ihlen;
+	int delta;
 	int err;
 	u8 ecn;
 
@@ -556,10 +557,16 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
 	if (len > 65535)
 		goto out_oversize;
 
+	delta = - head->truesize;
+
 	/* Head of list must not be cloned. */
 	if (skb_unclone(head, GFP_ATOMIC))
 		goto out_nomem;
 
+	delta += head->truesize;
+	if (delta)
+		add_frag_mem_limit(qp->q.net, delta);
+
 	/* If the first fragment is fragmented itself, we split
 	 * it to two chunks: the first with data and paged part
 	 * and the second, holding only fragments. */
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index d219979c3e529..181da2c40f9a9 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -341,7 +341,7 @@ static bool
 nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev,  struct net_device *dev)
 {
 	struct sk_buff *fp, *head = fq->q.fragments;
-	int    payload_len;
+	int    payload_len, delta;
 	u8 ecn;
 
 	inet_frag_kill(&fq->q);
@@ -363,10 +363,16 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev,  struct net_devic
 		return false;
 	}
 
+	delta = - head->truesize;
+
 	/* Head of list must not be cloned. */
 	if (skb_unclone(head, GFP_ATOMIC))
 		return false;
 
+	delta += head->truesize;
+	if (delta)
+		add_frag_mem_limit(fq->q.net, delta);
+
 	/* If the first fragment is fragmented itself, we split
 	 * it to two chunks: the first with data and paged part
 	 * and the second, holding only fragments. */
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 5c3c927130962..aa26c45486d94 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -281,7 +281,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 {
 	struct net *net = container_of(fq->q.net, struct net, ipv6.frags);
 	struct sk_buff *fp, *head = fq->q.fragments;
-	int    payload_len;
+	int    payload_len, delta;
 	unsigned int nhoff;
 	int sum_truesize;
 	u8 ecn;
@@ -322,10 +322,16 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 	if (payload_len > IPV6_MAXPLEN)
 		goto out_oversize;
 
+	delta = - head->truesize;
+
 	/* Head of list must not be cloned. */
 	if (skb_unclone(head, GFP_ATOMIC))
 		goto out_oom;
 
+	delta += head->truesize;
+	if (delta)
+		add_frag_mem_limit(fq->q.net, delta);
+
 	/* If the first fragment is fragmented itself, we split
 	 * it to two chunks: the first with data and paged part
 	 * and the second, holding only fragments. */
-- 
GitLab


From f15096f12a4e9340168df5fdd9201aa8ed60d59e Mon Sep 17 00:00:00 2001
From: Fabio Estevam <festevam@gmail.com>
Date: Wed, 5 Dec 2018 09:05:30 -0200
Subject: [PATCH 1830/1890] ARM: dts: imx7d-nitrogen7: Fix the description of
 the Wifi clock

According to bindings/regulator/fixed-regulator.txt the 'clocks' and
'clock-names' properties are not valid ones.

In order to turn on the Wifi clock the correct location for describing
the CLKO2 clock is via a mmc-pwrseq handle, so do it accordingly.

Fixes: 56354959cfec ("ARM: dts: imx: add Boundary Devices Nitrogen7 board")
Signed-off-by: Fabio Estevam <festevam@gmail.com>
Acked-by: Troy Kisky <troy.kisky@boundarydevices.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 arch/arm/boot/dts/imx7d-nitrogen7.dts | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/arch/arm/boot/dts/imx7d-nitrogen7.dts b/arch/arm/boot/dts/imx7d-nitrogen7.dts
index d8aac4a2d02a2..177d21fdeb288 100644
--- a/arch/arm/boot/dts/imx7d-nitrogen7.dts
+++ b/arch/arm/boot/dts/imx7d-nitrogen7.dts
@@ -86,13 +86,17 @@ reg_wlan: regulator-wlan {
 		compatible = "regulator-fixed";
 		regulator-min-microvolt = <3300000>;
 		regulator-max-microvolt = <3300000>;
-		clocks = <&clks IMX7D_CLKO2_ROOT_DIV>;
-		clock-names = "slow";
 		regulator-name = "reg_wlan";
 		startup-delay-us = <70000>;
 		gpio = <&gpio4 21 GPIO_ACTIVE_HIGH>;
 		enable-active-high;
 	};
+
+	usdhc2_pwrseq: usdhc2_pwrseq {
+		compatible = "mmc-pwrseq-simple";
+		clocks = <&clks IMX7D_CLKO2_ROOT_DIV>;
+		clock-names = "ext_clock";
+	};
 };
 
 &adc1 {
@@ -375,6 +379,7 @@ &usdhc2 {
 	bus-width = <4>;
 	non-removable;
 	vmmc-supply = <&reg_wlan>;
+	mmc-pwrseq = <&usdhc2_pwrseq>;
 	cap-power-off-card;
 	keep-power-in-suspend;
 	status = "okay";
-- 
GitLab


From 6e785302dad32228819d8066e5376acd15d0e6ba Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Sat, 3 Nov 2018 15:02:44 -0500
Subject: [PATCH 1831/1890] cifs: In Kconfig CONFIG_CIFS_POSIX needs depends on
 legacy (insecure cifs)

Missing a dependency.  Shouldn't show cifs posix extensions
in Kconfig if CONFIG_CIFS_ALLOW_INSECURE_DIALECTS (ie SMB1
protocol) is disabled.

Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
---
 fs/cifs/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index abcd78e332feb..85dadb93c9926 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -133,7 +133,7 @@ config CIFS_XATTR
 
 config CIFS_POSIX
         bool "CIFS POSIX Extensions"
-        depends on CIFS_XATTR
+        depends on CIFS && CIFS_ALLOW_INSECURE_LEGACY && CIFS_XATTR
         help
           Enabling this option will cause the cifs client to attempt to
 	  negotiate a newer dialect with servers, such as Samba 3.0.5
-- 
GitLab


From c988de29ca161823db6a7125e803d597ef75b49c Mon Sep 17 00:00:00 2001
From: Paulo Alcantara <palcantara@suse.com>
Date: Thu, 15 Nov 2018 15:20:52 +0100
Subject: [PATCH 1832/1890] cifs: Fix separator when building path from dentry

Make sure to use the CIFS_DIR_SEP(cifs_sb) as path separator for
prefixpath too. Fixes a bug with smb1 UNIX extensions.

Fixes: a6b5058fafdf ("fs/cifs: make share unaccessible at root level mountable")
Signed-off-by: Paulo Alcantara <palcantara@suse.com>
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
CC: Stable <stable@vger.kernel.org>
---
 fs/cifs/dir.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 3713d22b95a70..907e85d65bb4e 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -174,7 +174,7 @@ build_path_from_dentry_optional_prefix(struct dentry *direntry, bool prefix)
 
 		cifs_dbg(FYI, "using cifs_sb prepath <%s>\n", cifs_sb->prepath);
 		memcpy(full_path+dfsplen+1, cifs_sb->prepath, pplen-1);
-		full_path[dfsplen] = '\\';
+		full_path[dfsplen] = dirsep;
 		for (i = 0; i < pplen-1; i++)
 			if (full_path[dfsplen+1+i] == '/')
 				full_path[dfsplen+1+i] = CIFS_DIR_SEP(cifs_sb);
-- 
GitLab


From a7d57abcc8a5bdeb53bbf8e87558e8e0a2c2a29d Mon Sep 17 00:00:00 2001
From: Sandeep Singh <sandeep.singh@amd.com>
Date: Wed, 5 Dec 2018 14:22:38 +0200
Subject: [PATCH 1833/1890] xhci: workaround CSS timeout on AMD SNPS 3.0 xHC

Occasionally AMD SNPS 3.0 xHC does not respond to
CSS when set, also it does not flag anything on SRE and HCE
to point the internal xHC errors on USBSTS register. This stalls
the entire system wide suspend and there is no point in stalling
just because of xHC CSS is not responding.

To work around this problem, if the xHC does not flag
anything on SRE and HCE, we can skip the CSS
timeout and allow the system to continue the suspend. Once the
system resume happens we can internally reset the controller
using XHCI_RESET_ON_RESUME quirk

Signed-off-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
Signed-off-by: Sandeep Singh <Sandeep.Singh@amd.com>
cc: Nehal Shah <Nehal-bakulchandra.Shah@amd.com>
Cc: <stable@vger.kernel.org>
Tested-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-pci.c |  4 ++++
 drivers/usb/host/xhci.c     | 26 ++++++++++++++++++++++----
 drivers/usb/host/xhci.h     |  3 +++
 3 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index a9515265db4db..a9ec7051f2864 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -139,6 +139,10 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
 		 pdev->device == 0x43bb))
 		xhci->quirks |= XHCI_SUSPEND_DELAY;
 
+	if (pdev->vendor == PCI_VENDOR_ID_AMD &&
+	    (pdev->device == 0x15e0 || pdev->device == 0x15e1))
+		xhci->quirks |= XHCI_SNPS_BROKEN_SUSPEND;
+
 	if (pdev->vendor == PCI_VENDOR_ID_AMD)
 		xhci->quirks |= XHCI_TRUST_TX_LENGTH;
 
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index c928dbbff8811..c20b85e28d81c 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -968,6 +968,7 @@ int xhci_suspend(struct xhci_hcd *xhci, bool do_wakeup)
 	unsigned int		delay = XHCI_MAX_HALT_USEC;
 	struct usb_hcd		*hcd = xhci_to_hcd(xhci);
 	u32			command;
+	u32			res;
 
 	if (!hcd->state)
 		return 0;
@@ -1021,11 +1022,28 @@ int xhci_suspend(struct xhci_hcd *xhci, bool do_wakeup)
 	command = readl(&xhci->op_regs->command);
 	command |= CMD_CSS;
 	writel(command, &xhci->op_regs->command);
+	xhci->broken_suspend = 0;
 	if (xhci_handshake(&xhci->op_regs->status,
 				STS_SAVE, 0, 10 * 1000)) {
-		xhci_warn(xhci, "WARN: xHC save state timeout\n");
-		spin_unlock_irq(&xhci->lock);
-		return -ETIMEDOUT;
+	/*
+	 * AMD SNPS xHC 3.0 occasionally does not clear the
+	 * SSS bit of USBSTS and when driver tries to poll
+	 * to see if the xHC clears BIT(8) which never happens
+	 * and driver assumes that controller is not responding
+	 * and times out. To workaround this, its good to check
+	 * if SRE and HCE bits are not set (as per xhci
+	 * Section 5.4.2) and bypass the timeout.
+	 */
+		res = readl(&xhci->op_regs->status);
+		if ((xhci->quirks & XHCI_SNPS_BROKEN_SUSPEND) &&
+		    (((res & STS_SRE) == 0) &&
+				((res & STS_HCE) == 0))) {
+			xhci->broken_suspend = 1;
+		} else {
+			xhci_warn(xhci, "WARN: xHC save state timeout\n");
+			spin_unlock_irq(&xhci->lock);
+			return -ETIMEDOUT;
+		}
 	}
 	spin_unlock_irq(&xhci->lock);
 
@@ -1078,7 +1096,7 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated)
 	set_bit(HCD_FLAG_HW_ACCESSIBLE, &xhci->shared_hcd->flags);
 
 	spin_lock_irq(&xhci->lock);
-	if (xhci->quirks & XHCI_RESET_ON_RESUME)
+	if ((xhci->quirks & XHCI_RESET_ON_RESUME) || xhci->broken_suspend)
 		hibernated = true;
 
 	if (!hibernated) {
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index 260b259b72bcb..c3515bad5dbba 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1850,6 +1850,7 @@ struct xhci_hcd {
 #define XHCI_ZERO_64B_REGS	BIT_ULL(32)
 #define XHCI_DEFAULT_PM_RUNTIME_ALLOW	BIT_ULL(33)
 #define XHCI_RESET_PLL_ON_DISCONNECT	BIT_ULL(34)
+#define XHCI_SNPS_BROKEN_SUSPEND    BIT_ULL(35)
 
 	unsigned int		num_active_eps;
 	unsigned int		limit_active_eps;
@@ -1879,6 +1880,8 @@ struct xhci_hcd {
 	void			*dbc;
 	/* platform-specific data -- must come last */
 	unsigned long		priv[0] __aligned(sizeof(s64));
+	/* Broken Suspend flag for SNPS Suspend resume issue */
+	u8			broken_suspend;
 };
 
 /* Platform specific overrides to generic XHCI hc_driver ops */
-- 
GitLab


From 0472bf06c6fd33c1a18aaead4c8f91e5a03d8d7b Mon Sep 17 00:00:00 2001
From: Mathias Nyman <mathias.nyman@linux.intel.com>
Date: Wed, 5 Dec 2018 14:22:39 +0200
Subject: [PATCH 1834/1890] xhci: Prevent U1/U2 link pm states if exit latency
 is too long

Don't allow USB3 U1 or U2 if the latency to wake up from the U-state
reaches the service interval for a periodic endpoint.

This is according to xhci 1.1 specification section 4.23.5.2 extra note:

"Software shall ensure that a device is prevented from entering a U-state
 where its worst case exit latency approaches the ESIT."

Allowing too long exit latencies for periodic endpoint confuses xHC
internal scheduling, and new devices may fail to enumerate with a
"Not enough bandwidth for new device state" error from the host.

Cc: <stable@vger.kernel.org>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index c20b85e28d81c..dae3be1b9c8f0 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -4514,6 +4514,14 @@ static u16 xhci_calculate_u1_timeout(struct xhci_hcd *xhci,
 {
 	unsigned long long timeout_ns;
 
+	/* Prevent U1 if service interval is shorter than U1 exit latency */
+	if (usb_endpoint_xfer_int(desc) || usb_endpoint_xfer_isoc(desc)) {
+		if (xhci_service_interval_to_ns(desc) <= udev->u1_params.mel) {
+			dev_dbg(&udev->dev, "Disable U1, ESIT shorter than exit latency\n");
+			return USB3_LPM_DISABLED;
+		}
+	}
+
 	if (xhci->quirks & XHCI_INTEL_HOST)
 		timeout_ns = xhci_calculate_intel_u1_timeout(udev, desc);
 	else
@@ -4570,6 +4578,14 @@ static u16 xhci_calculate_u2_timeout(struct xhci_hcd *xhci,
 {
 	unsigned long long timeout_ns;
 
+	/* Prevent U2 if service interval is shorter than U2 exit latency */
+	if (usb_endpoint_xfer_int(desc) || usb_endpoint_xfer_isoc(desc)) {
+		if (xhci_service_interval_to_ns(desc) <= udev->u2_params.mel) {
+			dev_dbg(&udev->dev, "Disable U2, ESIT shorter than exit latency\n");
+			return USB3_LPM_DISABLED;
+		}
+	}
+
 	if (xhci->quirks & XHCI_INTEL_HOST)
 		timeout_ns = xhci_calculate_intel_u2_timeout(udev, desc);
 	else
-- 
GitLab


From dada6a43b0402eba438a17ac86fdc64ac56a4607 Mon Sep 17 00:00:00 2001
From: Macpaul Lin <macpaul@gmail.com>
Date: Wed, 17 Oct 2018 23:08:38 +0800
Subject: [PATCH 1835/1890] kgdboc: fix KASAN global-out-of-bounds bug in
 param_set_kgdboc_var()

This patch is trying to fix KE issue due to
"BUG: KASAN: global-out-of-bounds in param_set_kgdboc_var+0x194/0x198"
reported by Syzkaller scan."

[26364:syz-executor0][name:report8t]BUG: KASAN: global-out-of-bounds in param_set_kgdboc_var+0x194/0x198
[26364:syz-executor0][name:report&]Read of size 1 at addr ffffff900e44f95f by task syz-executor0/26364
[26364:syz-executor0][name:report&]
[26364:syz-executor0]CPU: 7 PID: 26364 Comm: syz-executor0 Tainted: G W 0
[26364:syz-executor0]Call trace:
[26364:syz-executor0][<ffffff9008095cf8>] dump_bacIctrace+Ox0/0x470
[26364:syz-executor0][<ffffff9008096de0>] show_stack+0x20/0x30
[26364:syz-executor0][<ffffff90089cc9c8>] dump_stack+Oxd8/0x128
[26364:syz-executor0][<ffffff90084edb38>] print_address_description +0x80/0x4a8
[26364:syz-executor0][<ffffff90084ee270>] kasan_report+Ox178/0x390
[26364:syz-executor0][<ffffff90084ee4a0>] _asan_report_loadi_noabort+Ox18/0x20
[26364:syz-executor0][<ffffff9008b092ac>] param_set_kgdboc_var+Ox194/0x198
[26364:syz-executor0][<ffffff900813af64>] param_attr_store+Ox14c/0x270
[26364:syz-executor0][<ffffff90081394c8>] module_attr_store+0x60/0x90
[26364:syz-executor0][<ffffff90086690c0>] sysfs_kl_write+Ox100/0x158
[26364:syz-executor0][<ffffff9008666d84>] kernfs_fop_write+0x27c/0x3a8
[26364:syz-executor0][<ffffff9008508264>] do_loop_readv_writev+0x114/0x1b0
[26364:syz-executor0][<ffffff9008509ac8>] do_readv_writev+0x4f8/0x5e0
[26364:syz-executor0][<ffffff9008509ce4>] vfs_writev+0x7c/Oxb8
[26364:syz-executor0][<ffffff900850ba64>] SyS_writev+Oxcc/0x208
[26364:syz-executor0][<ffffff90080883f0>] elO_svc_naked +0x24/0x28
[26364:syz-executor0][name:report&]
[26364:syz-executor0][name:report&]The buggy address belongs to the variable:
[26364:syz-executor0][name:report&] kgdb_tty_line+Ox3f/0x40
[26364:syz-executor0][name:report&]
[26364:syz-executor0][name:report&]Memory state around the buggy address:
[26364:syz-executor0] ffffff900e44f800: 00 00 00 00 00 04 fa fa fa fa fa fa 00 fa fa fa
[26364:syz-executor0] ffffff900e44f880: fa fa fa fa 00 fa fa fa fa fa fa fa 00 fa fa fa
[26364:syz-executor0]> ffffff900e44f900: fa fa fa fa 04 fa fa fa fa fa fa fa 00 00 00 00
[26364:syz-executor0][name:report&]                                       ^
[26364:syz-executor0] ffffff900e44f980: 00 fa fa fa fa fa fa fa 04 fa fa fa fa fa fa fa
[26364:syz-executor0] ffffff900e44fa00: 04 fa fa fa fa fa fa fa 00 fa fa fa fa fa fa fa
[26364:syz-executor0][name:report&]
[26364:syz-executor0][name:panic&]Disabling lock debugging due to kernel taint
[26364:syz-executor0]------------[cut here]------------

After checking the source code, we've found there might be an out-of-bounds
access to "config[len - 1]" array when the variable "len" is zero.

Signed-off-by: Macpaul Lin <macpaul@gmail.com>
Acked-by: Daniel Thompson <daniel.thompson@linaro.org>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/kgdboc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/serial/kgdboc.c b/drivers/tty/serial/kgdboc.c
index baeeeaec3f030..6fb312e7af713 100644
--- a/drivers/tty/serial/kgdboc.c
+++ b/drivers/tty/serial/kgdboc.c
@@ -233,7 +233,7 @@ static void kgdboc_put_char(u8 chr)
 static int param_set_kgdboc_var(const char *kmessage,
 				const struct kernel_param *kp)
 {
-	int len = strlen(kmessage);
+	size_t len = strlen(kmessage);
 
 	if (len >= MAX_CONFIG_LEN) {
 		pr_err("config string too long\n");
@@ -254,7 +254,7 @@ static int param_set_kgdboc_var(const char *kmessage,
 
 	strcpy(config, kmessage);
 	/* Chop out \n char as a result of echo */
-	if (config[len - 1] == '\n')
+	if (len && config[len - 1] == '\n')
 		config[len - 1] = '\0';
 
 	if (configured == 1)
-- 
GitLab


From a50480cb6d61d5c5fc13308479407b628b6bc1c5 Mon Sep 17 00:00:00 2001
From: Andrea Righi <righi.andrea@gmail.com>
Date: Thu, 6 Dec 2018 10:56:48 +0100
Subject: [PATCH 1836/1890] kprobes/x86: Blacklist non-attachable interrupt
 functions

These interrupt functions are already non-attachable by kprobes.
Blacklist them explicitly so that they can show up in
/sys/kernel/debug/kprobes/blacklist and tools like BCC can use this
additional information.

Signed-off-by: Andrea Righi <righi.andrea@gmail.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: David S. Miller <davem@davemloft.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Yonghong Song <yhs@fb.com>
Link: http://lkml.kernel.org/r/20181206095648.GA8249@Dell
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/entry/entry_64.S | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index ce25d84023c02..1f0efdb7b6294 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -566,6 +566,7 @@ ENTRY(interrupt_entry)
 
 	ret
 END(interrupt_entry)
+_ASM_NOKPROBE(interrupt_entry)
 
 
 /* Interrupt entry/exit. */
@@ -766,6 +767,7 @@ native_irq_return_ldt:
 	jmp	native_irq_return_iret
 #endif
 END(common_interrupt)
+_ASM_NOKPROBE(common_interrupt)
 
 /*
  * APIC interrupts.
@@ -780,6 +782,7 @@ ENTRY(\sym)
 	call	\do_sym	/* rdi points to pt_regs */
 	jmp	ret_from_intr
 END(\sym)
+_ASM_NOKPROBE(\sym)
 .endm
 
 /* Make sure APIC interrupt handlers end up in the irqentry section: */
@@ -960,6 +963,7 @@ ENTRY(\sym)
 
 	jmp	error_exit
 	.endif
+_ASM_NOKPROBE(\sym)
 END(\sym)
 .endm
 
-- 
GitLab


From b7d624ab431227af376787148cd7d474088c03aa Mon Sep 17 00:00:00 2001
From: Guo Ren <ren_guo@c-sky.com>
Date: Thu, 6 Dec 2018 10:07:40 +0800
Subject: [PATCH 1837/1890] asm-generic: unistd.h: fixup broken macro include.

The broken macros make the glibc compile error. If there is no
__NR3264_fstat*, we should also removed related definitions.

Reported-by: Marcin Juszkiewicz <marcin.juszkiewicz@linaro.org>
Fixes: bf4b6a7d371e ("y2038: Remove stat64 family from default syscall set")
[arnd: Both Marcin and Guo provided this patch to fix up my clearly
       broken commit, I applied the version with the better changelog.]
Signed-off-by: Guo Ren <ren_guo@c-sky.com>
Signed-off-by: Mao Han <han_mao@c-sky.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/uapi/asm-generic/unistd.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 538546edbfbd2..c7f3321fbe438 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -760,8 +760,10 @@ __SYSCALL(__NR_rseq, sys_rseq)
 #define __NR_ftruncate __NR3264_ftruncate
 #define __NR_lseek __NR3264_lseek
 #define __NR_sendfile __NR3264_sendfile
+#if defined(__ARCH_WANT_NEW_STAT) || defined(__ARCH_WANT_STAT64)
 #define __NR_newfstatat __NR3264_fstatat
 #define __NR_fstat __NR3264_fstat
+#endif
 #define __NR_mmap __NR3264_mmap
 #define __NR_fadvise64 __NR3264_fadvise64
 #ifdef __NR3264_stat
@@ -776,8 +778,10 @@ __SYSCALL(__NR_rseq, sys_rseq)
 #define __NR_ftruncate64 __NR3264_ftruncate
 #define __NR_llseek __NR3264_lseek
 #define __NR_sendfile64 __NR3264_sendfile
+#if defined(__ARCH_WANT_NEW_STAT) || defined(__ARCH_WANT_STAT64)
 #define __NR_fstatat64 __NR3264_fstatat
 #define __NR_fstat64 __NR3264_fstat
+#endif
 #define __NR_mmap2 __NR3264_mmap
 #define __NR_fadvise64_64 __NR3264_fadvise64
 #ifdef __NR3264_stat
-- 
GitLab


From 06fd9ab12b804451b14d538adbf98a57c2d6846b Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Wed, 5 Dec 2018 11:21:49 +0100
Subject: [PATCH 1838/1890] gnss: sirf: fix activation retry handling

Fix activation helper which would return -ETIMEDOUT even if the last
retry attempt was successful.

Also change the semantics of the retries variable so that it actually
holds the number of retries (rather than tries).

Fixes: d2efbbd18b1e ("gnss: add driver for sirfstar-based receivers")
Cc: stable <stable@vger.kernel.org>	# 4.19
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/gnss/sirf.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gnss/sirf.c b/drivers/gnss/sirf.c
index 71d014edd1676..2c22836d3ffd5 100644
--- a/drivers/gnss/sirf.c
+++ b/drivers/gnss/sirf.c
@@ -168,7 +168,7 @@ static int sirf_set_active(struct sirf_data *data, bool active)
 	else
 		timeout = SIRF_HIBERNATE_TIMEOUT;
 
-	while (retries-- > 0) {
+	do {
 		sirf_pulse_on_off(data);
 		ret = sirf_wait_for_power_state(data, active, timeout);
 		if (ret < 0) {
@@ -179,9 +179,9 @@ static int sirf_set_active(struct sirf_data *data, bool active)
 		}
 
 		break;
-	}
+	} while (retries--);
 
-	if (retries == 0)
+	if (retries < 0)
 		return -ETIMEDOUT;
 
 	return 0;
-- 
GitLab


From 85280de450f1ce8997b3ccfd0e504a942e5fa2d8 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Thu, 6 Dec 2018 07:23:17 +0100
Subject: [PATCH 1839/1890] MAINTAINERS: add gnss scm tree

Add SCM tree for the gnss subsystem.

Signed-off-by: Johan Hovold <johan@kernel.org>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 6682420421c15..03766ddf95a38 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6316,6 +6316,7 @@ F:	include/uapi/linux/gigaset_dev.h
 
 GNSS SUBSYSTEM
 M:	Johan Hovold <johan@kernel.org>
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/johan/gnss.git
 S:	Maintained
 F:	Documentation/ABI/testing/sysfs-class-gnss
 F:	Documentation/devicetree/bindings/gnss/
-- 
GitLab


From 63cea1f735284b2244e3f031625c65373520eff9 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Thu, 6 Dec 2018 07:23:18 +0100
Subject: [PATCH 1840/1890] MAINTAINERS: exclude gnss from SIRFPRIMA2 regex
 matching

Exclude the gnss subsystem from SIRMPRIMA2 regex matching, which would
otherwise match the unrelated gnss sirf driver.

Cc: Barry Song <baohua@kernel.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 03766ddf95a38..0f083103d625e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1472,6 +1472,7 @@ F:	drivers/clk/sirf/
 F:	drivers/clocksource/timer-prima2.c
 F:	drivers/clocksource/timer-atlas7.c
 N:	[^a-z]sirf
+X:	drivers/gnss
 
 ARM/EBSA110 MACHINE SUPPORT
 M:	Russell King <linux@armlinux.org.uk>
-- 
GitLab


From 3a4d0c2172bcf15b7a3d9d498b2b355f9864286b Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Thu, 6 Dec 2018 16:36:38 +0000
Subject: [PATCH 1841/1890] ARM: ensure that processor vtables is not lost
 after boot

Marek Szyprowski reported problems with CPU hotplug in current kernels.
This was tracked down to the processor vtables being located in an
init section, and therefore discarded after kernel boot, despite being
required after boot to properly initialise the non-boot CPUs.

Arrange for these tables to end up in .rodata when required.

Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
Tested-by: Krzysztof Kozlowski <krzk@kernel.org>
Fixes: 383fb3ee8024 ("ARM: spectre-v2: per-CPU vtables to work around big.Little systems")
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/mm/proc-macros.S | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S
index 81d0efb055c66..19516fbc2c55a 100644
--- a/arch/arm/mm/proc-macros.S
+++ b/arch/arm/mm/proc-macros.S
@@ -274,6 +274,13 @@
 	.endm
 
 .macro define_processor_functions name:req, dabort:req, pabort:req, nommu=0, suspend=0, bugs=0
+/*
+ * If we are building for big.Little with branch predictor hardening,
+ * we need the processor function tables to remain available after boot.
+ */
+#if 1 // defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR)
+	.section ".rodata"
+#endif
 	.type	\name\()_processor_functions, #object
 	.align 2
 ENTRY(\name\()_processor_functions)
@@ -309,6 +316,9 @@ ENTRY(\name\()_processor_functions)
 	.endif
 
 	.size	\name\()_processor_functions, . - \name\()_processor_functions
+#if 1 // defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR)
+	.previous
+#endif
 .endm
 
 .macro define_cache_functions name:req
-- 
GitLab


From 8fb2dfb228df785bbeb4d055a74402ef4b07fc25 Mon Sep 17 00:00:00 2001
From: Alexander Popov <alex.popov@linux.com>
Date: Thu, 6 Dec 2018 18:13:07 +0300
Subject: [PATCH 1842/1890] stackleak: Register the 'stackleak_cleanup' pass
 before the '*free_cfg' pass

Currently the 'stackleak_cleanup' pass deleting a CALL insn is executed
after the 'reload' pass. That allows gcc to do some weird optimization in
function prologues and epilogues, which are generated later [1].

Let's avoid that by registering the 'stackleak_cleanup' pass before
the '*free_cfg' pass. It's the moment when the stack frame size is
already final, function prologues and epilogues are generated, and the
machine-dependent code transformations are not done.

[1] https://www.openwall.com/lists/kernel-hardening/2018/11/23/2

Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Alexander Popov <alex.popov@linux.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 scripts/gcc-plugins/stackleak_plugin.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/scripts/gcc-plugins/stackleak_plugin.c b/scripts/gcc-plugins/stackleak_plugin.c
index 2f48da98b5d42..dbd37460c573e 100644
--- a/scripts/gcc-plugins/stackleak_plugin.c
+++ b/scripts/gcc-plugins/stackleak_plugin.c
@@ -363,10 +363,12 @@ __visible int plugin_init(struct plugin_name_args *plugin_info,
 						PASS_POS_INSERT_BEFORE);
 
 	/*
-	 * The stackleak_cleanup pass should be executed after the
-	 * "reload" pass, when the stack frame size is final.
+	 * The stackleak_cleanup pass should be executed before the "*free_cfg"
+	 * pass. It's the moment when the stack frame size is already final,
+	 * function prologues and epilogues are generated, and the
+	 * machine-dependent code transformations are not done.
 	 */
-	PASS_INFO(stackleak_cleanup, "reload", 1, PASS_POS_INSERT_AFTER);
+	PASS_INFO(stackleak_cleanup, "*free_cfg", 1, PASS_POS_INSERT_BEFORE);
 
 	if (!plugin_default_version_check(version, &gcc_version)) {
 		error(G_("incompatible gcc/plugin versions"));
-- 
GitLab


From ffe843b18211301ad25893eba09f402c19d12304 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 5 Dec 2018 18:33:59 +0200
Subject: [PATCH 1843/1890] dmaengine: dw: Fix FIFO size for Intel Merrifield

Intel Merrifield has a reduced size of FIFO used in iDMA 32-bit controller,
i.e. 512 bytes instead of 1024.

Fix this by partitioning it as 64 bytes per channel.

Note, in the future we might switch to 'fifo-size' property instead of
hard coded value.

Fixes: 199244d69458 ("dmaengine: dw: add support of iDMA 32-bit hardware")
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: stable@vger.kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/dw/core.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c
index d0c3e50b39fbd..1fc488e90f363 100644
--- a/drivers/dma/dw/core.c
+++ b/drivers/dma/dw/core.c
@@ -1059,12 +1059,12 @@ static void dwc_issue_pending(struct dma_chan *chan)
 /*
  * Program FIFO size of channels.
  *
- * By default full FIFO (1024 bytes) is assigned to channel 0. Here we
+ * By default full FIFO (512 bytes) is assigned to channel 0. Here we
  * slice FIFO on equal parts between channels.
  */
 static void idma32_fifo_partition(struct dw_dma *dw)
 {
-	u64 value = IDMA32C_FP_PSIZE_CH0(128) | IDMA32C_FP_PSIZE_CH1(128) |
+	u64 value = IDMA32C_FP_PSIZE_CH0(64) | IDMA32C_FP_PSIZE_CH1(64) |
 		    IDMA32C_FP_UPDATE;
 	u64 fifo_partition = 0;
 
@@ -1077,7 +1077,7 @@ static void idma32_fifo_partition(struct dw_dma *dw)
 	/* Fill FIFO_PARTITION high bits (Channels 2..3, 6..7) */
 	fifo_partition |= value << 32;
 
-	/* Program FIFO Partition registers - 128 bytes for each channel */
+	/* Program FIFO Partition registers - 64 bytes per channel */
 	idma32_writeq(dw, FIFO_PARTITION1, fifo_partition);
 	idma32_writeq(dw, FIFO_PARTITION0, fifo_partition);
 }
-- 
GitLab


From c38f57da428b033f2721b611d84b1f40bde674a8 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Thu, 6 Dec 2018 19:14:34 +0000
Subject: [PATCH 1844/1890] vhost/vsock: fix reset orphans race with close
 timeout

If a local process has closed a connected socket and hasn't received a
RST packet yet, then the socket remains in the table until a timeout
expires.

When a vhost_vsock instance is released with the timeout still pending,
the socket is never freed because vhost_vsock has already set the
SOCK_DONE flag.

Check if the close timer is pending and let it close the socket.  This
prevents the race which can leak sockets.

Reported-by: Maximilian Riemensberger <riemensberger@cadami.net>
Cc: Graham Whaley <graham.whaley@gmail.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/vhost/vsock.c | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 34bc3ab40c6da..731e2ea2aeca6 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -563,13 +563,21 @@ static void vhost_vsock_reset_orphans(struct sock *sk)
 	 * executing.
 	 */
 
-	if (!vhost_vsock_get(vsk->remote_addr.svm_cid)) {
-		sock_set_flag(sk, SOCK_DONE);
-		vsk->peer_shutdown = SHUTDOWN_MASK;
-		sk->sk_state = SS_UNCONNECTED;
-		sk->sk_err = ECONNRESET;
-		sk->sk_error_report(sk);
-	}
+	/* If the peer is still valid, no need to reset connection */
+	if (vhost_vsock_get(vsk->remote_addr.svm_cid))
+		return;
+
+	/* If the close timeout is pending, let it expire.  This avoids races
+	 * with the timeout callback.
+	 */
+	if (vsk->close_work_scheduled)
+		return;
+
+	sock_set_flag(sk, SOCK_DONE);
+	vsk->peer_shutdown = SHUTDOWN_MASK;
+	sk->sk_state = SS_UNCONNECTED;
+	sk->sk_err = ECONNRESET;
+	sk->sk_error_report(sk);
 }
 
 static int vhost_vsock_dev_release(struct inode *inode, struct file *file)
-- 
GitLab


From 2448a299ec416a80f699940a86f4a6d9a4f643b1 Mon Sep 17 00:00:00 2001
From: Halil Pasic <pasic@linux.ibm.com>
Date: Wed, 26 Sep 2018 18:48:29 +0200
Subject: [PATCH 1845/1890] virtio/s390: avoid race on vcdev->config

Currently we have a race on vcdev->config in virtio_ccw_get_config() and
in virtio_ccw_set_config().

This normally does not cause problems, as these are usually infrequent
operations. However, for some devices writing to/reading from the config
space can be triggered through sysfs attributes. For these, userspace can
force the race by increasing the frequency.

Signed-off-by: Halil Pasic <pasic@linux.ibm.com>
Cc: stable@vger.kernel.org
Message-Id: <20180925121309.58524-2-pasic@linux.ibm.com>
Signed-off-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/s390/virtio/virtio_ccw.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c
index 97b6f197f0079..c94f38ef9a94c 100644
--- a/drivers/s390/virtio/virtio_ccw.c
+++ b/drivers/s390/virtio/virtio_ccw.c
@@ -828,6 +828,7 @@ static void virtio_ccw_get_config(struct virtio_device *vdev,
 	int ret;
 	struct ccw1 *ccw;
 	void *config_area;
+	unsigned long flags;
 
 	ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL);
 	if (!ccw)
@@ -846,11 +847,13 @@ static void virtio_ccw_get_config(struct virtio_device *vdev,
 	if (ret)
 		goto out_free;
 
+	spin_lock_irqsave(&vcdev->lock, flags);
 	memcpy(vcdev->config, config_area, offset + len);
-	if (buf)
-		memcpy(buf, &vcdev->config[offset], len);
 	if (vcdev->config_ready < offset + len)
 		vcdev->config_ready = offset + len;
+	spin_unlock_irqrestore(&vcdev->lock, flags);
+	if (buf)
+		memcpy(buf, config_area + offset, len);
 
 out_free:
 	kfree(config_area);
@@ -864,6 +867,7 @@ static void virtio_ccw_set_config(struct virtio_device *vdev,
 	struct virtio_ccw_device *vcdev = to_vc_device(vdev);
 	struct ccw1 *ccw;
 	void *config_area;
+	unsigned long flags;
 
 	ccw = kzalloc(sizeof(*ccw), GFP_DMA | GFP_KERNEL);
 	if (!ccw)
@@ -876,9 +880,11 @@ static void virtio_ccw_set_config(struct virtio_device *vdev,
 	/* Make sure we don't overwrite fields. */
 	if (vcdev->config_ready < offset)
 		virtio_ccw_get_config(vdev, 0, NULL, offset);
+	spin_lock_irqsave(&vcdev->lock, flags);
 	memcpy(&vcdev->config[offset], buf, len);
 	/* Write the config area to the host. */
 	memcpy(config_area, vcdev->config, sizeof(vcdev->config));
+	spin_unlock_irqrestore(&vcdev->lock, flags);
 	ccw->cmd_code = CCW_CMD_WRITE_CONF;
 	ccw->flags = 0;
 	ccw->count = offset + len;
-- 
GitLab


From 78b1a52e05c9db11d293342e8d6d8a230a04b4e7 Mon Sep 17 00:00:00 2001
From: Halil Pasic <pasic@linux.ibm.com>
Date: Wed, 26 Sep 2018 18:48:30 +0200
Subject: [PATCH 1846/1890] virtio/s390: fix race in ccw_io_helper()

While ccw_io_helper() seems like intended to be exclusive in a sense that
it is supposed to facilitate I/O for at most one thread at any given
time, there is actually nothing ensuring that threads won't pile up at
vcdev->wait_q. If they do, all threads get woken up and see the status
that belongs to some other request than their own. This can lead to bugs.
For an example see:
https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1788432

This race normally does not cause any problems. The operations provided
by struct virtio_config_ops are usually invoked in a well defined
sequence, normally don't fail, and are normally used quite infrequent
too.

Yet, if some of the these operations are directly triggered via sysfs
attributes, like in the case described by the referenced bug, userspace
is given an opportunity to force races by increasing the frequency of the
given operations.

Let us fix the problem by ensuring, that for each device, we finish
processing the previous request before starting with a new one.

Signed-off-by: Halil Pasic <pasic@linux.ibm.com>
Reported-by: Colin Ian King <colin.king@canonical.com>
Cc: stable@vger.kernel.org
Message-Id: <20180925121309.58524-3-pasic@linux.ibm.com>
Signed-off-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/s390/virtio/virtio_ccw.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c
index c94f38ef9a94c..c9c57b4a0b718 100644
--- a/drivers/s390/virtio/virtio_ccw.c
+++ b/drivers/s390/virtio/virtio_ccw.c
@@ -56,6 +56,7 @@ struct virtio_ccw_device {
 	unsigned int revision; /* Transport revision */
 	wait_queue_head_t wait_q;
 	spinlock_t lock;
+	struct mutex io_lock; /* Serializes I/O requests */
 	struct list_head virtqueues;
 	unsigned long indicators;
 	unsigned long indicators2;
@@ -296,6 +297,7 @@ static int ccw_io_helper(struct virtio_ccw_device *vcdev,
 	unsigned long flags;
 	int flag = intparm & VIRTIO_CCW_INTPARM_MASK;
 
+	mutex_lock(&vcdev->io_lock);
 	do {
 		spin_lock_irqsave(get_ccwdev_lock(vcdev->cdev), flags);
 		ret = ccw_device_start(vcdev->cdev, ccw, intparm, 0, 0);
@@ -308,7 +310,9 @@ static int ccw_io_helper(struct virtio_ccw_device *vcdev,
 		cpu_relax();
 	} while (ret == -EBUSY);
 	wait_event(vcdev->wait_q, doing_io(vcdev, flag) == 0);
-	return ret ? ret : vcdev->err;
+	ret = ret ? ret : vcdev->err;
+	mutex_unlock(&vcdev->io_lock);
+	return ret;
 }
 
 static void virtio_ccw_drop_indicator(struct virtio_ccw_device *vcdev,
@@ -1253,6 +1257,7 @@ static int virtio_ccw_online(struct ccw_device *cdev)
 	init_waitqueue_head(&vcdev->wait_q);
 	INIT_LIST_HEAD(&vcdev->virtqueues);
 	spin_lock_init(&vcdev->lock);
+	mutex_init(&vcdev->io_lock);
 
 	spin_lock_irqsave(get_ccwdev_lock(cdev), flags);
 	dev_set_drvdata(&cdev->dev, vcdev);
-- 
GitLab


From 834e772c8db0c6a275d75315d90aba4ebbb1e249 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Mon, 5 Nov 2018 10:35:47 +0000
Subject: [PATCH 1847/1890] vhost/vsock: fix use-after-free in network stack
 callers

If the network stack calls .send_pkt()/.cancel_pkt() during .release(),
a struct vhost_vsock use-after-free is possible.  This occurs because
.release() does not wait for other CPUs to stop using struct
vhost_vsock.

Switch to an RCU-enabled hashtable (indexed by guest CID) so that
.release() can wait for other CPUs by calling synchronize_rcu().  This
also eliminates vhost_vsock_lock acquisition in the data path so it
could have a positive effect on performance.

This is CVE-2018-14625 "kernel: use-after-free Read in vhost_transport_send_pkt".

Cc: stable@vger.kernel.org
Reported-and-tested-by: syzbot+bd391451452fb0b93039@syzkaller.appspotmail.com
Reported-by: syzbot+e3e074963495f92a89ed@syzkaller.appspotmail.com
Reported-by: syzbot+d5a0a170c5069658b141@syzkaller.appspotmail.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
---
 drivers/vhost/vsock.c | 57 +++++++++++++++++++++++++------------------
 1 file changed, 33 insertions(+), 24 deletions(-)

diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 731e2ea2aeca6..98ed5be132c6a 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -15,6 +15,7 @@
 #include <net/sock.h>
 #include <linux/virtio_vsock.h>
 #include <linux/vhost.h>
+#include <linux/hashtable.h>
 
 #include <net/af_vsock.h>
 #include "vhost.h"
@@ -27,14 +28,14 @@ enum {
 
 /* Used to track all the vhost_vsock instances on the system. */
 static DEFINE_SPINLOCK(vhost_vsock_lock);
-static LIST_HEAD(vhost_vsock_list);
+static DEFINE_READ_MOSTLY_HASHTABLE(vhost_vsock_hash, 8);
 
 struct vhost_vsock {
 	struct vhost_dev dev;
 	struct vhost_virtqueue vqs[2];
 
-	/* Link to global vhost_vsock_list, protected by vhost_vsock_lock */
-	struct list_head list;
+	/* Link to global vhost_vsock_hash, writes use vhost_vsock_lock */
+	struct hlist_node hash;
 
 	struct vhost_work send_pkt_work;
 	spinlock_t send_pkt_list_lock;
@@ -50,11 +51,14 @@ static u32 vhost_transport_get_local_cid(void)
 	return VHOST_VSOCK_DEFAULT_HOST_CID;
 }
 
-static struct vhost_vsock *__vhost_vsock_get(u32 guest_cid)
+/* Callers that dereference the return value must hold vhost_vsock_lock or the
+ * RCU read lock.
+ */
+static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
 {
 	struct vhost_vsock *vsock;
 
-	list_for_each_entry(vsock, &vhost_vsock_list, list) {
+	hash_for_each_possible_rcu(vhost_vsock_hash, vsock, hash, guest_cid) {
 		u32 other_cid = vsock->guest_cid;
 
 		/* Skip instances that have no CID yet */
@@ -69,17 +73,6 @@ static struct vhost_vsock *__vhost_vsock_get(u32 guest_cid)
 	return NULL;
 }
 
-static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
-{
-	struct vhost_vsock *vsock;
-
-	spin_lock_bh(&vhost_vsock_lock);
-	vsock = __vhost_vsock_get(guest_cid);
-	spin_unlock_bh(&vhost_vsock_lock);
-
-	return vsock;
-}
-
 static void
 vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
 			    struct vhost_virtqueue *vq)
@@ -210,9 +203,12 @@ vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt)
 	struct vhost_vsock *vsock;
 	int len = pkt->len;
 
+	rcu_read_lock();
+
 	/* Find the vhost_vsock according to guest context id  */
 	vsock = vhost_vsock_get(le64_to_cpu(pkt->hdr.dst_cid));
 	if (!vsock) {
+		rcu_read_unlock();
 		virtio_transport_free_pkt(pkt);
 		return -ENODEV;
 	}
@@ -225,6 +221,8 @@ vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt)
 	spin_unlock_bh(&vsock->send_pkt_list_lock);
 
 	vhost_work_queue(&vsock->dev, &vsock->send_pkt_work);
+
+	rcu_read_unlock();
 	return len;
 }
 
@@ -234,12 +232,15 @@ vhost_transport_cancel_pkt(struct vsock_sock *vsk)
 	struct vhost_vsock *vsock;
 	struct virtio_vsock_pkt *pkt, *n;
 	int cnt = 0;
+	int ret = -ENODEV;
 	LIST_HEAD(freeme);
 
+	rcu_read_lock();
+
 	/* Find the vhost_vsock according to guest context id  */
 	vsock = vhost_vsock_get(vsk->remote_addr.svm_cid);
 	if (!vsock)
-		return -ENODEV;
+		goto out;
 
 	spin_lock_bh(&vsock->send_pkt_list_lock);
 	list_for_each_entry_safe(pkt, n, &vsock->send_pkt_list, list) {
@@ -265,7 +266,10 @@ vhost_transport_cancel_pkt(struct vsock_sock *vsk)
 			vhost_poll_queue(&tx_vq->poll);
 	}
 
-	return 0;
+	ret = 0;
+out:
+	rcu_read_unlock();
+	return ret;
 }
 
 static struct virtio_vsock_pkt *
@@ -533,10 +537,6 @@ static int vhost_vsock_dev_open(struct inode *inode, struct file *file)
 	spin_lock_init(&vsock->send_pkt_list_lock);
 	INIT_LIST_HEAD(&vsock->send_pkt_list);
 	vhost_work_init(&vsock->send_pkt_work, vhost_transport_send_pkt_work);
-
-	spin_lock_bh(&vhost_vsock_lock);
-	list_add_tail(&vsock->list, &vhost_vsock_list);
-	spin_unlock_bh(&vhost_vsock_lock);
 	return 0;
 
 out:
@@ -585,9 +585,13 @@ static int vhost_vsock_dev_release(struct inode *inode, struct file *file)
 	struct vhost_vsock *vsock = file->private_data;
 
 	spin_lock_bh(&vhost_vsock_lock);
-	list_del(&vsock->list);
+	if (vsock->guest_cid)
+		hash_del_rcu(&vsock->hash);
 	spin_unlock_bh(&vhost_vsock_lock);
 
+	/* Wait for other CPUs to finish using vsock */
+	synchronize_rcu();
+
 	/* Iterating over all connections for all CIDs to find orphans is
 	 * inefficient.  Room for improvement here. */
 	vsock_for_each_connected_socket(vhost_vsock_reset_orphans);
@@ -628,12 +632,17 @@ static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u64 guest_cid)
 
 	/* Refuse if CID is already in use */
 	spin_lock_bh(&vhost_vsock_lock);
-	other = __vhost_vsock_get(guest_cid);
+	other = vhost_vsock_get(guest_cid);
 	if (other && other != vsock) {
 		spin_unlock_bh(&vhost_vsock_lock);
 		return -EADDRINUSE;
 	}
+
+	if (vsock->guest_cid)
+		hash_del_rcu(&vsock->hash);
+
 	vsock->guest_cid = guest_cid;
+	hash_add_rcu(vhost_vsock_hash, &vsock->hash, guest_cid);
 	spin_unlock_bh(&vhost_vsock_lock);
 
 	return 0;
-- 
GitLab


From 050fc01fb1d916058605569cd7f4e15152afc3af Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Thu, 6 Dec 2018 17:44:50 +0000
Subject: [PATCH 1848/1890] mlxsw: spectrum_nve: Remove easily triggerable
 warnings

It is possible to trigger a warning in mlxsw in case a flood entry which
mlxsw is not aware of is deleted from the VxLAN device. This is because
mlxsw expects to find a singly linked list where the flood entry is
present in.

Fix by removing these warnings for now.

Will re-add them in the next release after we teach mlxsw to ask for a
dump of FDB entries from the VxLAN device, once it is enslaved to a
bridge mlxsw cares about.

Fixes: 6e6030bd5412 ("mlxsw: spectrum_nve: Implement common NVE core")
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c
index ad06d9969bc13..5c13674439f1f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c
@@ -560,7 +560,7 @@ static void mlxsw_sp_nve_mc_list_ip_del(struct mlxsw_sp *mlxsw_sp,
 
 	mc_record = mlxsw_sp_nve_mc_record_find(mc_list, proto, addr,
 						&mc_entry);
-	if (WARN_ON(!mc_record))
+	if (!mc_record)
 		return;
 
 	mlxsw_sp_nve_mc_record_entry_del(mc_record, mc_entry);
@@ -647,7 +647,7 @@ void mlxsw_sp_nve_flood_ip_del(struct mlxsw_sp *mlxsw_sp,
 
 	key.fid_index = mlxsw_sp_fid_index(fid);
 	mc_list = mlxsw_sp_nve_mc_list_find(mlxsw_sp, &key);
-	if (WARN_ON(!mc_list))
+	if (!mc_list)
 		return;
 
 	mlxsw_sp_nve_fid_flood_index_clear(fid, mc_list);
-- 
GitLab


From f58a83c207b791c6586b9675a589db5c6ac7909e Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Thu, 6 Dec 2018 17:44:51 +0000
Subject: [PATCH 1849/1890] mlxsw: spectrum_switchdev: Avoid leaking FID's
 reference count

It should never be possible for a user to set a VNI on a FID in case one
is already set. The driver therefore returns an error, but fails to drop
the reference count taken earlier when calling
mlxsw_sp_fid_8021d_lookup().

Drop the reference when this unlikely error is hit.

Fixes: 1c30d1836aeb ("mlxsw: spectrum: Enable VxLAN enslavement to bridges")
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index 739a51f0a366f..7f2091c2648ef 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -2134,8 +2134,10 @@ mlxsw_sp_bridge_8021d_vxlan_join(struct mlxsw_sp_bridge_device *bridge_device,
 	if (!fid)
 		return -EINVAL;
 
-	if (mlxsw_sp_fid_vni_is_set(fid))
-		return -EINVAL;
+	if (mlxsw_sp_fid_vni_is_set(fid)) {
+		err = -EINVAL;
+		goto err_vni_exists;
+	}
 
 	err = mlxsw_sp_nve_fid_enable(mlxsw_sp, fid, &params, extack);
 	if (err)
@@ -2149,6 +2151,7 @@ mlxsw_sp_bridge_8021d_vxlan_join(struct mlxsw_sp_bridge_device *bridge_device,
 	return 0;
 
 err_nve_fid_enable:
+err_vni_exists:
 	mlxsw_sp_fid_put(fid);
 	return err;
 }
-- 
GitLab


From da93d2913fdf43d5cde3c5a53ac9cc29684d5c7c Mon Sep 17 00:00:00 2001
From: Nir Dotan <nird@mellanox.com>
Date: Thu, 6 Dec 2018 17:44:52 +0000
Subject: [PATCH 1850/1890] mlxsw: spectrum_router: Relax GRE decap matching
 check

GRE decap offload is configured when local routes prefix correspond to the
local address of one of the offloaded GRE tunnels. The matching check was
found to be too strict, such that for a flat GRE configuration, in which
the overlay and underlay traffic share the same non-default VRF, decap flow
was not offloaded.

Relax the check for decap flow offloading. A match occurs if the local
address of the tunnel matches the local route address while both share the
same VRF table.

Fixes: 4607f6d26950 ("mlxsw: spectrum_router: Support IPv4 underlay decap")
Signed-off-by: Nir Dotan <nird@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 9e9bb57134f2c..6ebf99cc31544 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -1275,15 +1275,12 @@ mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp,
 {
 	u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
 	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
-	struct net_device *ipip_ul_dev;
 
 	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
 		return false;
 
-	ipip_ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
 	return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip,
-						 ul_tb_id, ipip_entry) &&
-	       (!ipip_ul_dev || ipip_ul_dev == ul_dev);
+						 ul_tb_id, ipip_entry);
 }
 
 /* Given decap parameters, find the corresponding IPIP entry. */
-- 
GitLab


From 993107fea5eefdfdfde1ca38d3f01f0bebf76e77 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Thu, 6 Dec 2018 17:44:53 +0000
Subject: [PATCH 1851/1890] mlxsw: spectrum_switchdev: Fix VLAN device deletion
 via ioctl

When deleting a VLAN device using an ioctl the netdev is unregistered
before the VLAN filter is updated via ndo_vlan_rx_kill_vid(). It can
lead to a use-after-free in mlxsw in case the VLAN device is deleted
while being enslaved to a bridge.

The reason for the above is that when mlxsw receives the CHANGEUPPER
event, it wrongly assumes that the VLAN device is no longer its upper
and thus destroys the internal representation of the bridge port despite
the reference count being non-zero.

Fix this by checking if the VLAN device is our upper using its real
device. In net-next I'm going to remove this trick and instead make
mlxsw completely agnostic to the order of the events.

Fixes: c57529e1d5d8 ("mlxsw: spectrum: Replace vPorts with Port-VLAN")
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_switchdev.c   | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index 7f2091c2648ef..50080c60a2794 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -296,7 +296,13 @@ static bool
 mlxsw_sp_bridge_port_should_destroy(const struct mlxsw_sp_bridge_port *
 				    bridge_port)
 {
-	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_port->dev);
+	struct net_device *dev = bridge_port->dev;
+	struct mlxsw_sp *mlxsw_sp;
+
+	if (is_vlan_dev(dev))
+		mlxsw_sp = mlxsw_sp_lower_get(vlan_dev_real_dev(dev));
+	else
+		mlxsw_sp = mlxsw_sp_lower_get(dev);
 
 	/* In case ports were pulled from out of a bridged LAG, then
 	 * it's possible the reference count isn't zero, yet the bridge
@@ -2109,7 +2115,7 @@ mlxsw_sp_bridge_8021d_port_leave(struct mlxsw_sp_bridge_device *bridge_device,
 
 	vid = is_vlan_dev(dev) ? vlan_dev_vlan_id(dev) : 1;
 	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
-	if (WARN_ON(!mlxsw_sp_port_vlan))
+	if (!mlxsw_sp_port_vlan)
 		return;
 
 	mlxsw_sp_port_vlan_bridge_leave(mlxsw_sp_port_vlan);
-- 
GitLab


From 6c7f25cae54b840302e4f1b371dbf318fbf09ab2 Mon Sep 17 00:00:00 2001
From: "Adamski, Krzysztof (Nokia - PL/Wroclaw)" <krzysztof.adamski@nokia.com>
Date: Fri, 16 Nov 2018 13:24:41 +0000
Subject: [PATCH 1852/1890] i2c: axxia: properly handle master timeout

According to Intel (R) Axxia TM Lionfish Communication Processor
Peripheral Subsystem Hardware Reference Manual, the AXXIA I2C module
have a programmable Master Wait Timer, which among others, checks the
time between commands send in manual mode. When a timeout (25ms) passes,
TSS bit is set in Master Interrupt Status register and a Stop command is
issued by the hardware.

The axxia_i2c_xfer(), does not properly handle this situation, however.
For each message a separate axxia_i2c_xfer_msg() is called and this
function incorrectly assumes that any interrupt might happen only when
waiting for completion. This is mostly correct but there is one
exception - a master timeout can trigger if enough time has passed
between individual transfers. It will, by definition, happen between
transfers when the interrupts are disabled by the code. If that happens,
the hardware issues Stop command.

The interrupt indicating timeout will not be triggered as soon as we
enable them since the Master Interrupt Status is cleared when master
mode is entered again (which happens before enabling irqs) meaning this
error is lost and the transfer is continued even though the Stop was
issued on the bus. The subsequent operations completes without error but
a bogus value (0xFF in case of read) is read as the client device is
confused because aborted transfer. No error is returned from
master_xfer() making caller believe that a valid value was read.

To fix the problem, the TSS bit (indicating timeout) in Master Interrupt
Status register is checked before each transfer. If it is set, there was
a timeout before this transfer and (as described above) the hardware
already issued Stop command so the transaction should be aborted thus
-ETIMEOUT is returned from the master_xfer() callback. In order to be
sure no timeout was issued we can't just read the status just before
starting new transaction as there will always be a small window of time
(few CPU cycles at best) where this might still happen. For this reason
we have to temporally disable the timer before checking for TSS bit.
Disabling it will, however, clear the TSS bit so in order to preserve
that information, we have to read it in ISR so we have to ensure that
the TSS interrupt is not masked between transfers of one transaction.
There is no need to call bus recovery or controller reinitialization if
that happens so it's skipped.

Signed-off-by: Krzysztof Adamski <krzysztof.adamski@nokia.com>
Reviewed-by: Alexander Sverdlin <alexander.sverdlin@nokia.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-axxia.c | 40 ++++++++++++++++++++++++----------
 1 file changed, 29 insertions(+), 11 deletions(-)

diff --git a/drivers/i2c/busses/i2c-axxia.c b/drivers/i2c/busses/i2c-axxia.c
index 8e60048a33f8f..51d34959709ba 100644
--- a/drivers/i2c/busses/i2c-axxia.c
+++ b/drivers/i2c/busses/i2c-axxia.c
@@ -74,8 +74,7 @@
 				 MST_STATUS_ND)
 #define   MST_STATUS_ERR	(MST_STATUS_NAK | \
 				 MST_STATUS_AL  | \
-				 MST_STATUS_IP  | \
-				 MST_STATUS_TSS)
+				 MST_STATUS_IP)
 #define MST_TX_BYTES_XFRD	0x50
 #define MST_RX_BYTES_XFRD	0x54
 #define SCL_HIGH_PERIOD		0x80
@@ -241,7 +240,7 @@ static int axxia_i2c_empty_rx_fifo(struct axxia_i2c_dev *idev)
 			 */
 			if (c <= 0 || c > I2C_SMBUS_BLOCK_MAX) {
 				idev->msg_err = -EPROTO;
-				i2c_int_disable(idev, ~0);
+				i2c_int_disable(idev, ~MST_STATUS_TSS);
 				complete(&idev->msg_complete);
 				break;
 			}
@@ -299,14 +298,19 @@ static irqreturn_t axxia_i2c_isr(int irq, void *_dev)
 
 	if (status & MST_STATUS_SCC) {
 		/* Stop completed */
-		i2c_int_disable(idev, ~0);
+		i2c_int_disable(idev, ~MST_STATUS_TSS);
 		complete(&idev->msg_complete);
 	} else if (status & MST_STATUS_SNS) {
 		/* Transfer done */
-		i2c_int_disable(idev, ~0);
+		i2c_int_disable(idev, ~MST_STATUS_TSS);
 		if (i2c_m_rd(idev->msg) && idev->msg_xfrd < idev->msg->len)
 			axxia_i2c_empty_rx_fifo(idev);
 		complete(&idev->msg_complete);
+	} else if (status & MST_STATUS_TSS) {
+		/* Transfer timeout */
+		idev->msg_err = -ETIMEDOUT;
+		i2c_int_disable(idev, ~MST_STATUS_TSS);
+		complete(&idev->msg_complete);
 	} else if (unlikely(status & MST_STATUS_ERR)) {
 		/* Transfer error */
 		i2c_int_disable(idev, ~0);
@@ -339,10 +343,10 @@ static int axxia_i2c_xfer_msg(struct axxia_i2c_dev *idev, struct i2c_msg *msg)
 	u32 rx_xfer, tx_xfer;
 	u32 addr_1, addr_2;
 	unsigned long time_left;
+	unsigned int wt_value;
 
 	idev->msg = msg;
 	idev->msg_xfrd = 0;
-	idev->msg_err = 0;
 	reinit_completion(&idev->msg_complete);
 
 	if (i2c_m_ten(msg)) {
@@ -383,9 +387,18 @@ static int axxia_i2c_xfer_msg(struct axxia_i2c_dev *idev, struct i2c_msg *msg)
 	else if (axxia_i2c_fill_tx_fifo(idev) != 0)
 		int_mask |= MST_STATUS_TFL;
 
+	wt_value = WT_VALUE(readl(idev->base + WAIT_TIMER_CONTROL));
+	/* Disable wait timer temporarly */
+	writel(wt_value, idev->base + WAIT_TIMER_CONTROL);
+	/* Check if timeout error happened */
+	if (idev->msg_err)
+		goto out;
+
 	/* Start manual mode */
 	writel(CMD_MANUAL, idev->base + MST_COMMAND);
 
+	writel(WT_EN | wt_value, idev->base + WAIT_TIMER_CONTROL);
+
 	i2c_int_enable(idev, int_mask);
 
 	time_left = wait_for_completion_timeout(&idev->msg_complete,
@@ -396,13 +409,15 @@ static int axxia_i2c_xfer_msg(struct axxia_i2c_dev *idev, struct i2c_msg *msg)
 	if (readl(idev->base + MST_COMMAND) & CMD_BUSY)
 		dev_warn(idev->dev, "busy after xfer\n");
 
-	if (time_left == 0)
+	if (time_left == 0) {
 		idev->msg_err = -ETIMEDOUT;
-
-	if (idev->msg_err == -ETIMEDOUT)
 		i2c_recover_bus(&idev->adapter);
+		axxia_i2c_init(idev);
+	}
 
-	if (unlikely(idev->msg_err) && idev->msg_err != -ENXIO)
+out:
+	if (unlikely(idev->msg_err) && idev->msg_err != -ENXIO &&
+			idev->msg_err != -ETIMEDOUT)
 		axxia_i2c_init(idev);
 
 	return idev->msg_err;
@@ -410,7 +425,7 @@ static int axxia_i2c_xfer_msg(struct axxia_i2c_dev *idev, struct i2c_msg *msg)
 
 static int axxia_i2c_stop(struct axxia_i2c_dev *idev)
 {
-	u32 int_mask = MST_STATUS_ERR | MST_STATUS_SCC;
+	u32 int_mask = MST_STATUS_ERR | MST_STATUS_SCC | MST_STATUS_TSS;
 	unsigned long time_left;
 
 	reinit_completion(&idev->msg_complete);
@@ -437,6 +452,9 @@ axxia_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num)
 	int i;
 	int ret = 0;
 
+	idev->msg_err = 0;
+	i2c_int_enable(idev, MST_STATUS_TSS);
+
 	for (i = 0; ret == 0 && i < num; ++i)
 		ret = axxia_i2c_xfer_msg(idev, &msgs[i]);
 
-- 
GitLab


From 0544ee4b1ad574aec3b6379af5f5cdee42840971 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 21 Nov 2018 10:19:55 +0100
Subject: [PATCH 1853/1890] i2c: scmi: Fix probe error on devices with an empty
 SMB0001 ACPI device node

Some AMD based HP laptops have a SMB0001 ACPI device node which does not
define any methods.

This leads to the following error in dmesg:

[    5.222731] cmi: probe of SMB0001:00 failed with error -5

This commit makes acpi_smbus_cmi_add() return -ENODEV instead in this case
silencing the error. In case of a failure of the i2c_add_adapter() call
this commit now propagates the error from that call instead of -EIO.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-scmi.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/i2c/busses/i2c-scmi.c b/drivers/i2c/busses/i2c-scmi.c
index 7e9a2bbf5ddcb..ff3f4553648f3 100644
--- a/drivers/i2c/busses/i2c-scmi.c
+++ b/drivers/i2c/busses/i2c-scmi.c
@@ -367,6 +367,7 @@ static int acpi_smbus_cmi_add(struct acpi_device *device)
 {
 	struct acpi_smbus_cmi *smbus_cmi;
 	const struct acpi_device_id *id;
+	int ret;
 
 	smbus_cmi = kzalloc(sizeof(struct acpi_smbus_cmi), GFP_KERNEL);
 	if (!smbus_cmi)
@@ -388,8 +389,10 @@ static int acpi_smbus_cmi_add(struct acpi_device *device)
 	acpi_walk_namespace(ACPI_TYPE_METHOD, smbus_cmi->handle, 1,
 			    acpi_smbus_cmi_query_methods, NULL, smbus_cmi, NULL);
 
-	if (smbus_cmi->cap_info == 0)
+	if (smbus_cmi->cap_info == 0) {
+		ret = -ENODEV;
 		goto err;
+	}
 
 	snprintf(smbus_cmi->adapter.name, sizeof(smbus_cmi->adapter.name),
 		"SMBus CMI adapter %s",
@@ -400,7 +403,8 @@ static int acpi_smbus_cmi_add(struct acpi_device *device)
 	smbus_cmi->adapter.class = I2C_CLASS_HWMON | I2C_CLASS_SPD;
 	smbus_cmi->adapter.dev.parent = &device->dev;
 
-	if (i2c_add_adapter(&smbus_cmi->adapter)) {
+	ret = i2c_add_adapter(&smbus_cmi->adapter);
+	if (ret) {
 		dev_err(&device->dev, "Couldn't register adapter!\n");
 		goto err;
 	}
@@ -410,7 +414,7 @@ static int acpi_smbus_cmi_add(struct acpi_device *device)
 err:
 	kfree(smbus_cmi);
 	device->driver_data = NULL;
-	return -EIO;
+	return ret;
 }
 
 static int acpi_smbus_cmi_remove(struct acpi_device *device)
-- 
GitLab


From c2a653deaa81f5a750c0dfcbaf9f8e5195cbe4a5 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 6 Dec 2018 12:55:25 +0900
Subject: [PATCH 1854/1890] i2c: uniphier-f: fix timeout error after reading 8
 bytes

I was totally screwed up in commit eaba68785c2d ("i2c: uniphier-f:
fix race condition when IRQ is cleared"). Since that commit, if the
number of read bytes is multiple of the FIFO size (8, 16, 24... bytes),
the STOP condition could be issued twice, depending on the timing.
If this happens, the controller will go wrong, resulting in the timeout
error.

It was more than 3 years ago when I wrote this driver, so my memory
about this hardware was vague. Please let me correct the description
in the commit log of eaba68785c2d.

Clearing the IRQ status on exiting the IRQ handler is absolutely
fine. This controller makes a pause while any IRQ status is asserted.
If the IRQ status is cleared first, the hardware may start the next
transaction before the IRQ handler finishes what it supposed to do.

This partially reverts the bad commit with clear comments so that I
will never repeat this mistake.

I also investigated what is happening at the last moment of the read
mode. The UNIPHIER_FI2C_INT_RF interrupt is asserted a bit earlier
(by half a period of the clock cycle) than UNIPHIER_FI2C_INT_RB.

I consulted a hardware engineer, and I got the following information:

UNIPHIER_FI2C_INT_RF
    asserted at the falling edge of SCL at the 8th bit.

UNIPHIER_FI2C_INT_RB
    asserted at the rising edge of SCL at the 9th (ACK) bit.

In order to avoid calling uniphier_fi2c_stop() twice, check the latter
interrupt. I also commented this because it is obscure hardware internal.

Fixes: eaba68785c2d ("i2c: uniphier-f: fix race condition when IRQ is cleared")
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-uniphier-f.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/i2c/busses/i2c-uniphier-f.c b/drivers/i2c/busses/i2c-uniphier-f.c
index dd384743dbbd0..fad2b00a8c8cb 100644
--- a/drivers/i2c/busses/i2c-uniphier-f.c
+++ b/drivers/i2c/busses/i2c-uniphier-f.c
@@ -173,8 +173,6 @@ static irqreturn_t uniphier_fi2c_interrupt(int irq, void *dev_id)
 		"interrupt: enabled_irqs=%04x, irq_status=%04x\n",
 		priv->enabled_irqs, irq_status);
 
-	uniphier_fi2c_clear_irqs(priv, irq_status);
-
 	if (irq_status & UNIPHIER_FI2C_INT_STOP)
 		goto complete;
 
@@ -214,7 +212,13 @@ static irqreturn_t uniphier_fi2c_interrupt(int irq, void *dev_id)
 
 	if (irq_status & (UNIPHIER_FI2C_INT_RF | UNIPHIER_FI2C_INT_RB)) {
 		uniphier_fi2c_drain_rxfifo(priv);
-		if (!priv->len)
+		/*
+		 * If the number of bytes to read is multiple of the FIFO size
+		 * (msg->len == 8, 16, 24, ...), the INT_RF bit is set a little
+		 * earlier than INT_RB. We wait for INT_RB to confirm the
+		 * completion of the current message.
+		 */
+		if (!priv->len && (irq_status & UNIPHIER_FI2C_INT_RB))
 			goto data_done;
 
 		if (unlikely(priv->flags & UNIPHIER_FI2C_MANUAL_NACK)) {
@@ -253,6 +257,13 @@ static irqreturn_t uniphier_fi2c_interrupt(int irq, void *dev_id)
 	}
 
 handled:
+	/*
+	 * This controller makes a pause while any bit of the IRQ status is
+	 * asserted. Clear the asserted bit to kick the controller just before
+	 * exiting the handler.
+	 */
+	uniphier_fi2c_clear_irqs(priv, irq_status);
+
 	spin_unlock(&priv->lock);
 
 	return IRQ_HANDLED;
-- 
GitLab


From cd8843f541cc0ef057e27f1afba46374bbb84219 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 6 Dec 2018 12:55:26 +0900
Subject: [PATCH 1855/1890] i2c: uniphier-f: fill TX-FIFO only in IRQ handler
 for repeated START

- For a repeated START condition, this controller starts data transfer
   immediately after the slave address is written to the TX-FIFO.

 - Once the TX-FIFO empty interrupt is asserted, the controller makes
   a pause even if additional data are written to the TX-FIFO.

Given those circumstances, the data after a repeated START may not be
transferred if the interrupt is asserted while the TX-FIFO is being
filled up. A more reliable way is to append TX data only in the
interrupt handler.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-uniphier-f.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/i2c/busses/i2c-uniphier-f.c b/drivers/i2c/busses/i2c-uniphier-f.c
index fad2b00a8c8cb..d8a5db14da796 100644
--- a/drivers/i2c/busses/i2c-uniphier-f.c
+++ b/drivers/i2c/busses/i2c-uniphier-f.c
@@ -269,7 +269,8 @@ static irqreturn_t uniphier_fi2c_interrupt(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-static void uniphier_fi2c_tx_init(struct uniphier_fi2c_priv *priv, u16 addr)
+static void uniphier_fi2c_tx_init(struct uniphier_fi2c_priv *priv, u16 addr,
+				  bool repeat)
 {
 	priv->enabled_irqs |= UNIPHIER_FI2C_INT_TE;
 	uniphier_fi2c_set_irqs(priv);
@@ -279,8 +280,12 @@ static void uniphier_fi2c_tx_init(struct uniphier_fi2c_priv *priv, u16 addr)
 	/* set slave address */
 	writel(UNIPHIER_FI2C_DTTX_CMD | addr << 1,
 	       priv->membase + UNIPHIER_FI2C_DTTX);
-	/* first chunk of data */
-	uniphier_fi2c_fill_txfifo(priv, true);
+	/*
+	 * First chunk of data. For a repeated START condition, do not write
+	 * data to the TX fifo here to avoid the timing issue.
+	 */
+	if (!repeat)
+		uniphier_fi2c_fill_txfifo(priv, true);
 }
 
 static void uniphier_fi2c_rx_init(struct uniphier_fi2c_priv *priv, u16 addr)
@@ -361,7 +366,7 @@ static int uniphier_fi2c_master_xfer_one(struct i2c_adapter *adap,
 	if (is_read)
 		uniphier_fi2c_rx_init(priv, msg->addr);
 	else
-		uniphier_fi2c_tx_init(priv, msg->addr);
+		uniphier_fi2c_tx_init(priv, msg->addr, repeat);
 
 	dev_dbg(&adap->dev, "start condition\n");
 	/*
-- 
GitLab


From 8469636ab5d8c77645b953746c10fda6983a8830 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 6 Dec 2018 12:55:27 +0900
Subject: [PATCH 1856/1890] i2c: uniphier: fix violation of tLOW requirement
 for Fast-mode

Currently, the clock duty is set as tLOW/tHIGH = 1/1. For Fast-mode,
tLOW is set to 1.25 us while the I2C spec requires tLOW >= 1.3 us.

tLOW/tHIGH = 5/4 would meet both Standard-mode and Fast-mode:
  Standard-mode: tLOW = 5.56 us, tHIGH = 4.44 us
  Fast-mode:     tLOW = 1.39 us, tHIGH = 1.11 us

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-uniphier.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-uniphier.c b/drivers/i2c/busses/i2c-uniphier.c
index 454f914ae66db..c488e558aef70 100644
--- a/drivers/i2c/busses/i2c-uniphier.c
+++ b/drivers/i2c/busses/i2c-uniphier.c
@@ -320,7 +320,13 @@ static void uniphier_i2c_hw_init(struct uniphier_i2c_priv *priv)
 
 	uniphier_i2c_reset(priv, true);
 
-	writel((cyc / 2 << 16) | cyc, priv->membase + UNIPHIER_I2C_CLK);
+	/*
+	 * Bit30-16: clock cycles of tLOW.
+	 *  Standard-mode: tLOW = 4.7 us, tHIGH = 4.0 us
+	 *  Fast-mode:     tLOW = 1.3 us, tHIGH = 0.6 us
+	 * "tLow/tHIGH = 5/4" meets both.
+	 */
+	writel((cyc * 5 / 9 << 16) | cyc, priv->membase + UNIPHIER_I2C_CLK);
 
 	uniphier_i2c_reset(priv, false);
 }
-- 
GitLab


From ece27a337d42a3197935711997f2880f0957ed7e Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 6 Dec 2018 12:55:28 +0900
Subject: [PATCH 1857/1890] i2c: uniphier-f: fix violation of tLOW requirement
 for Fast-mode

Currently, the clock duty is set as tLOW/tHIGH = 1/1. For Fast-mode,
tLOW is set to 1.25 us while the I2C spec requires tLOW >= 1.3 us.

tLOW/tHIGH = 5/4 would meet both Standard-mode and Fast-mode:
  Standard-mode: tLOW = 5.56 us, tHIGH = 4.44 us
  Fast-mode:     tLOW = 1.39 us, tHIGH = 1.11 us

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-uniphier-f.c | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-uniphier-f.c b/drivers/i2c/busses/i2c-uniphier-f.c
index d8a5db14da796..03da4a539a2f2 100644
--- a/drivers/i2c/busses/i2c-uniphier-f.c
+++ b/drivers/i2c/busses/i2c-uniphier-f.c
@@ -518,9 +518,26 @@ static void uniphier_fi2c_hw_init(struct uniphier_fi2c_priv *priv)
 
 	uniphier_fi2c_reset(priv);
 
+	/*
+	 *  Standard-mode: tLOW + tHIGH = 10 us
+	 *  Fast-mode:     tLOW + tHIGH = 2.5 us
+	 */
 	writel(cyc, priv->membase + UNIPHIER_FI2C_CYC);
-	writel(cyc / 2, priv->membase + UNIPHIER_FI2C_LCTL);
+	/*
+	 *  Standard-mode: tLOW = 4.7 us, tHIGH = 4.0 us, tBUF = 4.7 us
+	 *  Fast-mode:     tLOW = 1.3 us, tHIGH = 0.6 us, tBUF = 1.3 us
+	 * "tLow/tHIGH = 5/4" meets both.
+	 */
+	writel(cyc * 5 / 9, priv->membase + UNIPHIER_FI2C_LCTL);
+	/*
+	 *  Standard-mode: tHD;STA = 4.0 us, tSU;STA = 4.7 us, tSU;STO = 4.0 us
+	 *  Fast-mode:     tHD;STA = 0.6 us, tSU;STA = 0.6 us, tSU;STO = 0.6 us
+	 */
 	writel(cyc / 2, priv->membase + UNIPHIER_FI2C_SSUT);
+	/*
+	 *  Standard-mode: tSU;DAT = 250 ns
+	 *  Fast-mode:     tSU;DAT = 100 ns
+	 */
 	writel(cyc / 16, priv->membase + UNIPHIER_FI2C_DSUT);
 
 	uniphier_fi2c_prepare_operation(priv);
-- 
GitLab


From e61efff4ae94f4509707a5e03f8edb6455f490e1 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 7 Dec 2018 13:56:08 +0800
Subject: [PATCH 1858/1890] crypto: user - Disable statistics interface

Since this user-space API is still undergoing significant changes,
this patch disables it for the current merge window.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crypto/Kconfig b/crypto/Kconfig
index f7a235db56aaa..05c91eb10ca1f 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -1812,7 +1812,7 @@ config CRYPTO_USER_API_AEAD
 	  cipher algorithms.
 
 config CRYPTO_STATS
-	bool "Crypto usage statistics for User-space"
+	bool
 	help
 	  This option enables the gathering of crypto stats.
 	  This will collect:
-- 
GitLab


From 6ac79291fb7dae4402b306789b1cb85f84687e44 Mon Sep 17 00:00:00 2001
From: Long Li <longli@microsoft.com>
Date: Thu, 6 Dec 2018 04:51:06 +0000
Subject: [PATCH 1859/1890] CIFS: Avoid returning EBUSY to upper layer VFS

EBUSY is not handled by VFS, and will be passed to user-mode. This is not
correct as we need to wait for more credits.

This patch also fixes a bug where rsize or wsize is used uninitialized when
the call to server->ops->wait_mtu_credits() fails.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Long Li <longli@microsoft.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
---
 fs/cifs/file.c | 31 ++++++-------------------------
 1 file changed, 6 insertions(+), 25 deletions(-)

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 74c33d5fafc83..c9bc56b1baac2 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2541,14 +2541,13 @@ static int
 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
 	struct cifs_aio_ctx *ctx)
 {
-	int wait_retry = 0;
 	unsigned int wsize, credits;
 	int rc;
 	struct TCP_Server_Info *server =
 		tlink_tcon(wdata->cfile->tlink)->ses->server;
 
 	/*
-	 * Try to resend this wdata, waiting for credits up to 3 seconds.
+	 * Wait for credits to resend this wdata.
 	 * Note: we are attempting to resend the whole wdata not in segments
 	 */
 	do {
@@ -2556,19 +2555,13 @@ cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
 			server, wdata->bytes, &wsize, &credits);
 
 		if (rc)
-			break;
+			goto out;
 
 		if (wsize < wdata->bytes) {
 			add_credits_and_wake_if(server, credits, 0);
 			msleep(1000);
-			wait_retry++;
 		}
-	} while (wsize < wdata->bytes && wait_retry < 3);
-
-	if (wsize < wdata->bytes) {
-		rc = -EBUSY;
-		goto out;
-	}
+	} while (wsize < wdata->bytes);
 
 	rc = -EAGAIN;
 	while (rc == -EAGAIN) {
@@ -3234,14 +3227,13 @@ static int cifs_resend_rdata(struct cifs_readdata *rdata,
 			struct list_head *rdata_list,
 			struct cifs_aio_ctx *ctx)
 {
-	int wait_retry = 0;
 	unsigned int rsize, credits;
 	int rc;
 	struct TCP_Server_Info *server =
 		tlink_tcon(rdata->cfile->tlink)->ses->server;
 
 	/*
-	 * Try to resend this rdata, waiting for credits up to 3 seconds.
+	 * Wait for credits to resend this rdata.
 	 * Note: we are attempting to resend the whole rdata not in segments
 	 */
 	do {
@@ -3249,24 +3241,13 @@ static int cifs_resend_rdata(struct cifs_readdata *rdata,
 						&rsize, &credits);
 
 		if (rc)
-			break;
+			goto out;
 
 		if (rsize < rdata->bytes) {
 			add_credits_and_wake_if(server, credits, 0);
 			msleep(1000);
-			wait_retry++;
 		}
-	} while (rsize < rdata->bytes && wait_retry < 3);
-
-	/*
-	 * If we can't find enough credits to send this rdata
-	 * release the rdata and return failure, this will pass
-	 * whatever I/O amount we have finished to VFS.
-	 */
-	if (rsize < rdata->bytes) {
-		rc = -EBUSY;
-		goto out;
-	}
+	} while (rsize < rdata->bytes);
 
 	rc = -EAGAIN;
 	while (rc == -EAGAIN) {
-- 
GitLab


From ba7aeae5539c7a7cccc4cf07a2bc61281a93c50e Mon Sep 17 00:00:00 2001
From: Paolo Valente <paolo.valente@linaro.org>
Date: Thu, 6 Dec 2018 19:18:18 +0100
Subject: [PATCH 1860/1890] block, bfq: fix decrement of num_active_groups

Since commit '2d29c9f89fcd ("block, bfq: improve asymmetric scenarios
detection")', if there are process groups with I/O requests waiting for
completion, then BFQ tags the scenario as 'asymmetric'. This detection
is needed for preserving service guarantees (for details, see comments
on the computation * of the variable asymmetric_scenario in the
function bfq_better_to_idle).

Unfortunately, commit '2d29c9f89fcd ("block, bfq: improve asymmetric
scenarios detection")' contains an error exactly in the updating of
the number of groups with I/O requests waiting for completion: if a
group has more than one descendant process, then the above number of
groups, which is renamed from num_active_groups to a more appropriate
num_groups_with_pending_reqs by this commit, may happen to be wrongly
decremented multiple times, namely every time one of the descendant
processes gets all its pending I/O requests completed.

A correct, complete solution should work as follows. Consider a group
that is inactive, i.e., that has no descendant process with pending
I/O inside BFQ queues. Then suppose that num_groups_with_pending_reqs
is still accounting for this group, because the group still has some
descendant process with some I/O request still in
flight. num_groups_with_pending_reqs should be decremented when the
in-flight request of the last descendant process is finally completed
(assuming that nothing else has changed for the group in the meantime,
in terms of composition of the group and active/inactive state of
child groups and processes). To accomplish this, an additional
pending-request counter must be added to entities, and must be
updated correctly.

To avoid this additional field and operations, this commit resorts to
the following tradeoff between simplicity and accuracy: for an
inactive group that is still counted in num_groups_with_pending_reqs,
this commit decrements num_groups_with_pending_reqs when the first
descendant process of the group remains with no request waiting for
completion.

This simplified scheme provides a fix to the unbalanced decrements
introduced by 2d29c9f89fcd. Since this error was also caused by lack
of comments on this non-trivial issue, this commit also adds related
comments.

Fixes: 2d29c9f89fcd ("block, bfq: improve asymmetric scenarios detection")
Reported-by: Steven Barrett <steven@liquorix.net>
Tested-by: Steven Barrett <steven@liquorix.net>
Tested-by: Lucjan Lucjanov <lucjan.lucjanov@gmail.com>
Reviewed-by: Federico Motta <federico@willer.it>
Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bfq-iosched.c | 76 ++++++++++++++++++++++++++++++++-------------
 block/bfq-iosched.h | 51 ++++++++++++++++++++++++++++--
 block/bfq-wf2q.c    |  5 ++-
 3 files changed, 107 insertions(+), 25 deletions(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 3a27d31fcda60..97337214bec42 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -638,7 +638,7 @@ static bool bfq_varied_queue_weights_or_active_groups(struct bfq_data *bfqd)
 		 bfqd->queue_weights_tree.rb_node->rb_right)
 #ifdef CONFIG_BFQ_GROUP_IOSCHED
 	       ) ||
-		(bfqd->num_active_groups > 0
+		(bfqd->num_groups_with_pending_reqs > 0
 #endif
 	       );
 }
@@ -802,7 +802,21 @@ void bfq_weights_tree_remove(struct bfq_data *bfqd,
 			 */
 			break;
 		}
-		bfqd->num_active_groups--;
+
+		/*
+		 * The decrement of num_groups_with_pending_reqs is
+		 * not performed immediately upon the deactivation of
+		 * entity, but it is delayed to when it also happens
+		 * that the first leaf descendant bfqq of entity gets
+		 * all its pending requests completed. The following
+		 * instructions perform this delayed decrement, if
+		 * needed. See the comments on
+		 * num_groups_with_pending_reqs for details.
+		 */
+		if (entity->in_groups_with_pending_reqs) {
+			entity->in_groups_with_pending_reqs = false;
+			bfqd->num_groups_with_pending_reqs--;
+		}
 	}
 }
 
@@ -3529,27 +3543,44 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq)
 	 * fact, if there are active groups, then, for condition (i)
 	 * to become false, it is enough that an active group contains
 	 * more active processes or sub-groups than some other active
-	 * group. We address this issue with the following bi-modal
-	 * behavior, implemented in the function
+	 * group. More precisely, for condition (i) to hold because of
+	 * such a group, it is not even necessary that the group is
+	 * (still) active: it is sufficient that, even if the group
+	 * has become inactive, some of its descendant processes still
+	 * have some request already dispatched but still waiting for
+	 * completion. In fact, requests have still to be guaranteed
+	 * their share of the throughput even after being
+	 * dispatched. In this respect, it is easy to show that, if a
+	 * group frequently becomes inactive while still having
+	 * in-flight requests, and if, when this happens, the group is
+	 * not considered in the calculation of whether the scenario
+	 * is asymmetric, then the group may fail to be guaranteed its
+	 * fair share of the throughput (basically because idling may
+	 * not be performed for the descendant processes of the group,
+	 * but it had to be).  We address this issue with the
+	 * following bi-modal behavior, implemented in the function
 	 * bfq_symmetric_scenario().
 	 *
-	 * If there are active groups, then the scenario is tagged as
+	 * If there are groups with requests waiting for completion
+	 * (as commented above, some of these groups may even be
+	 * already inactive), then the scenario is tagged as
 	 * asymmetric, conservatively, without checking any of the
 	 * conditions (i) and (ii). So the device is idled for bfqq.
 	 * This behavior matches also the fact that groups are created
-	 * exactly if controlling I/O (to preserve bandwidth and
-	 * latency guarantees) is a primary concern.
+	 * exactly if controlling I/O is a primary concern (to
+	 * preserve bandwidth and latency guarantees).
 	 *
-	 * On the opposite end, if there are no active groups, then
-	 * only condition (i) is actually controlled, i.e., provided
-	 * that condition (i) holds, idling is not performed,
-	 * regardless of whether condition (ii) holds. In other words,
-	 * only if condition (i) does not hold, then idling is
-	 * allowed, and the device tends to be prevented from queueing
-	 * many requests, possibly of several processes. Since there
-	 * are no active groups, then, to control condition (i) it is
-	 * enough to check whether all active queues have the same
-	 * weight.
+	 * On the opposite end, if there are no groups with requests
+	 * waiting for completion, then only condition (i) is actually
+	 * controlled, i.e., provided that condition (i) holds, idling
+	 * is not performed, regardless of whether condition (ii)
+	 * holds. In other words, only if condition (i) does not hold,
+	 * then idling is allowed, and the device tends to be
+	 * prevented from queueing many requests, possibly of several
+	 * processes. Since there are no groups with requests waiting
+	 * for completion, then, to control condition (i) it is enough
+	 * to check just whether all the queues with requests waiting
+	 * for completion also have the same weight.
 	 *
 	 * Not checking condition (ii) evidently exposes bfqq to the
 	 * risk of getting less throughput than its fair share.
@@ -3607,10 +3638,11 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq)
 	 * bfqq is weight-raised is checked explicitly here. More
 	 * precisely, the compound condition below takes into account
 	 * also the fact that, even if bfqq is being weight-raised,
-	 * the scenario is still symmetric if all active queues happen
-	 * to be weight-raised. Actually, we should be even more
-	 * precise here, and differentiate between interactive weight
-	 * raising and soft real-time weight raising.
+	 * the scenario is still symmetric if all queues with requests
+	 * waiting for completion happen to be
+	 * weight-raised. Actually, we should be even more precise
+	 * here, and differentiate between interactive weight raising
+	 * and soft real-time weight raising.
 	 *
 	 * As a side note, it is worth considering that the above
 	 * device-idling countermeasures may however fail in the
@@ -5417,7 +5449,7 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
 	bfqd->idle_slice_timer.function = bfq_idle_slice_timer;
 
 	bfqd->queue_weights_tree = RB_ROOT;
-	bfqd->num_active_groups = 0;
+	bfqd->num_groups_with_pending_reqs = 0;
 
 	INIT_LIST_HEAD(&bfqd->active_list);
 	INIT_LIST_HEAD(&bfqd->idle_list);
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index 77651d817ecd3..0b02bf302de07 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -196,6 +196,9 @@ struct bfq_entity {
 
 	/* flag, set to request a weight, ioprio or ioprio_class change  */
 	int prio_changed;
+
+	/* flag, set if the entity is counted in groups_with_pending_reqs */
+	bool in_groups_with_pending_reqs;
 };
 
 struct bfq_group;
@@ -448,10 +451,54 @@ struct bfq_data {
 	 * bfq_weights_tree_[add|remove] for further details).
 	 */
 	struct rb_root queue_weights_tree;
+
 	/*
-	 * number of groups with requests still waiting for completion
+	 * Number of groups with at least one descendant process that
+	 * has at least one request waiting for completion. Note that
+	 * this accounts for also requests already dispatched, but not
+	 * yet completed. Therefore this number of groups may differ
+	 * (be larger) than the number of active groups, as a group is
+	 * considered active only if its corresponding entity has
+	 * descendant queues with at least one request queued. This
+	 * number is used to decide whether a scenario is symmetric.
+	 * For a detailed explanation see comments on the computation
+	 * of the variable asymmetric_scenario in the function
+	 * bfq_better_to_idle().
+	 *
+	 * However, it is hard to compute this number exactly, for
+	 * groups with multiple descendant processes. Consider a group
+	 * that is inactive, i.e., that has no descendant process with
+	 * pending I/O inside BFQ queues. Then suppose that
+	 * num_groups_with_pending_reqs is still accounting for this
+	 * group, because the group has descendant processes with some
+	 * I/O request still in flight. num_groups_with_pending_reqs
+	 * should be decremented when the in-flight request of the
+	 * last descendant process is finally completed (assuming that
+	 * nothing else has changed for the group in the meantime, in
+	 * terms of composition of the group and active/inactive state of child
+	 * groups and processes). To accomplish this, an additional
+	 * pending-request counter must be added to entities, and must
+	 * be updated correctly. To avoid this additional field and operations,
+	 * we resort to the following tradeoff between simplicity and
+	 * accuracy: for an inactive group that is still counted in
+	 * num_groups_with_pending_reqs, we decrement
+	 * num_groups_with_pending_reqs when the first descendant
+	 * process of the group remains with no request waiting for
+	 * completion.
+	 *
+	 * Even this simpler decrement strategy requires a little
+	 * carefulness: to avoid multiple decrements, we flag a group,
+	 * more precisely an entity representing a group, as still
+	 * counted in num_groups_with_pending_reqs when it becomes
+	 * inactive. Then, when the first descendant queue of the
+	 * entity remains with no request waiting for completion,
+	 * num_groups_with_pending_reqs is decremented, and this flag
+	 * is reset. After this flag is reset for the entity,
+	 * num_groups_with_pending_reqs won't be decremented any
+	 * longer in case a new descendant queue of the entity remains
+	 * with no request waiting for completion.
 	 */
-	unsigned int num_active_groups;
+	unsigned int num_groups_with_pending_reqs;
 
 	/*
 	 * Number of bfq_queues containing requests (including the
diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
index 4b0d5fb691600..63e0f12be7c98 100644
--- a/block/bfq-wf2q.c
+++ b/block/bfq-wf2q.c
@@ -1012,7 +1012,10 @@ static void __bfq_activate_entity(struct bfq_entity *entity,
 			container_of(entity, struct bfq_group, entity);
 		struct bfq_data *bfqd = bfqg->bfqd;
 
-		bfqd->num_active_groups++;
+		if (!entity->in_groups_with_pending_reqs) {
+			entity->in_groups_with_pending_reqs = true;
+			bfqd->num_groups_with_pending_reqs++;
+		}
 	}
 #endif
 
-- 
GitLab


From 86880d646122240596d6719b642fee3213239994 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Tue, 27 Nov 2018 17:04:44 -0800
Subject: [PATCH 1861/1890] nvme: validate controller state before rescheduling
 keep alive

Delete operations are seeing NULL pointer references in call_timer_fn.
Tracking these back, the timer appears to be the keep alive timer.

nvme_keep_alive_work() which is tied to the timer that is cancelled
by nvme_stop_keep_alive(), simply starts the keep alive io but doesn't
wait for it's completion. So nvme_stop_keep_alive() only stops a timer
when it's pending. When a keep alive is in flight, there is no timer
running and the nvme_stop_keep_alive() will have no affect on the keep
alive io. Thus, if the io completes successfully, the keep alive timer
will be rescheduled.   In the failure case, delete is called, the
controller state is changed, the nvme_stop_keep_alive() is called while
the io is outstanding, and the delete path continues on. The keep
alive happens to successfully complete before the delete paths mark it
as aborted as part of the queue termination, so the timer is restarted.
The delete paths then tear down the controller, and later on the timer
code fires and the timer entry is now corrupt.

Fix by validating the controller state before rescheduling the keep
alive. Testing with the fix has confirmed the condition above was hit.

Signed-off-by: James Smart <jsmart2021@gmail.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 3cf1b773158ec..962012135b62a 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -831,6 +831,8 @@ static int nvme_submit_user_cmd(struct request_queue *q,
 static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status)
 {
 	struct nvme_ctrl *ctrl = rq->end_io_data;
+	unsigned long flags;
+	bool startka = false;
 
 	blk_mq_free_request(rq);
 
@@ -841,7 +843,13 @@ static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status)
 		return;
 	}
 
-	schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
+	spin_lock_irqsave(&ctrl->lock, flags);
+	if (ctrl->state == NVME_CTRL_LIVE ||
+	    ctrl->state == NVME_CTRL_CONNECTING)
+		startka = true;
+	spin_unlock_irqrestore(&ctrl->lock, flags);
+	if (startka)
+		schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
 }
 
 static int nvme_keep_alive(struct nvme_ctrl *ctrl)
-- 
GitLab


From d7dcdf9d4e15189ecfda24cc87339a3425448d5c Mon Sep 17 00:00:00 2001
From: Israel Rukshin <israelr@mellanox.com>
Date: Wed, 5 Dec 2018 16:54:57 +0000
Subject: [PATCH 1862/1890] nvmet-rdma: fix response use after free

nvmet_rdma_release_rsp() may free the response before using it at error
flow.

Fixes: 8407879 ("nvmet-rdma: fix possible bogus dereference under heavy load")
Signed-off-by: Israel Rukshin <israelr@mellanox.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/rdma.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 3f7971d3706d9..583086dd9cb9a 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -529,6 +529,7 @@ static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
 {
 	struct nvmet_rdma_rsp *rsp =
 		container_of(wc->wr_cqe, struct nvmet_rdma_rsp, send_cqe);
+	struct nvmet_rdma_queue *queue = cq->cq_context;
 
 	nvmet_rdma_release_rsp(rsp);
 
@@ -536,7 +537,7 @@ static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
 		     wc->status != IB_WC_WR_FLUSH_ERR)) {
 		pr_err("SEND for CQE 0x%p failed with status %s (%d).\n",
 			wc->wr_cqe, ib_wc_status_msg(wc->status), wc->status);
-		nvmet_rdma_error_comp(rsp->queue);
+		nvmet_rdma_error_comp(queue);
 	}
 }
 
-- 
GitLab


From c616cbee97aed4bc6178f148a7240206dcdb85a6 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Thu, 6 Dec 2018 22:17:44 -0700
Subject: [PATCH 1863/1890] blk-mq: punt failed direct issue to dispatch list

After the direct dispatch corruption fix, we permanently disallow direct
dispatch of non read/write requests. This works fine off the normal IO
path, as they will be retried like any other failed direct dispatch
request. But for the blk_insert_cloned_request() that only DM uses to
bypass the bottom level scheduler, we always first attempt direct
dispatch. For some types of requests, that's now a permanent failure,
and no amount of retrying will make that succeed. This results in a
livelock.

Instead of making special cases for what we can direct issue, and now
having to deal with DM solving the livelock while still retaining a BUSY
condition feedback loop, always just add a request that has been through
->queue_rq() to the hardware queue dispatch list. These are safe to use
as no merging can take place there. Additionally, if requests do have
prepped data from drivers, we aren't dependent on them not sharing space
in the request structure to safely add them to the IO scheduler lists.

This basically reverts ffe81d45322c and is based on a patch from Ming,
but with the list insert case covered as well.

Fixes: ffe81d45322c ("blk-mq: fix corruption with direct issue")
Cc: stable@vger.kernel.org
Suggested-by: Ming Lei <ming.lei@redhat.com>
Reported-by: Bart Van Assche <bvanassche@acm.org>
Tested-by: Ming Lei <ming.lei@redhat.com>
Acked-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c | 33 +++++----------------------------
 1 file changed, 5 insertions(+), 28 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 3262d83b9e074..6a7566244de30 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1715,15 +1715,6 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
 		break;
 	case BLK_STS_RESOURCE:
 	case BLK_STS_DEV_RESOURCE:
-		/*
-		 * If direct dispatch fails, we cannot allow any merging on
-		 * this IO. Drivers (like SCSI) may have set up permanent state
-		 * for this request, like SG tables and mappings, and if we
-		 * merge to it later on then we'll still only do IO to the
-		 * original part.
-		 */
-		rq->cmd_flags |= REQ_NOMERGE;
-
 		blk_mq_update_dispatch_busy(hctx, true);
 		__blk_mq_requeue_request(rq);
 		break;
@@ -1736,18 +1727,6 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
 	return ret;
 }
 
-/*
- * Don't allow direct dispatch of anything but regular reads/writes,
- * as some of the other commands can potentially share request space
- * with data we need for the IO scheduler. If we attempt a direct dispatch
- * on those and fail, we can't safely add it to the scheduler afterwards
- * without potentially overwriting data that the driver has already written.
- */
-static bool blk_rq_can_direct_dispatch(struct request *rq)
-{
-	return req_op(rq) == REQ_OP_READ || req_op(rq) == REQ_OP_WRITE;
-}
-
 static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
 						struct request *rq,
 						blk_qc_t *cookie,
@@ -1769,7 +1748,7 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
 		goto insert;
 	}
 
-	if (!blk_rq_can_direct_dispatch(rq) || (q->elevator && !bypass_insert))
+	if (q->elevator && !bypass_insert)
 		goto insert;
 
 	if (!blk_mq_get_dispatch_budget(hctx))
@@ -1785,7 +1764,7 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
 	if (bypass_insert)
 		return BLK_STS_RESOURCE;
 
-	blk_mq_sched_insert_request(rq, false, run_queue, false);
+	blk_mq_request_bypass_insert(rq, run_queue);
 	return BLK_STS_OK;
 }
 
@@ -1801,7 +1780,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
 
 	ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false);
 	if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE)
-		blk_mq_sched_insert_request(rq, false, true, false);
+		blk_mq_request_bypass_insert(rq, true);
 	else if (ret != BLK_STS_OK)
 		blk_mq_end_request(rq, ret);
 
@@ -1831,15 +1810,13 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
 		struct request *rq = list_first_entry(list, struct request,
 				queuelist);
 
-		if (!blk_rq_can_direct_dispatch(rq))
-			break;
-
 		list_del_init(&rq->queuelist);
 		ret = blk_mq_request_issue_directly(rq);
 		if (ret != BLK_STS_OK) {
 			if (ret == BLK_STS_RESOURCE ||
 					ret == BLK_STS_DEV_RESOURCE) {
-				list_add(&rq->queuelist, list);
+				blk_mq_request_bypass_insert(rq,
+							list_empty(list));
 				break;
 			}
 			blk_mq_end_request(rq, ret);
-- 
GitLab


From 52ea899637c746984d657b508da6e3f2686adfca Mon Sep 17 00:00:00 2001
From: Peter Hutterer <peter.hutterer@who-t.net>
Date: Wed, 5 Dec 2018 10:42:21 +1000
Subject: [PATCH 1864/1890] Input: add `REL_WHEEL_HI_RES` and
 `REL_HWHEEL_HI_RES`

This event code represents scroll reports from high-resolution wheels and
is modelled after the approach Windows uses. The value 120 is one detent
(wheel click) of movement. Mice with higher-resolution scrolling can send
fractions of 120 which must be accumulated in userspace. Userspace can either
wait for a full 120 to accumulate or scroll by fractions of one logical scroll
movement as the events come in. 120 was picked as magic number because it has
a high number of integer fractions that can be used by high-resolution wheels.

For more information see
https://docs.microsoft.com/en-us/previous-versions/windows/hardware/design/dn613912(v=vs.85)

These new axes obsolete REL_WHEEL and REL_HWHEEL. The legacy axes are emulated
by the kernel but the most accurate (and most granular) data is available
through the new axes.

Signed-off-by: Peter Hutterer <peter.hutterer@who-t.net>
Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Verified-by: Harry Cutts <hcutts@chromium.org>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
---
 Documentation/input/event-codes.rst    | 21 ++++++++++++++++++++-
 include/uapi/linux/input-event-codes.h |  2 ++
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/Documentation/input/event-codes.rst b/Documentation/input/event-codes.rst
index a8c0873beb952..b24b5343f5eb3 100644
--- a/Documentation/input/event-codes.rst
+++ b/Documentation/input/event-codes.rst
@@ -190,7 +190,26 @@ A few EV_REL codes have special meanings:
 * REL_WHEEL, REL_HWHEEL:
 
   - These codes are used for vertical and horizontal scroll wheels,
-    respectively.
+    respectively. The value is the number of detents moved on the wheel, the
+    physical size of which varies by device. For high-resolution wheels
+    this may be an approximation based on the high-resolution scroll events,
+    see REL_WHEEL_HI_RES. These event codes are legacy codes and
+    REL_WHEEL_HI_RES and REL_HWHEEL_HI_RES should be preferred where
+    available.
+
+* REL_WHEEL_HI_RES, REL_HWHEEL_HI_RES:
+
+  - High-resolution scroll wheel data. The accumulated value 120 represents
+    movement by one detent. For devices that do not provide high-resolution
+    scrolling, the value is always a multiple of 120. For devices with
+    high-resolution scrolling, the value may be a fraction of 120.
+
+    If a vertical scroll wheel supports high-resolution scrolling, this code
+    will be emitted in addition to REL_WHEEL or REL_HWHEEL. The REL_WHEEL
+    and REL_HWHEEL may be an approximation based on the high-resolution
+    scroll events. There is no guarantee that the high-resolution data
+    is a multiple of 120 at the time of an emulated REL_WHEEL or REL_HWHEEL
+    event.
 
 EV_ABS
 ------
diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h
index 3eb5a4c3d60a9..265ef20286605 100644
--- a/include/uapi/linux/input-event-codes.h
+++ b/include/uapi/linux/input-event-codes.h
@@ -716,6 +716,8 @@
  * the situation described above.
  */
 #define REL_RESERVED		0x0a
+#define REL_WHEEL_HI_RES	0x0b
+#define REL_HWHEEL_HI_RES	0x0c
 #define REL_MAX			0x0f
 #define REL_CNT			(REL_MAX+1)
 
-- 
GitLab


From c53431eb696f3c64c12c00afb81048af54b61532 Mon Sep 17 00:00:00 2001
From: Peter Hutterer <peter.hutterer@who-t.net>
Date: Wed, 5 Dec 2018 10:42:22 +1000
Subject: [PATCH 1865/1890] HID: core: store the collections as a basic tree

For each collection parsed, store a pointer to the parent collection
(if any). This makes it a lot easier to look up which collection(s)
any given item is part of

Signed-off-by: Peter Hutterer <peter.hutterer@who-t.net>
Verified-by: Harry Cutts <hcutts@chromium.org>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
---
 drivers/hid/hid-core.c | 4 ++++
 include/linux/hid.h    | 2 ++
 2 files changed, 6 insertions(+)

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 5bec9244c45b5..43d488a451202 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -172,6 +172,8 @@ static int open_collection(struct hid_parser *parser, unsigned type)
 	collection->type = type;
 	collection->usage = usage;
 	collection->level = parser->collection_stack_ptr - 1;
+	collection->parent = parser->active_collection;
+	parser->active_collection = collection;
 
 	if (type == HID_COLLECTION_APPLICATION)
 		parser->device->maxapplication++;
@@ -190,6 +192,8 @@ static int close_collection(struct hid_parser *parser)
 		return -EINVAL;
 	}
 	parser->collection_stack_ptr--;
+	if (parser->active_collection)
+		parser->active_collection = parser->active_collection->parent;
 	return 0;
 }
 
diff --git a/include/linux/hid.h b/include/linux/hid.h
index a355d61940f28..fdfda898656c3 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -427,6 +427,7 @@ struct hid_local {
  */
 
 struct hid_collection {
+	struct hid_collection *parent;
 	unsigned type;
 	unsigned usage;
 	unsigned level;
@@ -650,6 +651,7 @@ struct hid_parser {
 	unsigned int         *collection_stack;
 	unsigned int          collection_stack_ptr;
 	unsigned int          collection_stack_size;
+	struct hid_collection *active_collection;
 	struct hid_device    *device;
 	unsigned int          scan_flags;
 };
-- 
GitLab


From 5a4abb36f312cf83206b1b7d1308ba47cba0b3cc Mon Sep 17 00:00:00 2001
From: Peter Hutterer <peter.hutterer@who-t.net>
Date: Wed, 5 Dec 2018 10:42:23 +1000
Subject: [PATCH 1866/1890] HID: core: process the Resolution Multiplier

The Resolution Multiplier is a feature report that modifies the value of
Usages within the same Logical Collection. If the multiplier is set to
anything but 1, the hardware reports (value * multiplier) for the same amount
of physical movement, i.e. the value we receive in the kernel is
pre-multiplied.

The hardware may either send a single (value * multiplier), or by sending
multiplier as many reports with the same value, or a combination of these two
options. For example, when the Microsoft Sculpt Ergonomic mouse Resolution
Multiplier is set to 12, the Wheel sends out 12 for every detent but AC Pan
sends out a value of 3 at 4 times the frequency.

The effective multiplier is based on the physical min/max of the multiplier
field, a logical min/max of [0,1] with a physical min/max of [1,8] means the
multiplier is either 1 or 8.

The Resolution Multiplier was introduced for high-resolution scrolling in
Windows Vista and is commonly used on Microsoft mice.

The recommendation for the Resolution Multiplier is to default to 1 for
backwards compatibility. This patch adds an arbitrary upper limit at 255. The
only known use case for the Resolution Multiplier is for scroll wheels where the
multiplier has to be a fraction of 120 to work with Windows.

Signed-off-by: Peter Hutterer <peter.hutterer@who-t.net>
Verified-by: Harry Cutts <hcutts@chromium.org>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
---
 drivers/hid/hid-core.c | 170 +++++++++++++++++++++++++++++++++++++++++
 include/linux/hid.h    |   5 ++
 2 files changed, 175 insertions(+)

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 43d488a451202..f41d5fe51abe3 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -294,6 +294,7 @@ static int hid_add_field(struct hid_parser *parser, unsigned report_type, unsign
 		field->usage[i].collection_index =
 			parser->local.collection_index[j];
 		field->usage[i].usage_index = i;
+		field->usage[i].resolution_multiplier = 1;
 	}
 
 	field->maxusage = usages;
@@ -947,6 +948,167 @@ struct hid_report *hid_validate_values(struct hid_device *hid,
 }
 EXPORT_SYMBOL_GPL(hid_validate_values);
 
+static int hid_calculate_multiplier(struct hid_device *hid,
+				     struct hid_field *multiplier)
+{
+	int m;
+	__s32 v = *multiplier->value;
+	__s32 lmin = multiplier->logical_minimum;
+	__s32 lmax = multiplier->logical_maximum;
+	__s32 pmin = multiplier->physical_minimum;
+	__s32 pmax = multiplier->physical_maximum;
+
+	/*
+	 * "Because OS implementations will generally divide the control's
+	 * reported count by the Effective Resolution Multiplier, designers
+	 * should take care not to establish a potential Effective
+	 * Resolution Multiplier of zero."
+	 * HID Usage Table, v1.12, Section 4.3.1, p31
+	 */
+	if (lmax - lmin == 0)
+		return 1;
+	/*
+	 * Handling the unit exponent is left as an exercise to whoever
+	 * finds a device where that exponent is not 0.
+	 */
+	m = ((v - lmin)/(lmax - lmin) * (pmax - pmin) + pmin);
+	if (unlikely(multiplier->unit_exponent != 0)) {
+		hid_warn(hid,
+			 "unsupported Resolution Multiplier unit exponent %d\n",
+			 multiplier->unit_exponent);
+	}
+
+	/* There are no devices with an effective multiplier > 255 */
+	if (unlikely(m == 0 || m > 255 || m < -255)) {
+		hid_warn(hid, "unsupported Resolution Multiplier %d\n", m);
+		m = 1;
+	}
+
+	return m;
+}
+
+static void hid_apply_multiplier_to_field(struct hid_device *hid,
+					  struct hid_field *field,
+					  struct hid_collection *multiplier_collection,
+					  int effective_multiplier)
+{
+	struct hid_collection *collection;
+	struct hid_usage *usage;
+	int i;
+
+	/*
+	 * If multiplier_collection is NULL, the multiplier applies
+	 * to all fields in the report.
+	 * Otherwise, it is the Logical Collection the multiplier applies to
+	 * but our field may be in a subcollection of that collection.
+	 */
+	for (i = 0; i < field->maxusage; i++) {
+		usage = &field->usage[i];
+
+		collection = &hid->collection[usage->collection_index];
+		while (collection && collection != multiplier_collection)
+			collection = collection->parent;
+
+		if (collection || multiplier_collection == NULL)
+			usage->resolution_multiplier = effective_multiplier;
+
+	}
+}
+
+static void hid_apply_multiplier(struct hid_device *hid,
+				 struct hid_field *multiplier)
+{
+	struct hid_report_enum *rep_enum;
+	struct hid_report *rep;
+	struct hid_field *field;
+	struct hid_collection *multiplier_collection;
+	int effective_multiplier;
+	int i;
+
+	/*
+	 * "The Resolution Multiplier control must be contained in the same
+	 * Logical Collection as the control(s) to which it is to be applied.
+	 * If no Resolution Multiplier is defined, then the Resolution
+	 * Multiplier defaults to 1.  If more than one control exists in a
+	 * Logical Collection, the Resolution Multiplier is associated with
+	 * all controls in the collection. If no Logical Collection is
+	 * defined, the Resolution Multiplier is associated with all
+	 * controls in the report."
+	 * HID Usage Table, v1.12, Section 4.3.1, p30
+	 *
+	 * Thus, search from the current collection upwards until we find a
+	 * logical collection. Then search all fields for that same parent
+	 * collection. Those are the fields the multiplier applies to.
+	 *
+	 * If we have more than one multiplier, it will overwrite the
+	 * applicable fields later.
+	 */
+	multiplier_collection = &hid->collection[multiplier->usage->collection_index];
+	while (multiplier_collection &&
+	       multiplier_collection->type != HID_COLLECTION_LOGICAL)
+		multiplier_collection = multiplier_collection->parent;
+
+	effective_multiplier = hid_calculate_multiplier(hid, multiplier);
+
+	rep_enum = &hid->report_enum[HID_INPUT_REPORT];
+	list_for_each_entry(rep, &rep_enum->report_list, list) {
+		for (i = 0; i < rep->maxfield; i++) {
+			field = rep->field[i];
+			hid_apply_multiplier_to_field(hid, field,
+						      multiplier_collection,
+						      effective_multiplier);
+		}
+	}
+}
+
+/*
+ * hid_setup_resolution_multiplier - set up all resolution multipliers
+ *
+ * @device: hid device
+ *
+ * Search for all Resolution Multiplier Feature Reports and apply their
+ * value to all matching Input items. This only updates the internal struct
+ * fields.
+ *
+ * The Resolution Multiplier is applied by the hardware. If the multiplier
+ * is anything other than 1, the hardware will send pre-multiplied events
+ * so that the same physical interaction generates an accumulated
+ *	accumulated_value = value * * multiplier
+ * This may be achieved by sending
+ * - "value * multiplier" for each event, or
+ * - "value" but "multiplier" times as frequently, or
+ * - a combination of the above
+ * The only guarantee is that the same physical interaction always generates
+ * an accumulated 'value * multiplier'.
+ *
+ * This function must be called before any event processing and after
+ * any SetRequest to the Resolution Multiplier.
+ */
+void hid_setup_resolution_multiplier(struct hid_device *hid)
+{
+	struct hid_report_enum *rep_enum;
+	struct hid_report *rep;
+	struct hid_usage *usage;
+	int i, j;
+
+	rep_enum = &hid->report_enum[HID_FEATURE_REPORT];
+	list_for_each_entry(rep, &rep_enum->report_list, list) {
+		for (i = 0; i < rep->maxfield; i++) {
+			/* Ignore if report count is out of bounds. */
+			if (rep->field[i]->report_count < 1)
+				continue;
+
+			for (j = 0; j < rep->field[i]->maxusage; j++) {
+				usage = &rep->field[i]->usage[j];
+				if (usage->hid == HID_GD_RESOLUTION_MULTIPLIER)
+					hid_apply_multiplier(hid,
+							     rep->field[i]);
+			}
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(hid_setup_resolution_multiplier);
+
 /**
  * hid_open_report - open a driver-specific device report
  *
@@ -1043,9 +1205,17 @@ int hid_open_report(struct hid_device *device)
 				hid_err(device, "unbalanced delimiter at end of report description\n");
 				goto err;
 			}
+
+			/*
+			 * fetch initial values in case the device's
+			 * default multiplier isn't the recommended 1
+			 */
+			hid_setup_resolution_multiplier(device);
+
 			kfree(parser->collection_stack);
 			vfree(parser);
 			device->status |= HID_STAT_PARSED;
+
 			return 0;
 		}
 	}
diff --git a/include/linux/hid.h b/include/linux/hid.h
index fdfda898656c3..fd8d860365a48 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -219,6 +219,7 @@ struct hid_item {
 #define HID_GD_VBRZ		0x00010045
 #define HID_GD_VNO		0x00010046
 #define HID_GD_FEATURE		0x00010047
+#define HID_GD_RESOLUTION_MULTIPLIER	0x00010048
 #define HID_GD_SYSTEM_CONTROL	0x00010080
 #define HID_GD_UP		0x00010090
 #define HID_GD_DOWN		0x00010091
@@ -437,6 +438,8 @@ struct hid_usage {
 	unsigned  hid;			/* hid usage code */
 	unsigned  collection_index;	/* index into collection array */
 	unsigned  usage_index;		/* index into usage array */
+	__s8	  resolution_multiplier;/* Effective Resolution Multiplier
+					   (HUT v1.12, 4.3.1), default: 1 */
 	/* hidinput data */
 	__u16     code;			/* input driver code */
 	__u8      type;			/* input driver type */
@@ -894,6 +897,8 @@ struct hid_report *hid_validate_values(struct hid_device *hid,
 				       unsigned int type, unsigned int id,
 				       unsigned int field_index,
 				       unsigned int report_counts);
+
+void hid_setup_resolution_multiplier(struct hid_device *hid);
 int hid_open_report(struct hid_device *device);
 int hid_check_keys_pressed(struct hid_device *hid);
 int hid_connect(struct hid_device *hid, unsigned int connect_mask);
-- 
GitLab


From 2dc702c991e3774af9d7ce410eef410ca9e2357e Mon Sep 17 00:00:00 2001
From: Peter Hutterer <peter.hutterer@who-t.net>
Date: Wed, 5 Dec 2018 10:42:24 +1000
Subject: [PATCH 1867/1890] HID: input: use the Resolution Multiplier for
 high-resolution scrolling

Windows uses a magic number of 120 for a wheel click. High-resolution
scroll wheels are supposed to use a fraction of 120 to signal smaller
scroll steps. This is implemented by the Resolution Multiplier in the
device itself.

If the multiplier is present in the report descriptor, set it to the
logical max and then use the resolution multiplier to calculate the
high-resolution events. This is the recommendation by Microsoft, see
http://msdn.microsoft.com/en-us/windows/hardware/gg487477.aspx

Note that all mice encountered so far have a logical min/max of 0/1, so
it's a binary "yes or no" to high-res scrolling anyway.

To make userspace simpler, always enable the REL_WHEEL_HI_RES bit. Where
the device doesn't support high-resolution scrolling, the value for the
high-res data will simply be a multiple of 120 every time. For userspace,
if REL_WHEEL_HI_RES is available that is the one to be used.

Potential side-effect: a device with a Resolution Multiplier applying to
other Input items will have those items set to the logical max as well.
This cannot easily be worked around but it is doubtful such devices exist.

Signed-off-by: Peter Hutterer <peter.hutterer@who-t.net>
Verified-by: Harry Cutts <hcutts@chromium.org>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
---
 drivers/hid/hid-input.c | 108 ++++++++++++++++++++++++++++++++++++++--
 include/linux/hid.h     |   3 ++
 2 files changed, 108 insertions(+), 3 deletions(-)

diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index d6fab57984874..59a5608b8dc06 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -712,7 +712,15 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
 				map_abs_clear(usage->hid & 0xf);
 			break;
 
-		case HID_GD_SLIDER: case HID_GD_DIAL: case HID_GD_WHEEL:
+		case HID_GD_WHEEL:
+			if (field->flags & HID_MAIN_ITEM_RELATIVE) {
+				set_bit(REL_WHEEL, input->relbit);
+				map_rel(REL_WHEEL_HI_RES);
+			} else {
+				map_abs(usage->hid & 0xf);
+			}
+			break;
+		case HID_GD_SLIDER: case HID_GD_DIAL:
 			if (field->flags & HID_MAIN_ITEM_RELATIVE)
 				map_rel(usage->hid & 0xf);
 			else
@@ -1012,7 +1020,10 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
 		case 0x22f: map_key_clear(KEY_ZOOMRESET);	break;
 		case 0x233: map_key_clear(KEY_SCROLLUP);	break;
 		case 0x234: map_key_clear(KEY_SCROLLDOWN);	break;
-		case 0x238: map_rel(REL_HWHEEL);		break;
+		case 0x238: /* AC Pan */
+			set_bit(REL_HWHEEL, input->relbit);
+			map_rel(REL_HWHEEL_HI_RES);
+			break;
 		case 0x23d: map_key_clear(KEY_EDIT);		break;
 		case 0x25f: map_key_clear(KEY_CANCEL);		break;
 		case 0x269: map_key_clear(KEY_INSERT);		break;
@@ -1200,6 +1211,38 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
 
 }
 
+static void hidinput_handle_scroll(struct hid_usage *usage,
+				   struct input_dev *input,
+				   __s32 value)
+{
+	int code;
+	int hi_res, lo_res;
+
+	if (value == 0)
+		return;
+
+	if (usage->code == REL_WHEEL_HI_RES)
+		code = REL_WHEEL;
+	else
+		code = REL_HWHEEL;
+
+	/*
+	 * Windows reports one wheel click as value 120. Where a high-res
+	 * scroll wheel is present, a fraction of 120 is reported instead.
+	 * Our REL_WHEEL_HI_RES axis does the same because all HW must
+	 * adhere to the 120 expectation.
+	 */
+	hi_res = value * 120/usage->resolution_multiplier;
+
+	usage->wheel_accumulated += hi_res;
+	lo_res = usage->wheel_accumulated/120;
+	if (lo_res)
+		usage->wheel_accumulated -= lo_res * 120;
+
+	input_event(input, EV_REL, code, lo_res);
+	input_event(input, EV_REL, usage->code, hi_res);
+}
+
 void hidinput_hid_event(struct hid_device *hid, struct hid_field *field, struct hid_usage *usage, __s32 value)
 {
 	struct input_dev *input;
@@ -1262,6 +1305,12 @@ void hidinput_hid_event(struct hid_device *hid, struct hid_field *field, struct
 	if ((usage->type == EV_KEY) && (usage->code == 0)) /* Key 0 is "unassigned", not KEY_UNKNOWN */
 		return;
 
+	if ((usage->type == EV_REL) && (usage->code == REL_WHEEL_HI_RES ||
+					usage->code == REL_HWHEEL_HI_RES)) {
+		hidinput_handle_scroll(usage, input, value);
+		return;
+	}
+
 	if ((usage->type == EV_ABS) && (field->flags & HID_MAIN_ITEM_RELATIVE) &&
 			(usage->code == ABS_VOLUME)) {
 		int count = abs(value);
@@ -1489,6 +1538,58 @@ static void hidinput_close(struct input_dev *dev)
 	hid_hw_close(hid);
 }
 
+static void hidinput_change_resolution_multipliers(struct hid_device *hid)
+{
+	struct hid_report_enum *rep_enum;
+	struct hid_report *rep;
+	struct hid_usage *usage;
+	int i, j;
+
+	rep_enum = &hid->report_enum[HID_FEATURE_REPORT];
+	list_for_each_entry(rep, &rep_enum->report_list, list) {
+		bool update_needed = false;
+
+		if (rep->maxfield == 0)
+			continue;
+
+		/*
+		 * If we have more than one feature within this report we
+		 * need to fill in the bits from the others before we can
+		 * overwrite the ones for the Resolution Multiplier.
+		 */
+		if (rep->maxfield > 1) {
+			hid_hw_request(hid, rep, HID_REQ_GET_REPORT);
+			hid_hw_wait(hid);
+		}
+
+		for (i = 0; i < rep->maxfield; i++) {
+			__s32 logical_max = rep->field[i]->logical_maximum;
+
+			/* There is no good reason for a Resolution
+			 * Multiplier to have a count other than 1.
+			 * Ignore that case.
+			 */
+			if (rep->field[i]->report_count != 1)
+				continue;
+
+			for (j = 0; j < rep->field[i]->maxusage; j++) {
+				usage = &rep->field[i]->usage[j];
+
+				if (usage->hid != HID_GD_RESOLUTION_MULTIPLIER)
+					continue;
+
+				*rep->field[i]->value = logical_max;
+				update_needed = true;
+			}
+		}
+		if (update_needed)
+			hid_hw_request(hid, rep, HID_REQ_SET_REPORT);
+	}
+
+	/* refresh our structs */
+	hid_setup_resolution_multiplier(hid);
+}
+
 static void report_features(struct hid_device *hid)
 {
 	struct hid_driver *drv = hid->driver;
@@ -1782,6 +1883,8 @@ int hidinput_connect(struct hid_device *hid, unsigned int force)
 		}
 	}
 
+	hidinput_change_resolution_multipliers(hid);
+
 	list_for_each_entry_safe(hidinput, next, &hid->inputs, list) {
 		if (drv->input_configured &&
 		    drv->input_configured(hid, hidinput))
@@ -1840,4 +1943,3 @@ void hidinput_disconnect(struct hid_device *hid)
 	cancel_work_sync(&hid->led_work);
 }
 EXPORT_SYMBOL_GPL(hidinput_disconnect);
-
diff --git a/include/linux/hid.h b/include/linux/hid.h
index fd8d860365a48..93db548f8761b 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -233,6 +233,7 @@ struct hid_item {
 #define HID_DC_BATTERYSTRENGTH	0x00060020
 
 #define HID_CP_CONSUMER_CONTROL	0x000c0001
+#define HID_CP_AC_PAN		0x000c0238
 
 #define HID_DG_DIGITIZER	0x000d0001
 #define HID_DG_PEN		0x000d0002
@@ -441,11 +442,13 @@ struct hid_usage {
 	__s8	  resolution_multiplier;/* Effective Resolution Multiplier
 					   (HUT v1.12, 4.3.1), default: 1 */
 	/* hidinput data */
+	__s8	  wheel_factor;		/* 120/resolution_multiplier */
 	__u16     code;			/* input driver code */
 	__u8      type;			/* input driver type */
 	__s8	  hat_min;		/* hat switch fun */
 	__s8	  hat_max;		/* ditto */
 	__s8	  hat_dir;		/* ditto */
+	__s16	  wheel_accumulated;	/* hi-res wheel */
 };
 
 struct hid_input;
-- 
GitLab


From fef33601a321d8b7edfdd4c318b9c6c7aec6ef06 Mon Sep 17 00:00:00 2001
From: Peter Hutterer <peter.hutterer@who-t.net>
Date: Wed, 5 Dec 2018 10:42:25 +1000
Subject: [PATCH 1868/1890] HID: logitech-hidpp: fix typo, hiddpp to hidpp

Signed-off-by: Peter Hutterer <peter.hutterer@who-t.net>
Verified-by: Harry Cutts <hcutts@chromium.org>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
---
 drivers/hid/hid-logitech-hidpp.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c
index 19cc980eebce6..22b37a3844d10 100644
--- a/drivers/hid/hid-logitech-hidpp.c
+++ b/drivers/hid/hid-logitech-hidpp.c
@@ -1465,7 +1465,7 @@ struct hidpp_ff_work_data {
 	u8 size;
 };
 
-static const signed short hiddpp_ff_effects[] = {
+static const signed short hidpp_ff_effects[] = {
 	FF_CONSTANT,
 	FF_PERIODIC,
 	FF_SINE,
@@ -1480,7 +1480,7 @@ static const signed short hiddpp_ff_effects[] = {
 	-1
 };
 
-static const signed short hiddpp_ff_effects_v2[] = {
+static const signed short hidpp_ff_effects_v2[] = {
 	FF_RAMP,
 	FF_FRICTION,
 	FF_INERTIA,
@@ -1873,11 +1873,11 @@ static int hidpp_ff_init(struct hidpp_device *hidpp, u8 feature_index)
 	version = bcdDevice & 255;
 
 	/* Set supported force feedback capabilities */
-	for (j = 0; hiddpp_ff_effects[j] >= 0; j++)
-		set_bit(hiddpp_ff_effects[j], dev->ffbit);
+	for (j = 0; hidpp_ff_effects[j] >= 0; j++)
+		set_bit(hidpp_ff_effects[j], dev->ffbit);
 	if (version > 1)
-		for (j = 0; hiddpp_ff_effects_v2[j] >= 0; j++)
-			set_bit(hiddpp_ff_effects_v2[j], dev->ffbit);
+		for (j = 0; hidpp_ff_effects_v2[j] >= 0; j++)
+			set_bit(hidpp_ff_effects_v2[j], dev->ffbit);
 
 	/* Read number of slots available in device */
 	error = hidpp_send_fap_command_sync(hidpp, feature_index,
-- 
GitLab


From 95c3d00282ce6a4911e3050a3b48a194ba16e9e5 Mon Sep 17 00:00:00 2001
From: Harry Cutts <hcutts@chromium.org>
Date: Wed, 5 Dec 2018 10:42:26 +1000
Subject: [PATCH 1869/1890] HID: logitech: Add function to enable HID++ 1.0
 "scrolling acceleration"

"Scrolling acceleration" is a bit of a misnomer: it doesn't deal with
acceleration at all. However, that's the name used in Logitech's spec,
so I used it here.

Signed-off-by: Harry Cutts <hcutts@chromium.org>
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Peter Hutterer <peter.hutterer@who-t.net>
Verified-by: Harry Cutts <hcutts@chromium.org>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
---
 drivers/hid/hid-logitech-hidpp.c | 47 +++++++++++++++++++++++---------
 1 file changed, 34 insertions(+), 13 deletions(-)

diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c
index 22b37a3844d10..95ba9db6e613e 100644
--- a/drivers/hid/hid-logitech-hidpp.c
+++ b/drivers/hid/hid-logitech-hidpp.c
@@ -400,32 +400,53 @@ static void hidpp_prefix_name(char **name, int name_length)
 #define HIDPP_SET_LONG_REGISTER				0x82
 #define HIDPP_GET_LONG_REGISTER				0x83
 
-#define HIDPP_REG_GENERAL				0x00
-
-static int hidpp10_enable_battery_reporting(struct hidpp_device *hidpp_dev)
+/**
+ * hidpp10_set_register_bit() - Sets a single bit in a HID++ 1.0 register.
+ * @hidpp_dev: the device to set the register on.
+ * @register_address: the address of the register to modify.
+ * @byte: the byte of the register to modify. Should be less than 3.
+ * Return: 0 if successful, otherwise a negative error code.
+ */
+static int hidpp10_set_register_bit(struct hidpp_device *hidpp_dev,
+	u8 register_address, u8 byte, u8 bit)
 {
 	struct hidpp_report response;
 	int ret;
 	u8 params[3] = { 0 };
 
 	ret = hidpp_send_rap_command_sync(hidpp_dev,
-					REPORT_ID_HIDPP_SHORT,
-					HIDPP_GET_REGISTER,
-					HIDPP_REG_GENERAL,
-					NULL, 0, &response);
+					  REPORT_ID_HIDPP_SHORT,
+					  HIDPP_GET_REGISTER,
+					  register_address,
+					  NULL, 0, &response);
 	if (ret)
 		return ret;
 
 	memcpy(params, response.rap.params, 3);
 
-	/* Set the battery bit */
-	params[0] |= BIT(4);
+	params[byte] |= BIT(bit);
 
 	return hidpp_send_rap_command_sync(hidpp_dev,
-					REPORT_ID_HIDPP_SHORT,
-					HIDPP_SET_REGISTER,
-					HIDPP_REG_GENERAL,
-					params, 3, &response);
+					   REPORT_ID_HIDPP_SHORT,
+					   HIDPP_SET_REGISTER,
+					   register_address,
+					   params, 3, &response);
+}
+
+
+#define HIDPP_REG_GENERAL				0x00
+
+static int hidpp10_enable_battery_reporting(struct hidpp_device *hidpp_dev)
+{
+	return hidpp10_set_register_bit(hidpp_dev, HIDPP_REG_GENERAL, 0, 4);
+}
+
+#define HIDPP_REG_FEATURES				0x01
+
+/* On HID++ 1.0 devices, high-res scroll was called "scrolling acceleration". */
+static int hidpp10_enable_scrolling_acceleration(struct hidpp_device *hidpp_dev)
+{
+	return hidpp10_set_register_bit(hidpp_dev, HIDPP_REG_FEATURES, 0, 6);
 }
 
 #define HIDPP_REG_BATTERY_STATUS			0x07
-- 
GitLab


From 4435ff2f09a2fc43d1201d732e6e606b4d4b1ad5 Mon Sep 17 00:00:00 2001
From: Harry Cutts <hcutts@chromium.org>
Date: Wed, 5 Dec 2018 10:42:27 +1000
Subject: [PATCH 1870/1890] HID: logitech: Enable high-resolution scrolling on
 Logitech mice

There are three features used by various Logitech mice for
high-resolution scrolling: the scrolling acceleration bit in HID++ 1.0,
and the x2120 and x2121 features in HID++ 2.0 and above. This patch
supports all three, and uses the multiplier reported by the mouse for
the HID++ 2.0+ features.

The full list of product IDs of mice which support high-resolution
scrolling was provided by Logitech, but the patch was tested using the
following mice (using the Unifying receiver):

* HID++ 1.0: Anywhere MX, Performance MX
* x2120: M560
* x2121: MX Anywhere 2, MX Master 2S

This patch is a combinations of the now-reverted commits 1ff2e1a44e0,
d56ca9855bf9, 5fe2ccbef9d, 044ee89028 together with some extra bits for the
directional and timeout-based reset.
The previous patch series was in hid-input, it appears this remainder
handling is logitech-specific and was moved to hid-logitech-hidpp.c and
renamed accordingly.

Signed-off-by: Harry Cutts <hcutts@chromium.org>
Signed-off-by: Peter Hutterer <peter.hutterer@who-t.net>
Verified-by: Harry Cutts <hcutts@chromium.org>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
---
 drivers/hid/hid-logitech-hidpp.c | 301 ++++++++++++++++++++++++++++++-
 1 file changed, 295 insertions(+), 6 deletions(-)

diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c
index 95ba9db6e613e..a66daf8acd87b 100644
--- a/drivers/hid/hid-logitech-hidpp.c
+++ b/drivers/hid/hid-logitech-hidpp.c
@@ -21,6 +21,7 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
+#include <linux/sched/clock.h>
 #include <linux/kfifo.h>
 #include <linux/input/mt.h>
 #include <linux/workqueue.h>
@@ -64,6 +65,14 @@ MODULE_PARM_DESC(disable_tap_to_click,
 #define HIDPP_QUIRK_NO_HIDINPUT			BIT(23)
 #define HIDPP_QUIRK_FORCE_OUTPUT_REPORTS	BIT(24)
 #define HIDPP_QUIRK_UNIFYING			BIT(25)
+#define HIDPP_QUIRK_HI_RES_SCROLL_1P0		BIT(26)
+#define HIDPP_QUIRK_HI_RES_SCROLL_X2120		BIT(27)
+#define HIDPP_QUIRK_HI_RES_SCROLL_X2121		BIT(28)
+
+/* Convenience constant to check for any high-res support. */
+#define HIDPP_QUIRK_HI_RES_SCROLL	(HIDPP_QUIRK_HI_RES_SCROLL_1P0 | \
+					 HIDPP_QUIRK_HI_RES_SCROLL_X2120 | \
+					 HIDPP_QUIRK_HI_RES_SCROLL_X2121)
 
 #define HIDPP_QUIRK_DELAYED_INIT		HIDPP_QUIRK_NO_HIDINPUT
 
@@ -128,6 +137,25 @@ struct hidpp_battery {
 	bool online;
 };
 
+/**
+ * struct hidpp_scroll_counter - Utility class for processing high-resolution
+ *                             scroll events.
+ * @dev: the input device for which events should be reported.
+ * @wheel_multiplier: the scalar multiplier to be applied to each wheel event
+ * @remainder: counts the number of high-resolution units moved since the last
+ *             low-resolution event (REL_WHEEL or REL_HWHEEL) was sent. Should
+ *             only be used by class methods.
+ * @direction: direction of last movement (1 or -1)
+ * @last_time: last event time, used to reset remainder after inactivity
+ */
+struct hidpp_scroll_counter {
+	struct input_dev *dev;
+	int wheel_multiplier;
+	int remainder;
+	int direction;
+	unsigned long long last_time;
+};
+
 struct hidpp_device {
 	struct hid_device *hid_dev;
 	struct mutex send_mutex;
@@ -149,6 +177,7 @@ struct hidpp_device {
 	unsigned long capabilities;
 
 	struct hidpp_battery battery;
+	struct hidpp_scroll_counter vertical_wheel_counter;
 };
 
 /* HID++ 1.0 error codes */
@@ -391,6 +420,67 @@ static void hidpp_prefix_name(char **name, int name_length)
 	*name = new_name;
 }
 
+/**
+ * hidpp_scroll_counter_handle_scroll() - Send high- and low-resolution scroll
+ *                                        events given a high-resolution wheel
+ *                                        movement.
+ * @counter: a hid_scroll_counter struct describing the wheel.
+ * @hi_res_value: the movement of the wheel, in the mouse's high-resolution
+ *                units.
+ *
+ * Given a high-resolution movement, this function converts the movement into
+ * fractions of 120 and emits high-resolution scroll events for the input
+ * device. It also uses the multiplier from &struct hid_scroll_counter to
+ * emit low-resolution scroll events when appropriate for
+ * backwards-compatibility with userspace input libraries.
+ */
+static void hidpp_scroll_counter_handle_scroll(struct hidpp_scroll_counter *counter,
+					       int hi_res_value)
+{
+	int low_res_value, remainder, direction;
+	unsigned long long now, previous;
+
+	hi_res_value = hi_res_value * 120/counter->wheel_multiplier;
+	input_report_rel(counter->dev, REL_WHEEL_HI_RES, hi_res_value);
+
+	remainder = counter->remainder;
+	direction = hi_res_value > 0 ? 1 : -1;
+
+	now = sched_clock();
+	previous = counter->last_time;
+	counter->last_time = now;
+	/*
+	 * Reset the remainder after a period of inactivity or when the
+	 * direction changes. This prevents the REL_WHEEL emulation point
+	 * from sliding for devices that don't always provide the same
+	 * number of movements per detent.
+	 */
+	if (now - previous > 1000000000 || direction != counter->direction)
+		remainder = 0;
+
+	counter->direction = direction;
+	remainder += hi_res_value;
+
+	/* Some wheels will rest 7/8ths of a detent from the previous detent
+	 * after slow movement, so we want the threshold for low-res events to
+	 * be in the middle between two detents (e.g. after 4/8ths) as
+	 * opposed to on the detents themselves (8/8ths).
+	 */
+	if (abs(remainder) >= 60) {
+		/* Add (or subtract) 1 because we want to trigger when the wheel
+		 * is half-way to the next detent (i.e. scroll 1 detent after a
+		 * 1/2 detent movement, 2 detents after a 1 1/2 detent movement,
+		 * etc.).
+		 */
+		low_res_value = remainder / 120;
+		if (low_res_value == 0)
+			low_res_value = (hi_res_value > 0 ? 1 : -1);
+		input_report_rel(counter->dev, REL_WHEEL, low_res_value);
+		remainder -= low_res_value * 120;
+	}
+	counter->remainder = remainder;
+}
+
 /* -------------------------------------------------------------------------- */
 /* HIDP++ 1.0 commands                                                        */
 /* -------------------------------------------------------------------------- */
@@ -1157,6 +1247,99 @@ static int hidpp_battery_get_property(struct power_supply *psy,
 	return ret;
 }
 
+/* -------------------------------------------------------------------------- */
+/* 0x2120: Hi-resolution scrolling                                            */
+/* -------------------------------------------------------------------------- */
+
+#define HIDPP_PAGE_HI_RESOLUTION_SCROLLING			0x2120
+
+#define CMD_HI_RESOLUTION_SCROLLING_SET_HIGHRES_SCROLLING_MODE	0x10
+
+static int hidpp_hrs_set_highres_scrolling_mode(struct hidpp_device *hidpp,
+	bool enabled, u8 *multiplier)
+{
+	u8 feature_index;
+	u8 feature_type;
+	int ret;
+	u8 params[1];
+	struct hidpp_report response;
+
+	ret = hidpp_root_get_feature(hidpp,
+				     HIDPP_PAGE_HI_RESOLUTION_SCROLLING,
+				     &feature_index,
+				     &feature_type);
+	if (ret)
+		return ret;
+
+	params[0] = enabled ? BIT(0) : 0;
+	ret = hidpp_send_fap_command_sync(hidpp, feature_index,
+					  CMD_HI_RESOLUTION_SCROLLING_SET_HIGHRES_SCROLLING_MODE,
+					  params, sizeof(params), &response);
+	if (ret)
+		return ret;
+	*multiplier = response.fap.params[1];
+	return 0;
+}
+
+/* -------------------------------------------------------------------------- */
+/* 0x2121: HiRes Wheel                                                        */
+/* -------------------------------------------------------------------------- */
+
+#define HIDPP_PAGE_HIRES_WHEEL		0x2121
+
+#define CMD_HIRES_WHEEL_GET_WHEEL_CAPABILITY	0x00
+#define CMD_HIRES_WHEEL_SET_WHEEL_MODE		0x20
+
+static int hidpp_hrw_get_wheel_capability(struct hidpp_device *hidpp,
+	u8 *multiplier)
+{
+	u8 feature_index;
+	u8 feature_type;
+	int ret;
+	struct hidpp_report response;
+
+	ret = hidpp_root_get_feature(hidpp, HIDPP_PAGE_HIRES_WHEEL,
+				     &feature_index, &feature_type);
+	if (ret)
+		goto return_default;
+
+	ret = hidpp_send_fap_command_sync(hidpp, feature_index,
+					  CMD_HIRES_WHEEL_GET_WHEEL_CAPABILITY,
+					  NULL, 0, &response);
+	if (ret)
+		goto return_default;
+
+	*multiplier = response.fap.params[0];
+	return 0;
+return_default:
+	hid_warn(hidpp->hid_dev,
+		 "Couldn't get wheel multiplier (error %d)\n", ret);
+	return ret;
+}
+
+static int hidpp_hrw_set_wheel_mode(struct hidpp_device *hidpp, bool invert,
+	bool high_resolution, bool use_hidpp)
+{
+	u8 feature_index;
+	u8 feature_type;
+	int ret;
+	u8 params[1];
+	struct hidpp_report response;
+
+	ret = hidpp_root_get_feature(hidpp, HIDPP_PAGE_HIRES_WHEEL,
+				     &feature_index, &feature_type);
+	if (ret)
+		return ret;
+
+	params[0] = (invert          ? BIT(2) : 0) |
+		    (high_resolution ? BIT(1) : 0) |
+		    (use_hidpp       ? BIT(0) : 0);
+
+	return hidpp_send_fap_command_sync(hidpp, feature_index,
+					   CMD_HIRES_WHEEL_SET_WHEEL_MODE,
+					   params, sizeof(params), &response);
+}
+
 /* -------------------------------------------------------------------------- */
 /* 0x4301: Solar Keyboard                                                     */
 /* -------------------------------------------------------------------------- */
@@ -2408,10 +2591,15 @@ static int m560_raw_event(struct hid_device *hdev, u8 *data, int size)
 		input_report_key(mydata->input, BTN_RIGHT,
 			!!(data[1] & M560_MOUSE_BTN_RIGHT));
 
-		if (data[1] & M560_MOUSE_BTN_WHEEL_LEFT)
+		if (data[1] & M560_MOUSE_BTN_WHEEL_LEFT) {
 			input_report_rel(mydata->input, REL_HWHEEL, -1);
-		else if (data[1] & M560_MOUSE_BTN_WHEEL_RIGHT)
+			input_report_rel(mydata->input, REL_HWHEEL_HI_RES,
+					 -120);
+		} else if (data[1] & M560_MOUSE_BTN_WHEEL_RIGHT) {
 			input_report_rel(mydata->input, REL_HWHEEL, 1);
+			input_report_rel(mydata->input, REL_HWHEEL_HI_RES,
+					 120);
+		}
 
 		v = hid_snto32(hid_field_extract(hdev, data+3, 0, 12), 12);
 		input_report_rel(mydata->input, REL_X, v);
@@ -2420,7 +2608,8 @@ static int m560_raw_event(struct hid_device *hdev, u8 *data, int size)
 		input_report_rel(mydata->input, REL_Y, v);
 
 		v = hid_snto32(data[6], 8);
-		input_report_rel(mydata->input, REL_WHEEL, v);
+		hidpp_scroll_counter_handle_scroll(
+				&hidpp->vertical_wheel_counter, v);
 
 		input_sync(mydata->input);
 	}
@@ -2447,6 +2636,8 @@ static void m560_populate_input(struct hidpp_device *hidpp,
 	__set_bit(REL_Y, mydata->input->relbit);
 	__set_bit(REL_WHEEL, mydata->input->relbit);
 	__set_bit(REL_HWHEEL, mydata->input->relbit);
+	__set_bit(REL_WHEEL_HI_RES, mydata->input->relbit);
+	__set_bit(REL_HWHEEL_HI_RES, mydata->input->relbit);
 }
 
 static int m560_input_mapping(struct hid_device *hdev, struct hid_input *hi,
@@ -2548,6 +2739,37 @@ static int g920_get_config(struct hidpp_device *hidpp)
 	return 0;
 }
 
+/* -------------------------------------------------------------------------- */
+/* High-resolution scroll wheels                                              */
+/* -------------------------------------------------------------------------- */
+
+static int hi_res_scroll_enable(struct hidpp_device *hidpp)
+{
+	int ret;
+	u8 multiplier = 1;
+
+	if (hidpp->quirks & HIDPP_QUIRK_HI_RES_SCROLL_X2121) {
+		ret = hidpp_hrw_set_wheel_mode(hidpp, false, true, false);
+		if (ret == 0)
+			ret = hidpp_hrw_get_wheel_capability(hidpp, &multiplier);
+	} else if (hidpp->quirks & HIDPP_QUIRK_HI_RES_SCROLL_X2120) {
+		ret = hidpp_hrs_set_highres_scrolling_mode(hidpp, true,
+							   &multiplier);
+	} else /* if (hidpp->quirks & HIDPP_QUIRK_HI_RES_SCROLL_1P0) */ {
+		ret = hidpp10_enable_scrolling_acceleration(hidpp);
+		multiplier = 8;
+	}
+	if (ret)
+		return ret;
+
+	if (multiplier == 0)
+		multiplier = 1;
+
+	hidpp->vertical_wheel_counter.wheel_multiplier = multiplier;
+	hid_info(hidpp->hid_dev, "multiplier = %d\n", multiplier);
+	return 0;
+}
+
 /* -------------------------------------------------------------------------- */
 /* Generic HID++ devices                                                      */
 /* -------------------------------------------------------------------------- */
@@ -2593,6 +2815,9 @@ static void hidpp_populate_input(struct hidpp_device *hidpp,
 		wtp_populate_input(hidpp, input, origin_is_hid_core);
 	else if (hidpp->quirks & HIDPP_QUIRK_CLASS_M560)
 		m560_populate_input(hidpp, input, origin_is_hid_core);
+
+	if (hidpp->quirks & HIDPP_QUIRK_HI_RES_SCROLL)
+		hidpp->vertical_wheel_counter.dev = input;
 }
 
 static int hidpp_input_configured(struct hid_device *hdev,
@@ -2711,6 +2936,27 @@ static int hidpp_raw_event(struct hid_device *hdev, struct hid_report *report,
 	return 0;
 }
 
+static int hidpp_event(struct hid_device *hdev, struct hid_field *field,
+	struct hid_usage *usage, __s32 value)
+{
+	/* This function will only be called for scroll events, due to the
+	 * restriction imposed in hidpp_usages.
+	 */
+	struct hidpp_device *hidpp = hid_get_drvdata(hdev);
+	struct hidpp_scroll_counter *counter = &hidpp->vertical_wheel_counter;
+	/* A scroll event may occur before the multiplier has been retrieved or
+	 * the input device set, or high-res scroll enabling may fail. In such
+	 * cases we must return early (falling back to default behaviour) to
+	 * avoid a crash in hidpp_scroll_counter_handle_scroll.
+	 */
+	if (!(hidpp->quirks & HIDPP_QUIRK_HI_RES_SCROLL) || value == 0
+	    || counter->dev == NULL || counter->wheel_multiplier == 0)
+		return 0;
+
+	hidpp_scroll_counter_handle_scroll(counter, value);
+	return 1;
+}
+
 static int hidpp_initialize_battery(struct hidpp_device *hidpp)
 {
 	static atomic_t battery_no = ATOMIC_INIT(0);
@@ -2922,6 +3168,9 @@ static void hidpp_connect_event(struct hidpp_device *hidpp)
 	if (hidpp->battery.ps)
 		power_supply_changed(hidpp->battery.ps);
 
+	if (hidpp->quirks & HIDPP_QUIRK_HI_RES_SCROLL)
+		hi_res_scroll_enable(hidpp);
+
 	if (!(hidpp->quirks & HIDPP_QUIRK_NO_HIDINPUT) || hidpp->delayed_input)
 		/* if the input nodes are already created, we can stop now */
 		return;
@@ -3107,6 +3356,10 @@ static void hidpp_remove(struct hid_device *hdev)
 	mutex_destroy(&hidpp->send_mutex);
 }
 
+#define LDJ_DEVICE(product) \
+	HID_DEVICE(BUS_USB, HID_GROUP_LOGITECH_DJ_DEVICE, \
+		   USB_VENDOR_ID_LOGITECH, (product))
+
 static const struct hid_device_id hidpp_devices[] = {
 	{ /* wireless touchpad */
 	  HID_DEVICE(BUS_USB, HID_GROUP_LOGITECH_DJ_DEVICE,
@@ -3121,10 +3374,39 @@ static const struct hid_device_id hidpp_devices[] = {
 	  HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH,
 		USB_DEVICE_ID_LOGITECH_T651),
 	  .driver_data = HIDPP_QUIRK_CLASS_WTP },
+	{ /* Mouse Logitech Anywhere MX */
+	  LDJ_DEVICE(0x1017), .driver_data = HIDPP_QUIRK_HI_RES_SCROLL_1P0 },
+	{ /* Mouse Logitech Cube */
+	  LDJ_DEVICE(0x4010), .driver_data = HIDPP_QUIRK_HI_RES_SCROLL_X2120 },
+	{ /* Mouse Logitech M335 */
+	  LDJ_DEVICE(0x4050), .driver_data = HIDPP_QUIRK_HI_RES_SCROLL_X2121 },
+	{ /* Mouse Logitech M515 */
+	  LDJ_DEVICE(0x4007), .driver_data = HIDPP_QUIRK_HI_RES_SCROLL_X2120 },
 	{ /* Mouse logitech M560 */
-	  HID_DEVICE(BUS_USB, HID_GROUP_LOGITECH_DJ_DEVICE,
-		USB_VENDOR_ID_LOGITECH, 0x402d),
-	  .driver_data = HIDPP_QUIRK_DELAYED_INIT | HIDPP_QUIRK_CLASS_M560 },
+	  LDJ_DEVICE(0x402d),
+	  .driver_data = HIDPP_QUIRK_DELAYED_INIT | HIDPP_QUIRK_CLASS_M560
+		| HIDPP_QUIRK_HI_RES_SCROLL_X2120 },
+	{ /* Mouse Logitech M705 (firmware RQM17) */
+	  LDJ_DEVICE(0x101b), .driver_data = HIDPP_QUIRK_HI_RES_SCROLL_1P0 },
+	{ /* Mouse Logitech M705 (firmware RQM67) */
+	  LDJ_DEVICE(0x406d), .driver_data = HIDPP_QUIRK_HI_RES_SCROLL_X2121 },
+	{ /* Mouse Logitech M720 */
+	  LDJ_DEVICE(0x405e), .driver_data = HIDPP_QUIRK_HI_RES_SCROLL_X2121 },
+	{ /* Mouse Logitech MX Anywhere 2 */
+	  LDJ_DEVICE(0x404a), .driver_data = HIDPP_QUIRK_HI_RES_SCROLL_X2121 },
+	{ LDJ_DEVICE(0xb013), .driver_data = HIDPP_QUIRK_HI_RES_SCROLL_X2121 },
+	{ LDJ_DEVICE(0xb018), .driver_data = HIDPP_QUIRK_HI_RES_SCROLL_X2121 },
+	{ LDJ_DEVICE(0xb01f), .driver_data = HIDPP_QUIRK_HI_RES_SCROLL_X2121 },
+	{ /* Mouse Logitech MX Anywhere 2S */
+	  LDJ_DEVICE(0x406a), .driver_data = HIDPP_QUIRK_HI_RES_SCROLL_X2121 },
+	{ /* Mouse Logitech MX Master */
+	  LDJ_DEVICE(0x4041), .driver_data = HIDPP_QUIRK_HI_RES_SCROLL_X2121 },
+	{ LDJ_DEVICE(0x4060), .driver_data = HIDPP_QUIRK_HI_RES_SCROLL_X2121 },
+	{ LDJ_DEVICE(0x4071), .driver_data = HIDPP_QUIRK_HI_RES_SCROLL_X2121 },
+	{ /* Mouse Logitech MX Master 2S */
+	  LDJ_DEVICE(0x4069), .driver_data = HIDPP_QUIRK_HI_RES_SCROLL_X2121 },
+	{ /* Mouse Logitech Performance MX */
+	  LDJ_DEVICE(0x101a), .driver_data = HIDPP_QUIRK_HI_RES_SCROLL_1P0 },
 	{ /* Keyboard logitech K400 */
 	  HID_DEVICE(BUS_USB, HID_GROUP_LOGITECH_DJ_DEVICE,
 		USB_VENDOR_ID_LOGITECH, 0x4024),
@@ -3144,12 +3426,19 @@ static const struct hid_device_id hidpp_devices[] = {
 
 MODULE_DEVICE_TABLE(hid, hidpp_devices);
 
+static const struct hid_usage_id hidpp_usages[] = {
+	{ HID_GD_WHEEL, EV_REL, REL_WHEEL_HI_RES },
+	{ HID_ANY_ID - 1, HID_ANY_ID - 1, HID_ANY_ID - 1}
+};
+
 static struct hid_driver hidpp_driver = {
 	.name = "logitech-hidpp-device",
 	.id_table = hidpp_devices,
 	.probe = hidpp_probe,
 	.remove = hidpp_remove,
 	.raw_event = hidpp_raw_event,
+	.usage_table = hidpp_usages,
+	.event = hidpp_event,
 	.input_configured = hidpp_input_configured,
 	.input_mapping = hidpp_input_mapping,
 	.input_mapped = hidpp_input_mapped,
-- 
GitLab


From 16767229940fb18086430907d3cd46245c7398ac Mon Sep 17 00:00:00 2001
From: Harry Cutts <hcutts@chromium.org>
Date: Wed, 5 Dec 2018 10:42:28 +1000
Subject: [PATCH 1871/1890] HID: logitech: Use LDJ_DEVICE macro for existing
 Logitech mice

Signed-off-by: Harry Cutts <hcutts@chromium.org>
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Peter Hutterer <peter.hutterer@who-t.net>
Verified-by: Harry Cutts <hcutts@chromium.org>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
---
 drivers/hid/hid-logitech-hidpp.c | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c
index a66daf8acd87b..15ed6177a7a36 100644
--- a/drivers/hid/hid-logitech-hidpp.c
+++ b/drivers/hid/hid-logitech-hidpp.c
@@ -3362,13 +3362,11 @@ static void hidpp_remove(struct hid_device *hdev)
 
 static const struct hid_device_id hidpp_devices[] = {
 	{ /* wireless touchpad */
-	  HID_DEVICE(BUS_USB, HID_GROUP_LOGITECH_DJ_DEVICE,
-		USB_VENDOR_ID_LOGITECH, 0x4011),
+	  LDJ_DEVICE(0x4011),
 	  .driver_data = HIDPP_QUIRK_CLASS_WTP | HIDPP_QUIRK_DELAYED_INIT |
 			 HIDPP_QUIRK_WTP_PHYSICAL_BUTTONS },
 	{ /* wireless touchpad T650 */
-	  HID_DEVICE(BUS_USB, HID_GROUP_LOGITECH_DJ_DEVICE,
-		USB_VENDOR_ID_LOGITECH, 0x4101),
+	  LDJ_DEVICE(0x4101),
 	  .driver_data = HIDPP_QUIRK_CLASS_WTP | HIDPP_QUIRK_DELAYED_INIT },
 	{ /* wireless touchpad T651 */
 	  HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH,
@@ -3408,16 +3406,13 @@ static const struct hid_device_id hidpp_devices[] = {
 	{ /* Mouse Logitech Performance MX */
 	  LDJ_DEVICE(0x101a), .driver_data = HIDPP_QUIRK_HI_RES_SCROLL_1P0 },
 	{ /* Keyboard logitech K400 */
-	  HID_DEVICE(BUS_USB, HID_GROUP_LOGITECH_DJ_DEVICE,
-		USB_VENDOR_ID_LOGITECH, 0x4024),
+	  LDJ_DEVICE(0x4024),
 	  .driver_data = HIDPP_QUIRK_CLASS_K400 },
 	{ /* Solar Keyboard Logitech K750 */
-	  HID_DEVICE(BUS_USB, HID_GROUP_LOGITECH_DJ_DEVICE,
-		USB_VENDOR_ID_LOGITECH, 0x4002),
+	  LDJ_DEVICE(0x4002),
 	  .driver_data = HIDPP_QUIRK_CLASS_K750 },
 
-	{ HID_DEVICE(BUS_USB, HID_GROUP_LOGITECH_DJ_DEVICE,
-		USB_VENDOR_ID_LOGITECH, HID_ANY_ID)},
+	{ LDJ_DEVICE(HID_ANY_ID) },
 
 	{ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_G920_WHEEL),
 		.driver_data = HIDPP_QUIRK_CLASS_G920 | HIDPP_QUIRK_FORCE_OUTPUT_REPORTS},
-- 
GitLab


From b4aecf78083d8c6424657c1746c7c3de6e61669f Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 7 Dec 2018 12:47:10 +0000
Subject: [PATCH 1872/1890] arm64: hibernate: Avoid sending cross-calling with
 interrupts disabled

Since commit 3b8c9f1cdfc50 ("arm64: IPI each CPU after invalidating the
I-cache for kernel mappings"), a call to flush_icache_range() will use
an IPI to cross-call other online CPUs so that any stale instructions
are flushed from their pipelines. This triggers a WARN during the
hibernation resume path, where flush_icache_range() is called with
interrupts disabled and is therefore prone to deadlock:

  | Disabling non-boot CPUs ...
  | CPU1: shutdown
  | psci: CPU1 killed.
  | CPU2: shutdown
  | psci: CPU2 killed.
  | CPU3: shutdown
  | psci: CPU3 killed.
  | WARNING: CPU: 0 PID: 1 at ../kernel/smp.c:416 smp_call_function_many+0xd4/0x350
  | Modules linked in:
  | CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.20.0-rc4 #1

Since all secondary CPUs have been taken offline prior to invalidating
the I-cache, there's actually no need for an IPI and we can simply call
__flush_icache_range() instead.

Cc: <stable@vger.kernel.org>
Fixes: 3b8c9f1cdfc50 ("arm64: IPI each CPU after invalidating the I-cache for kernel mappings")
Reported-by: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Tested-by: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Tested-by: James Morse <james.morse@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/hibernate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index 6b2686d54411f..29cdc99688f33 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -214,7 +214,7 @@ static int create_safe_exec_page(void *src_start, size_t length,
 	}
 
 	memcpy((void *)dst, src_start, length);
-	flush_icache_range(dst, dst + length);
+	__flush_icache_range(dst, dst + length);
 
 	pgdp = pgd_offset_raw(allocator(mask), dst_addr);
 	if (pgd_none(READ_ONCE(*pgdp))) {
-- 
GitLab


From ac3e233d29f7f77f28243af0132057d378d3ea58 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Thu, 6 Dec 2018 11:12:31 -0800
Subject: [PATCH 1873/1890] x86/vdso: Drop implicit common-page-size linker
 flag

GNU linker's -z common-page-size's default value is based on the target
architecture. arch/x86/entry/vdso/Makefile sets it to the architecture
default, which is implicit and redundant. Drop it.

Fixes: 2aae950b21e4 ("x86_64: Add vDSO for x86-64 with gettimeofday/clock_gettime/getcpu")
Reported-by: Dmitry Golovin <dima@golovin.in>
Reported-by: Bill Wendling <morbo@google.com>
Suggested-by: Dmitry Golovin <dima@golovin.in>
Suggested-by: Rui Ueyama <ruiu@google.com>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Andy Lutomirski <luto@kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Fangrui Song <maskray@google.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: x86-ml <x86@kernel.org>
Link: https://lkml.kernel.org/r/20181206191231.192355-1-ndesaulniers@google.com
Link: https://bugs.llvm.org/show_bug.cgi?id=38774
Link: https://github.com/ClangBuiltLinux/linux/issues/31
---
 arch/x86/entry/vdso/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 141d415a8c809..0624bf2266fd7 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -47,7 +47,7 @@ targets += $(vdso_img_sodbg) $(vdso_img-y:%=vdso%.so)
 CPPFLAGS_vdso.lds += -P -C
 
 VDSO_LDFLAGS_vdso.lds = -m elf_x86_64 -soname linux-vdso.so.1 --no-undefined \
-			-z max-page-size=4096 -z common-page-size=4096
+			-z max-page-size=4096
 
 $(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE
 	$(call if_changed,vdso)
@@ -98,7 +98,7 @@ CFLAGS_REMOVE_vvar.o = -pg
 
 CPPFLAGS_vdsox32.lds = $(CPPFLAGS_vdso.lds)
 VDSO_LDFLAGS_vdsox32.lds = -m elf32_x86_64 -soname linux-vdso.so.1 \
-			   -z max-page-size=4096 -z common-page-size=4096
+			   -z max-page-size=4096
 
 # x32-rebranded versions
 vobjx32s-y := $(vobjs-y:.o=-x32.o)
-- 
GitLab


From 1b4e5ad5d6b9f15cd0b5121f86d4719165958417 Mon Sep 17 00:00:00 2001
From: Shmulik Ladkani <shmulik@metanetworks.com>
Date: Fri, 7 Dec 2018 09:50:17 +0200
Subject: [PATCH 1874/1890] ipv6: sr: properly initialize flowi6 prior passing
 to ip6_route_output

In 'seg6_output', stack variable 'struct flowi6 fl6' was missing
initialization.

Fixes: 6c8702c60b88 ("ipv6: sr: add support for SRH encapsulation and injection with lwtunnels")
Signed-off-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/seg6_iptunnel.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index a8854dd3e9c5e..8181ee7e1e270 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -347,6 +347,7 @@ static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 		struct ipv6hdr *hdr = ipv6_hdr(skb);
 		struct flowi6 fl6;
 
+		memset(&fl6, 0, sizeof(fl6));
 		fl6.daddr = hdr->daddr;
 		fl6.saddr = hdr->saddr;
 		fl6.flowlabel = ip6_flowinfo(hdr);
-- 
GitLab


From f9bfe4e6a9d08d405fe7b081ee9a13e649c97ecf Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 6 Dec 2018 09:58:24 -0800
Subject: [PATCH 1875/1890] tcp: lack of available data can also cause TSO
 defer

tcp_tso_should_defer() can return true in three different cases :

 1) We are cwnd-limited
 2) We are rwnd-limited
 3) We are application limited.

Neal pointed out that my recent fix went too far, since
it assumed that if we were not in 1) case, we must be rwnd-limited

Fix this by properly populating the is_cwnd_limited and
is_rwnd_limited booleans.

After this change, we can finally move the silly check for FIN
flag only for the application-limited case.

The same move for EOR bit will be handled in net-next,
since commit 1c09f7d073b1 ("tcp: do not try to defer skbs
with eor mark (MSG_EOR)") is scheduled for linux-4.21

Tested by running 200 concurrent netperf -t TCP_RR -- -r 60000,100
and checking none of them was rwnd_limited in the chrono_stat
output from "ss -ti" command.

Fixes: 41727549de3e ("tcp: Do not underestimate rwnd_limited")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Suggested-by: Neal Cardwell <ncardwell@google.com>
Reviewed-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Reviewed-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 35 ++++++++++++++++++++++++-----------
 1 file changed, 24 insertions(+), 11 deletions(-)

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 5aa6009006956..d1676d8a6ed70 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1904,7 +1904,9 @@ static int tso_fragment(struct sock *sk, enum tcp_queue tcp_queue,
  * This algorithm is from John Heffner.
  */
 static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
-				 bool *is_cwnd_limited, u32 max_segs)
+				 bool *is_cwnd_limited,
+				 bool *is_rwnd_limited,
+				 u32 max_segs)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	u32 age, send_win, cong_win, limit, in_flight;
@@ -1912,9 +1914,6 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
 	struct sk_buff *head;
 	int win_divisor;
 
-	if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
-		goto send_now;
-
 	if (icsk->icsk_ca_state >= TCP_CA_Recovery)
 		goto send_now;
 
@@ -1973,10 +1972,27 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
 	if (age < (tp->srtt_us >> 4))
 		goto send_now;
 
-	/* Ok, it looks like it is advisable to defer. */
+	/* Ok, it looks like it is advisable to defer.
+	 * Three cases are tracked :
+	 * 1) We are cwnd-limited
+	 * 2) We are rwnd-limited
+	 * 3) We are application limited.
+	 */
+	if (cong_win < send_win) {
+		if (cong_win <= skb->len) {
+			*is_cwnd_limited = true;
+			return true;
+		}
+	} else {
+		if (send_win <= skb->len) {
+			*is_rwnd_limited = true;
+			return true;
+		}
+	}
 
-	if (cong_win < send_win && cong_win <= skb->len)
-		*is_cwnd_limited = true;
+	/* If this packet won't get more data, do not wait. */
+	if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
+		goto send_now;
 
 	return true;
 
@@ -2356,11 +2372,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 		} else {
 			if (!push_one &&
 			    tcp_tso_should_defer(sk, skb, &is_cwnd_limited,
-						 max_segs)) {
-				if (!is_cwnd_limited)
-					is_rwnd_limited = true;
+						 &is_rwnd_limited, max_segs))
 				break;
-			}
 		}
 
 		limit = mss_now;
-- 
GitLab


From 66033f47ca60294a95fc85ec3a3cc909dab7b765 Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Thu, 6 Dec 2018 19:30:36 +0100
Subject: [PATCH 1876/1890] ipv6: Check available headroom in ip6_xmit() even
 without options

Even if we send an IPv6 packet without options, MAX_HEADER might not be
enough to account for the additional headroom required by alignment of
hardware headers.

On a configuration without HYPERV_NET, WLAN, AX25, and with IPV6_TUNNEL,
sending short SCTP packets over IPv4 over L2TP over IPv6, we start with
100 bytes of allocated headroom in sctp_packet_transmit(), end up with 54
bytes after l2tp_xmit_skb(), and 14 bytes in ip6_finish_output2().

Those would be enough to append our 14 bytes header, but we're going to
align that to 16 bytes, and write 2 bytes out of the allocated slab in
neigh_hh_output().

KASan says:

[  264.967848] ==================================================================
[  264.967861] BUG: KASAN: slab-out-of-bounds in ip6_finish_output2+0x1aec/0x1c70
[  264.967866] Write of size 16 at addr 000000006af1c7fe by task netperf/6201
[  264.967870]
[  264.967876] CPU: 0 PID: 6201 Comm: netperf Not tainted 4.20.0-rc4+ #1
[  264.967881] Hardware name: IBM 2827 H43 400 (z/VM 6.4.0)
[  264.967887] Call Trace:
[  264.967896] ([<00000000001347d6>] show_stack+0x56/0xa0)
[  264.967903]  [<00000000017e379c>] dump_stack+0x23c/0x290
[  264.967912]  [<00000000007bc594>] print_address_description+0xf4/0x290
[  264.967919]  [<00000000007bc8fc>] kasan_report+0x13c/0x240
[  264.967927]  [<000000000162f5e4>] ip6_finish_output2+0x1aec/0x1c70
[  264.967935]  [<000000000163f890>] ip6_finish_output+0x430/0x7f0
[  264.967943]  [<000000000163fe44>] ip6_output+0x1f4/0x580
[  264.967953]  [<000000000163882a>] ip6_xmit+0xfea/0x1ce8
[  264.967963]  [<00000000017396e2>] inet6_csk_xmit+0x282/0x3f8
[  264.968033]  [<000003ff805fb0ba>] l2tp_xmit_skb+0xe02/0x13e0 [l2tp_core]
[  264.968037]  [<000003ff80631192>] l2tp_eth_dev_xmit+0xda/0x150 [l2tp_eth]
[  264.968041]  [<0000000001220020>] dev_hard_start_xmit+0x268/0x928
[  264.968069]  [<0000000001330e8e>] sch_direct_xmit+0x7ae/0x1350
[  264.968071]  [<000000000122359c>] __dev_queue_xmit+0x2b7c/0x3478
[  264.968075]  [<00000000013d2862>] ip_finish_output2+0xce2/0x11a0
[  264.968078]  [<00000000013d9b14>] ip_finish_output+0x56c/0x8c8
[  264.968081]  [<00000000013ddd1e>] ip_output+0x226/0x4c0
[  264.968083]  [<00000000013dbd6c>] __ip_queue_xmit+0x894/0x1938
[  264.968100]  [<000003ff80bc3a5c>] sctp_packet_transmit+0x29d4/0x3648 [sctp]
[  264.968116]  [<000003ff80b7bf68>] sctp_outq_flush_ctrl.constprop.5+0x8d0/0xe50 [sctp]
[  264.968131]  [<000003ff80b7c716>] sctp_outq_flush+0x22e/0x7d8 [sctp]
[  264.968146]  [<000003ff80b35c68>] sctp_cmd_interpreter.isra.16+0x530/0x6800 [sctp]
[  264.968161]  [<000003ff80b3410a>] sctp_do_sm+0x222/0x648 [sctp]
[  264.968177]  [<000003ff80bbddac>] sctp_primitive_ASSOCIATE+0xbc/0xf8 [sctp]
[  264.968192]  [<000003ff80b93328>] __sctp_connect+0x830/0xc20 [sctp]
[  264.968208]  [<000003ff80bb11ce>] sctp_inet_connect+0x2e6/0x378 [sctp]
[  264.968212]  [<0000000001197942>] __sys_connect+0x21a/0x450
[  264.968215]  [<000000000119aff8>] sys_socketcall+0x3d0/0xb08
[  264.968218]  [<000000000184ea7a>] system_call+0x2a2/0x2c0

[...]

Just like ip_finish_output2() does for IPv4, check that we have enough
headroom in ip6_xmit(), and reallocate it if we don't.

This issue is older than git history.

Reported-by: Jianlin Shi <jishi@redhat.com>
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_output.c | 42 +++++++++++++++++++++---------------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 827a3f5ff3bbd..fcd3c66ded162 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -195,37 +195,37 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
 	const struct ipv6_pinfo *np = inet6_sk(sk);
 	struct in6_addr *first_hop = &fl6->daddr;
 	struct dst_entry *dst = skb_dst(skb);
+	unsigned int head_room;
 	struct ipv6hdr *hdr;
 	u8  proto = fl6->flowi6_proto;
 	int seg_len = skb->len;
 	int hlimit = -1;
 	u32 mtu;
 
-	if (opt) {
-		unsigned int head_room;
+	head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
+	if (opt)
+		head_room += opt->opt_nflen + opt->opt_flen;
 
-		/* First: exthdrs may take lots of space (~8K for now)
-		   MAX_HEADER is not enough.
-		 */
-		head_room = opt->opt_nflen + opt->opt_flen;
-		seg_len += head_room;
-		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
-
-		if (skb_headroom(skb) < head_room) {
-			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
-			if (!skb2) {
-				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
-					      IPSTATS_MIB_OUTDISCARDS);
-				kfree_skb(skb);
-				return -ENOBUFS;
-			}
-			if (skb->sk)
-				skb_set_owner_w(skb2, skb->sk);
-			consume_skb(skb);
-			skb = skb2;
+	if (unlikely(skb_headroom(skb) < head_room)) {
+		struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
+		if (!skb2) {
+			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+				      IPSTATS_MIB_OUTDISCARDS);
+			kfree_skb(skb);
+			return -ENOBUFS;
 		}
+		if (skb->sk)
+			skb_set_owner_w(skb2, skb->sk);
+		consume_skb(skb);
+		skb = skb2;
+	}
+
+	if (opt) {
+		seg_len += opt->opt_nflen + opt->opt_flen;
+
 		if (opt->opt_flen)
 			ipv6_push_frag_opts(skb, opt, &proto);
+
 		if (opt->opt_nflen)
 			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
 					     &fl6->saddr);
-- 
GitLab


From e6ac64d4c4d095085d7dd71cbd05704ac99829b2 Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Thu, 6 Dec 2018 19:30:37 +0100
Subject: [PATCH 1877/1890] neighbour: Avoid writing before skb->head in
 neigh_hh_output()

While skb_push() makes the kernel panic if the skb headroom is less than
the unaligned hardware header size, it will proceed normally in case we
copy more than that because of alignment, and we'll silently corrupt
adjacent slabs.

In the case fixed by the previous patch,
"ipv6: Check available headroom in ip6_xmit() even without options", we
end up in neigh_hh_output() with 14 bytes headroom, 14 bytes hardware
header and write 16 bytes, starting 2 bytes before the allocated buffer.

Always check we're not writing before skb->head and, if the headroom is
not enough, warn and drop the packet.

v2:
 - instead of panicking with BUG_ON(), WARN_ON_ONCE() and drop the packet
   (Eric Dumazet)
 - if we avoid the panic, though, we need to explicitly check the headroom
   before the memcpy(), otherwise we'll have corrupted slabs on a running
   kernel, after we warn
 - use __skb_push() instead of skb_push(), as the headroom check is
   already implemented here explicitly (Eric Dumazet)

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/neighbour.h | 28 +++++++++++++++++++++++-----
 1 file changed, 23 insertions(+), 5 deletions(-)

diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index f58b384aa6c9e..665990c7dec8c 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -454,6 +454,7 @@ static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb)
 
 static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb)
 {
+	unsigned int hh_alen = 0;
 	unsigned int seq;
 	unsigned int hh_len;
 
@@ -461,16 +462,33 @@ static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb
 		seq = read_seqbegin(&hh->hh_lock);
 		hh_len = hh->hh_len;
 		if (likely(hh_len <= HH_DATA_MOD)) {
-			/* this is inlined by gcc */
-			memcpy(skb->data - HH_DATA_MOD, hh->hh_data, HH_DATA_MOD);
+			hh_alen = HH_DATA_MOD;
+
+			/* skb_push() would proceed silently if we have room for
+			 * the unaligned size but not for the aligned size:
+			 * check headroom explicitly.
+			 */
+			if (likely(skb_headroom(skb) >= HH_DATA_MOD)) {
+				/* this is inlined by gcc */
+				memcpy(skb->data - HH_DATA_MOD, hh->hh_data,
+				       HH_DATA_MOD);
+			}
 		} else {
-			unsigned int hh_alen = HH_DATA_ALIGN(hh_len);
+			hh_alen = HH_DATA_ALIGN(hh_len);
 
-			memcpy(skb->data - hh_alen, hh->hh_data, hh_alen);
+			if (likely(skb_headroom(skb) >= hh_alen)) {
+				memcpy(skb->data - hh_alen, hh->hh_data,
+				       hh_alen);
+			}
 		}
 	} while (read_seqretry(&hh->hh_lock, seq));
 
-	skb_push(skb, hh_len);
+	if (WARN_ON_ONCE(skb_headroom(skb) < hh_alen)) {
+		kfree_skb(skb);
+		return NET_XMIT_DROP;
+	}
+
+	__skb_push(skb, hh_len);
 	return dev_queue_xmit(skb);
 }
 
-- 
GitLab


From 5b3279e2cba2238b37f6c18adfdea8bddb32715a Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Fri, 7 Dec 2018 15:05:04 +1100
Subject: [PATCH 1878/1890] Revert "net/ibm/emac: wrong bit is used for STA
 control"

This reverts commit 624ca9c33c8a853a4a589836e310d776620f4ab9.

This commit is completely bogus. The STACR register has two formats, old
and new, depending on the version of the IP block used. There's a pair of
device-tree properties that can be used to specify the format used:

	has-inverted-stacr-oc
	has-new-stacr-staopc

What this commit did was to change the bit definition used with the old
parts to match the new parts. This of course breaks the driver on all
the old ones.

Instead, the author should have set the appropriate properties in the
device-tree for the variant used on his board.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/emac/emac.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/emac/emac.h b/drivers/net/ethernet/ibm/emac/emac.h
index e2f80cca9bed4..0d2de6f676764 100644
--- a/drivers/net/ethernet/ibm/emac/emac.h
+++ b/drivers/net/ethernet/ibm/emac/emac.h
@@ -231,7 +231,7 @@ struct emac_regs {
 #define EMAC_STACR_PHYE			0x00004000
 #define EMAC_STACR_STAC_MASK		0x00003000
 #define EMAC_STACR_STAC_READ		0x00001000
-#define EMAC_STACR_STAC_WRITE		0x00000800
+#define EMAC_STACR_STAC_WRITE		0x00002000
 #define EMAC_STACR_OPBC_MASK		0x00000C00
 #define EMAC_STACR_OPBC_50		0x00000000
 #define EMAC_STACR_OPBC_66		0x00000400
-- 
GitLab


From 356ff8a9a78fb35d6482584d260c3754dcbdf669 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Fri, 7 Dec 2018 14:50:16 -0800
Subject: [PATCH 1879/1890] Revert "mm, thp: consolidate THP gfp handling into
 alloc_hugepage_direct_gfpmask"

This reverts commit 89c83fb539f95491be80cdd5158e6f0ce329e317.

This should have been done as part of 2f0799a0ffc0 ("mm, thp: restore
node-local hugepage allocations").  The movement of the thp allocation
policy from alloc_pages_vma() to alloc_hugepage_direct_gfpmask() was
intended to only set __GFP_THISNODE for mempolicies that are not
MPOL_BIND whereas the revert could set this regardless of mempolicy.

While the check for MPOL_BIND between alloc_hugepage_direct_gfpmask()
and alloc_pages_vma() was racy, that has since been removed since the
revert.  What is left is the possibility to use __GFP_THISNODE in
policy_node() when it is unexpected because the special handling for
hugepages in alloc_pages_vma()  was removed as part of the consolidation.

Secondly, prior to 89c83fb539f9, alloc_pages_vma() implemented a somewhat
different policy for hugepage allocations, which were allocated through
alloc_hugepage_vma().  For hugepage allocations, if the allocating
process's node is in the set of allowed nodes, allocate with
__GFP_THISNODE for that node (for MPOL_PREFERRED, use that node with
__GFP_THISNODE instead).  This was changed for shmem_alloc_hugepage() to
allow fallback to other nodes in 89c83fb539f9 as it did for new_page() in
mm/mempolicy.c which is functionally different behavior and removes the
requirement to only allocate hugepages locally.

So this commit does a full revert of 89c83fb539f9 instead of the partial
revert that was done in 2f0799a0ffc0.  The result is the same thp
allocation policy for 4.20 that was in 4.19.

Fixes: 89c83fb539f9 ("mm, thp: consolidate THP gfp handling into alloc_hugepage_direct_gfpmask")
Fixes: 2f0799a0ffc0 ("mm, thp: restore node-local hugepage allocations")
Signed-off-by: David Rientjes <rientjes@google.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h | 12 ++++++++----
 mm/huge_memory.c    | 27 +++++++++++++--------------
 mm/mempolicy.c      | 32 +++++++++++++++++++++++++++++---
 mm/shmem.c          |  2 +-
 4 files changed, 51 insertions(+), 22 deletions(-)

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 76f8db0b0e715..0705164f928c9 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -510,18 +510,22 @@ alloc_pages(gfp_t gfp_mask, unsigned int order)
 }
 extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
 			struct vm_area_struct *vma, unsigned long addr,
-			int node);
+			int node, bool hugepage);
+#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
+	alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true)
 #else
 #define alloc_pages(gfp_mask, order) \
 		alloc_pages_node(numa_node_id(), gfp_mask, order)
-#define alloc_pages_vma(gfp_mask, order, vma, addr, node)\
+#define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\
+	alloc_pages(gfp_mask, order)
+#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
 	alloc_pages(gfp_mask, order)
 #endif
 #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
 #define alloc_page_vma(gfp_mask, vma, addr)			\
-	alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id())
+	alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false)
 #define alloc_page_vma_node(gfp_mask, vma, addr, node)		\
-	alloc_pages_vma(gfp_mask, 0, vma, addr, node)
+	alloc_pages_vma(gfp_mask, 0, vma, addr, node, false)
 
 extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
 extern unsigned long get_zeroed_page(gfp_t gfp_mask);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f2d19e4fe854f..5da55b38b1b7f 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -629,30 +629,30 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
  *	    available
  * never: never stall for any thp allocation
  */
-static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma, unsigned long addr)
+static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma)
 {
 	const bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE);
-	const gfp_t gfp_mask = GFP_TRANSHUGE_LIGHT | __GFP_THISNODE;
 
 	/* Always do synchronous compaction */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
-		return GFP_TRANSHUGE | __GFP_THISNODE |
-		       (vma_madvised ? 0 : __GFP_NORETRY);
+		return GFP_TRANSHUGE | (vma_madvised ? 0 : __GFP_NORETRY);
 
 	/* Kick kcompactd and fail quickly */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags))
-		return gfp_mask | __GFP_KSWAPD_RECLAIM;
+		return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM;
 
 	/* Synchronous compaction if madvised, otherwise kick kcompactd */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags))
-		return gfp_mask | (vma_madvised ? __GFP_DIRECT_RECLAIM :
-						  __GFP_KSWAPD_RECLAIM);
+		return GFP_TRANSHUGE_LIGHT |
+			(vma_madvised ? __GFP_DIRECT_RECLAIM :
+					__GFP_KSWAPD_RECLAIM);
 
 	/* Only do synchronous compaction if madvised */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags))
-		return gfp_mask | (vma_madvised ? __GFP_DIRECT_RECLAIM : 0);
+		return GFP_TRANSHUGE_LIGHT |
+		       (vma_madvised ? __GFP_DIRECT_RECLAIM : 0);
 
-	return gfp_mask;
+	return GFP_TRANSHUGE_LIGHT;
 }
 
 /* Caller must hold page table lock. */
@@ -724,8 +724,8 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
 			pte_free(vma->vm_mm, pgtable);
 		return ret;
 	}
-	gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
-	page = alloc_pages_vma(gfp, HPAGE_PMD_ORDER, vma, haddr, numa_node_id());
+	gfp = alloc_hugepage_direct_gfpmask(vma);
+	page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
 	if (unlikely(!page)) {
 		count_vm_event(THP_FAULT_FALLBACK);
 		return VM_FAULT_FALLBACK;
@@ -1295,9 +1295,8 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
 alloc:
 	if (transparent_hugepage_enabled(vma) &&
 	    !transparent_hugepage_debug_cow()) {
-		huge_gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
-		new_page = alloc_pages_vma(huge_gfp, HPAGE_PMD_ORDER, vma,
-				haddr, numa_node_id());
+		huge_gfp = alloc_hugepage_direct_gfpmask(vma);
+		new_page = alloc_hugepage_vma(huge_gfp, vma, haddr, HPAGE_PMD_ORDER);
 	} else
 		new_page = NULL;
 
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 69e278b469efd..d4496d9d34f53 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1116,8 +1116,8 @@ static struct page *new_page(struct page *page, unsigned long start)
 	} else if (PageTransHuge(page)) {
 		struct page *thp;
 
-		thp = alloc_pages_vma(GFP_TRANSHUGE, HPAGE_PMD_ORDER, vma,
-				address, numa_node_id());
+		thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, address,
+					 HPAGE_PMD_ORDER);
 		if (!thp)
 			return NULL;
 		prep_transhuge_page(thp);
@@ -2011,6 +2011,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  * 	@vma:  Pointer to VMA or NULL if not available.
  *	@addr: Virtual Address of the allocation. Must be inside the VMA.
  *	@node: Which node to prefer for allocation (modulo policy).
+ *	@hugepage: for hugepages try only the preferred node if possible
  *
  * 	This function allocates a page from the kernel page pool and applies
  *	a NUMA policy associated with the VMA or the current process.
@@ -2021,7 +2022,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  */
 struct page *
 alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
-		unsigned long addr, int node)
+		unsigned long addr, int node, bool hugepage)
 {
 	struct mempolicy *pol;
 	struct page *page;
@@ -2039,6 +2040,31 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
 		goto out;
 	}
 
+	if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage)) {
+		int hpage_node = node;
+
+		/*
+		 * For hugepage allocation and non-interleave policy which
+		 * allows the current node (or other explicitly preferred
+		 * node) we only try to allocate from the current/preferred
+		 * node and don't fall back to other nodes, as the cost of
+		 * remote accesses would likely offset THP benefits.
+		 *
+		 * If the policy is interleave, or does not allow the current
+		 * node in its nodemask, we allocate the standard way.
+		 */
+		if (pol->mode == MPOL_PREFERRED && !(pol->flags & MPOL_F_LOCAL))
+			hpage_node = pol->v.preferred_node;
+
+		nmask = policy_nodemask(gfp, pol);
+		if (!nmask || node_isset(hpage_node, *nmask)) {
+			mpol_cond_put(pol);
+			page = __alloc_pages_node(hpage_node,
+						gfp | __GFP_THISNODE, order);
+			goto out;
+		}
+	}
+
 	nmask = policy_nodemask(gfp, pol);
 	preferred_nid = policy_node(gfp, pol, node);
 	page = __alloc_pages_nodemask(gfp, order, preferred_nid, nmask);
diff --git a/mm/shmem.c b/mm/shmem.c
index cddc72ac44d86..921f80488bb3f 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1439,7 +1439,7 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp,
 
 	shmem_pseudo_vma_init(&pvma, info, hindex);
 	page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN,
-			HPAGE_PMD_ORDER, &pvma, 0, numa_node_id());
+			HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(), true);
 	shmem_pseudo_vma_destroy(&pvma);
 	if (page)
 		prep_transhuge_page(page);
-- 
GitLab


From bd5122cd1e0644d8bd8dd84517c932773e999766 Mon Sep 17 00:00:00 2001
From: Tarick Bedeir <tarick@google.com>
Date: Fri, 7 Dec 2018 00:30:26 -0800
Subject: [PATCH 1880/1890] net/mlx4_core: Correctly set PFC param if global
 pause is turned off.

rx_ppp and tx_ppp can be set between 0 and 255, so don't clamp to 1.

Fixes: 6e8814ceb7e8 ("net/mlx4_en: Fix mixed PFC and Global pause user control requests")
Signed-off-by: Tarick Bedeir <tarick@google.com>
Reviewed-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index f11b45001cad8..d290f0787dfbb 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -1084,8 +1084,8 @@ static int mlx4_en_set_pauseparam(struct net_device *dev,
 
 	tx_pause = !!(pause->tx_pause);
 	rx_pause = !!(pause->rx_pause);
-	rx_ppp = priv->prof->rx_ppp && !(tx_pause || rx_pause);
-	tx_ppp = priv->prof->tx_ppp && !(tx_pause || rx_pause);
+	rx_ppp = (tx_pause || rx_pause) ? 0 : priv->prof->rx_ppp;
+	tx_ppp = (tx_pause || rx_pause) ? 0 : priv->prof->tx_ppp;
 
 	err = mlx4_SET_PORT_general(mdev->dev, priv->port,
 				    priv->rx_skb_size + ETH_FCS_LEN,
-- 
GitLab


From 804fba4e9f508c8004a4bfbdf3f300ca237c56df Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Sun, 9 Dec 2018 07:00:59 -0500
Subject: [PATCH 1881/1890] bnxt_en: Fix CNP CoS queue regression.

Recent changes to support the 57500 devices have created this
regression.  The bnxt_hwrm_queue_qportcfg() call was moved to be
called earlier before the RDMA support was determined, causing
the CoS queues configuration to be set before knowing whether RDMA
was supported or not.  Fix it by moving it to the right place right
after RDMA support is determined.

Fixes: 98f04cf0f1fc ("bnxt_en: Check context memory requirements from firmware.")
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index d4c3001175299..0cf4cb93c1e12 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -6292,6 +6292,8 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp)
 	return rc;
 }
 
+static int bnxt_hwrm_queue_qportcfg(struct bnxt *bp);
+
 static int bnxt_hwrm_func_qcaps(struct bnxt *bp)
 {
 	int rc;
@@ -6299,6 +6301,11 @@ static int bnxt_hwrm_func_qcaps(struct bnxt *bp)
 	rc = __bnxt_hwrm_func_qcaps(bp);
 	if (rc)
 		return rc;
+	rc = bnxt_hwrm_queue_qportcfg(bp);
+	if (rc) {
+		netdev_err(bp->dev, "hwrm query qportcfg failure rc: %d\n", rc);
+		return rc;
+	}
 	if (bp->hwrm_spec_code >= 0x10803) {
 		rc = bnxt_alloc_ctx_mem(bp);
 		if (rc)
-- 
GitLab


From 75720e6323a1d195ae3ebf1a7b5e17c2e687f552 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Sun, 9 Dec 2018 07:01:00 -0500
Subject: [PATCH 1882/1890] bnxt_en: Keep track of reserved IRQs.

The new 57500 chips use 1 NQ per MSIX vector, whereas legacy chips use
1 CP ring per MSIX vector.  To better unify this, add a resv_irqs
field to struct bnxt_hw_resc.  On legacy chips, we initialize resv_irqs
with resv_cp_rings.  On new chips, we initialize it with the allocated
MSIX resources.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c     | 8 ++++++--
 drivers/net/ethernet/broadcom/bnxt/bnxt.h     | 1 +
 drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c | 2 +-
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 0cf4cb93c1e12..c39820b2268fd 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -5162,6 +5162,7 @@ static int bnxt_hwrm_get_rings(struct bnxt *bp)
 		cp = le16_to_cpu(resp->alloc_cmpl_rings);
 		stats = le16_to_cpu(resp->alloc_stat_ctx);
 		cp = min_t(u16, cp, stats);
+		hw_resc->resv_irqs = cp;
 		if (bp->flags & BNXT_FLAG_CHIP_P5) {
 			int rx = hw_resc->resv_rx_rings;
 			int tx = hw_resc->resv_tx_rings;
@@ -5175,7 +5176,7 @@ static int bnxt_hwrm_get_rings(struct bnxt *bp)
 				hw_resc->resv_rx_rings = rx;
 				hw_resc->resv_tx_rings = tx;
 			}
-			cp = le16_to_cpu(resp->alloc_msix);
+			hw_resc->resv_irqs = le16_to_cpu(resp->alloc_msix);
 			hw_resc->resv_hw_ring_grps = rx;
 		}
 		hw_resc->resv_cp_rings = cp;
@@ -7055,7 +7056,9 @@ int bnxt_get_avail_msix(struct bnxt *bp, int num)
 	int total_req = bp->cp_nr_rings + num;
 	int max_idx, avail_msix;
 
-	max_idx = min_t(int, bp->total_irqs, max_cp);
+	max_idx = bp->total_irqs;
+	if (!(bp->flags & BNXT_FLAG_CHIP_P5))
+		max_idx = min_t(int, bp->total_irqs, max_cp);
 	avail_msix = max_idx - bp->cp_nr_rings;
 	if (!BNXT_NEW_RM(bp) || avail_msix >= num)
 		return avail_msix;
@@ -7801,6 +7804,7 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up)
 
 		rc = bnxt_hwrm_func_resc_qcaps(bp, true);
 		hw_resc->resv_cp_rings = 0;
+		hw_resc->resv_irqs = 0;
 		hw_resc->resv_tx_rings = 0;
 		hw_resc->resv_rx_rings = 0;
 		hw_resc->resv_hw_ring_grps = 0;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 9e99d4ab3e062..3030931ccaf8a 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -928,6 +928,7 @@ struct bnxt_hw_resc {
 	u16	min_stat_ctxs;
 	u16	max_stat_ctxs;
 	u16	max_irqs;
+	u16	resv_irqs;
 };
 
 #if defined(CONFIG_BNXT_SRIOV)
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
index b59b382d34f94..0a3097baafde6 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
@@ -168,7 +168,7 @@ static int bnxt_req_msix_vecs(struct bnxt_en_dev *edev, int ulp_id,
 	if (BNXT_NEW_RM(bp)) {
 		struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
 
-		avail_msix = hw_resc->resv_cp_rings - bp->cp_nr_rings;
+		avail_msix = hw_resc->resv_irqs - bp->cp_nr_rings;
 		edev->ulp_tbl[ulp_id].msix_requested = avail_msix;
 	}
 	bnxt_fill_msix_vecs(bp, ent);
-- 
GitLab


From c0b8cda05e1d8151f57a79e525c2c7d51cec2f4e Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Sun, 9 Dec 2018 07:01:01 -0500
Subject: [PATCH 1883/1890] bnxt_en: Fix NQ/CP rings accounting on the new
 57500 chips.

The new 57500 chips have introduced the NQ structure in addition to
the existing CP rings in all chips.  We need to introduce a new
bnxt_nq_rings_in_use().  On legacy chips, the 2 functions are the
same and one will just call the other.  On the new chips, they
refer to the 2 separate ring structures.  The new function is now
called to determine the resource (NQ or CP rings) associated with
MSIX that are in use.

On 57500 chips, the RDMA driver does not use the CP rings so
we don't need to do the subtraction adjustment.

Fixes: 41e8d7983752 ("bnxt_en: Modify the ring reservation functions for 57500 series chips.")
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 29 ++++++++++++++++++-----
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index c39820b2268fd..2e90d98640d1f 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -5354,7 +5354,7 @@ static int bnxt_hwrm_reserve_rings(struct bnxt *bp, int tx, int rx, int grp,
 		return bnxt_hwrm_reserve_vf_rings(bp, tx, rx, grp, cp, vnic);
 }
 
-static int bnxt_cp_rings_in_use(struct bnxt *bp)
+static int bnxt_nq_rings_in_use(struct bnxt *bp)
 {
 	int cp = bp->cp_nr_rings;
 	int ulp_msix, ulp_base;
@@ -5369,10 +5369,22 @@ static int bnxt_cp_rings_in_use(struct bnxt *bp)
 	return cp;
 }
 
+static int bnxt_cp_rings_in_use(struct bnxt *bp)
+{
+	int cp;
+
+	if (!(bp->flags & BNXT_FLAG_CHIP_P5))
+		return bnxt_nq_rings_in_use(bp);
+
+	cp = bp->tx_nr_rings + bp->rx_nr_rings;
+	return cp;
+}
+
 static bool bnxt_need_reserve_rings(struct bnxt *bp)
 {
 	struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
 	int cp = bnxt_cp_rings_in_use(bp);
+	int nq = bnxt_nq_rings_in_use(bp);
 	int rx = bp->rx_nr_rings;
 	int vnic = 1, grp = rx;
 
@@ -5388,7 +5400,7 @@ static bool bnxt_need_reserve_rings(struct bnxt *bp)
 		rx <<= 1;
 	if (BNXT_NEW_RM(bp) &&
 	    (hw_resc->resv_rx_rings != rx || hw_resc->resv_cp_rings != cp ||
-	     hw_resc->resv_vnics != vnic ||
+	     hw_resc->resv_irqs < nq || hw_resc->resv_vnics != vnic ||
 	     (hw_resc->resv_hw_ring_grps != grp &&
 	      !(bp->flags & BNXT_FLAG_CHIP_P5))))
 		return true;
@@ -5398,7 +5410,7 @@ static bool bnxt_need_reserve_rings(struct bnxt *bp)
 static int __bnxt_reserve_rings(struct bnxt *bp)
 {
 	struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
-	int cp = bnxt_cp_rings_in_use(bp);
+	int cp = bnxt_nq_rings_in_use(bp);
 	int tx = bp->tx_nr_rings;
 	int rx = bp->rx_nr_rings;
 	int grp, rx_rings, rc;
@@ -5423,7 +5435,7 @@ static int __bnxt_reserve_rings(struct bnxt *bp)
 	tx = hw_resc->resv_tx_rings;
 	if (BNXT_NEW_RM(bp)) {
 		rx = hw_resc->resv_rx_rings;
-		cp = hw_resc->resv_cp_rings;
+		cp = hw_resc->resv_irqs;
 		grp = hw_resc->resv_hw_ring_grps;
 		vnic = hw_resc->resv_vnics;
 	}
@@ -7034,7 +7046,12 @@ unsigned int bnxt_get_max_func_cp_rings(struct bnxt *bp)
 
 unsigned int bnxt_get_max_func_cp_rings_for_en(struct bnxt *bp)
 {
-	return bp->hw_resc.max_cp_rings - bnxt_get_ulp_msix_num(bp);
+	unsigned int cp = bp->hw_resc.max_cp_rings;
+
+	if (!(bp->flags & BNXT_FLAG_CHIP_P5))
+		cp -= bnxt_get_ulp_msix_num(bp);
+
+	return cp;
 }
 
 static unsigned int bnxt_get_max_func_irqs(struct bnxt *bp)
@@ -7076,7 +7093,7 @@ static int bnxt_get_num_msix(struct bnxt *bp)
 	if (!BNXT_NEW_RM(bp))
 		return bnxt_get_max_func_irqs(bp);
 
-	return bnxt_cp_rings_in_use(bp);
+	return bnxt_nq_rings_in_use(bp);
 }
 
 static int bnxt_init_msix(struct bnxt *bp)
-- 
GitLab


From e30fbc33190b8ba1d6e8ff4864627f7414b5ca99 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Sun, 9 Dec 2018 07:01:02 -0500
Subject: [PATCH 1884/1890] bnxt_en: Fix _bnxt_get_max_rings() for 57500 chips.

The CP rings are accounted differently on the new 57500 chips.  There
must be enough CP rings for the sum of RX and TX rings on the new
chips.  The current logic may be over-estimating the RX and TX rings.

The output parameter max_cp should be the maximum NQs capped by
MSIX vectors available for networking in the context of 57500 chips.
The existing code which uses CMPL rings capped by the MSIX vectors
works most of the time but is not always correct.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 2e90d98640d1f..5d21c14853acc 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -9827,13 +9827,16 @@ static void _bnxt_get_max_rings(struct bnxt *bp, int *max_rx, int *max_tx,
 				int *max_cp)
 {
 	struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
-	int max_ring_grps = 0;
+	int max_ring_grps = 0, max_irq;
 
 	*max_tx = hw_resc->max_tx_rings;
 	*max_rx = hw_resc->max_rx_rings;
-	*max_cp = min_t(int, bnxt_get_max_func_cp_rings_for_en(bp),
-			hw_resc->max_irqs - bnxt_get_ulp_msix_num(bp));
-	*max_cp = min_t(int, *max_cp, hw_resc->max_stat_ctxs);
+	*max_cp = bnxt_get_max_func_cp_rings_for_en(bp);
+	max_irq = min_t(int, bnxt_get_max_func_irqs(bp) -
+			bnxt_get_ulp_msix_num(bp),
+			bnxt_get_max_func_stat_ctxs(bp));
+	if (!(bp->flags & BNXT_FLAG_CHIP_P5))
+		*max_cp = min_t(int, *max_cp, max_irq);
 	max_ring_grps = hw_resc->max_hw_ring_grps;
 	if (BNXT_CHIP_TYPE_NITRO_A0(bp) && BNXT_PF(bp)) {
 		*max_cp -= 1;
@@ -9841,6 +9844,11 @@ static void _bnxt_get_max_rings(struct bnxt *bp, int *max_rx, int *max_tx,
 	}
 	if (bp->flags & BNXT_FLAG_AGG_RINGS)
 		*max_rx >>= 1;
+	if (bp->flags & BNXT_FLAG_CHIP_P5) {
+		bnxt_trim_rings(bp, max_rx, max_tx, *max_cp, false);
+		/* On P5 chips, max_cp output param should be available NQs */
+		*max_cp = max_irq;
+	}
 	*max_rx = min_t(int, *max_rx, max_ring_grps);
 }
 
-- 
GitLab


From 35cc3cefc4de90001c9137e2d01dd9d06b11acfb Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Sun, 9 Dec 2018 18:10:24 +0200
Subject: [PATCH 1885/1890] net/sched: cls_flower: Reject duplicated rules also
 under skip_sw

Currently, duplicated rules are rejected only for skip_hw or "none",
hence allowing users to push duplicates into HW for no reason.

Use the flower tables to protect for that.

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Paul Blakey <paulb@mellanox.com>
Reported-by: Chris Mi <chrism@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_flower.c | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index c6c327874abcc..71312d7bd8f49 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -1238,18 +1238,16 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 	if (err)
 		goto errout_idr;
 
-	if (!tc_skip_sw(fnew->flags)) {
-		if (!fold && fl_lookup(fnew->mask, &fnew->mkey)) {
-			err = -EEXIST;
-			goto errout_mask;
-		}
-
-		err = rhashtable_insert_fast(&fnew->mask->ht, &fnew->ht_node,
-					     fnew->mask->filter_ht_params);
-		if (err)
-			goto errout_mask;
+	if (!fold && fl_lookup(fnew->mask, &fnew->mkey)) {
+		err = -EEXIST;
+		goto errout_mask;
 	}
 
+	err = rhashtable_insert_fast(&fnew->mask->ht, &fnew->ht_node,
+				     fnew->mask->filter_ht_params);
+	if (err)
+		goto errout_mask;
+
 	if (!tc_skip_hw(fnew->flags)) {
 		err = fl_hw_replace_filter(tp, fnew, extack);
 		if (err)
@@ -1303,9 +1301,8 @@ static int fl_delete(struct tcf_proto *tp, void *arg, bool *last,
 	struct cls_fl_head *head = rtnl_dereference(tp->root);
 	struct cls_fl_filter *f = arg;
 
-	if (!tc_skip_sw(f->flags))
-		rhashtable_remove_fast(&f->mask->ht, &f->ht_node,
-				       f->mask->filter_ht_params);
+	rhashtable_remove_fast(&f->mask->ht, &f->ht_node,
+			       f->mask->filter_ht_params);
 	__fl_delete(tp, f, extack);
 	*last = list_empty(&head->masks);
 	return 0;
-- 
GitLab


From 40e020c129cfc991e8ab4736d2665351ffd1468d Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 9 Dec 2018 15:31:00 -0800
Subject: [PATCH 1886/1890] Linux 4.20-rc6

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index e9fd22c8445ec..f2c3423c3062f 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 4
 PATCHLEVEL = 20
 SUBLEVEL = 0
-EXTRAVERSION = -rc5
+EXTRAVERSION = -rc6
 NAME = Shy Crocodile
 
 # *DOCUMENTATION*
-- 
GitLab


From d347d0c82a99b41efc2770fc01a04066903300df Mon Sep 17 00:00:00 2001
From: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Date: Mon, 10 Dec 2018 13:14:37 +0100
Subject: [PATCH 1887/1890] backlight: pwm_bl: Fix brightness levels for non-DT
 case.

Commit '88ba95bedb79 ("backlight: pwm_bl: Compute brightness of LED
linearly to human eye")' allows the possibility to compute a default
brightness table when there isn't the brightness-levels property in the
DT. Unfortunately the changes made broke the pwm backlight for the
non-DT boards.

Usually, the non-DT boards don't pass the brightness levels via platform
data, instead, it sets the max_brightness in their platform data and the
driver calculates the level without a table. The offending patch assumed
that when there is no brightness levels table we should create one, but this
is clearly wrong for the non-DT case.

After this patch the code handles the DT and the non-DT case taking in
consideration also if max_brightness is set or not.

Fixes: 88ba95bedb79 ("backlight: pwm_bl: Compute brightness of LED linearly to human eye")
Reported-by: Robert Jarzmik <robert.jarzmik@free.fr>
Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Tested-by: Robert Jarzmik <robert.jarzmik@free.fr>
Acked-by: Daniel Thompson <daniel.thompson@linaro.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/video/backlight/pwm_bl.c | 41 +++++++++++++++++++++++++++-----
 1 file changed, 35 insertions(+), 6 deletions(-)

diff --git a/drivers/video/backlight/pwm_bl.c b/drivers/video/backlight/pwm_bl.c
index 678b270631983..f9ef0673a083c 100644
--- a/drivers/video/backlight/pwm_bl.c
+++ b/drivers/video/backlight/pwm_bl.c
@@ -562,7 +562,30 @@ static int pwm_backlight_probe(struct platform_device *pdev)
 		goto err_alloc;
 	}
 
-	if (!data->levels) {
+	if (data->levels) {
+		/*
+		 * For the DT case, only when brightness levels is defined
+		 * data->levels is filled. For the non-DT case, data->levels
+		 * can come from platform data, however is not usual.
+		 */
+		for (i = 0; i <= data->max_brightness; i++) {
+			if (data->levels[i] > pb->scale)
+				pb->scale = data->levels[i];
+
+			pb->levels = data->levels;
+		}
+	} else if (!data->max_brightness) {
+		/*
+		 * If no brightness levels are provided and max_brightness is
+		 * not set, use the default brightness table. For the DT case,
+		 * max_brightness is set to 0 when brightness levels is not
+		 * specified. For the non-DT case, max_brightness is usually
+		 * set to some value.
+		 */
+
+		/* Get the PWM period (in nanoseconds) */
+		pwm_get_state(pb->pwm, &state);
+
 		ret = pwm_backlight_brightness_default(&pdev->dev, data,
 						       state.period);
 		if (ret < 0) {
@@ -570,13 +593,19 @@ static int pwm_backlight_probe(struct platform_device *pdev)
 				"failed to setup default brightness table\n");
 			goto err_alloc;
 		}
-	}
 
-	for (i = 0; i <= data->max_brightness; i++) {
-		if (data->levels[i] > pb->scale)
-			pb->scale = data->levels[i];
+		for (i = 0; i <= data->max_brightness; i++) {
+			if (data->levels[i] > pb->scale)
+				pb->scale = data->levels[i];
 
-		pb->levels = data->levels;
+			pb->levels = data->levels;
+		}
+	} else {
+		/*
+		 * That only happens for the non-DT case, where platform data
+		 * sets the max_brightness value.
+		 */
+		pb->scale = data->max_brightness;
 	}
 
 	pb->lth_brightness = data->lth_brightness * (state.period / pb->scale);
-- 
GitLab


From 0e1f37d120ec316e78b62ee570eec98e808ff0a4 Mon Sep 17 00:00:00 2001
From: Aleix Roca Nonell <kernelrocks@gmail.com>
Date: Sun, 16 Dec 2018 16:53:19 +0100
Subject: [PATCH 1888/1890] HID: asus: Add support for the ASUS T101HA keyboard
 dock

The ASUS T101HA keyboard dock generates HID events using the ASUS vendor
specific UsagePage 0xff31. In consequence, some multimedia keys such as
brightness up and down are not working with hid-generic.

This commit adds the T101HA dock into the supported device list of the
hid-asus driver. It also prevents the dock's integrated touchpad to be
bound with hid-asus given that it is already working fine with
hid-multitouch.

Signed-off-by: Aleix Roca Nonell <kernelrocks@gmail.com>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
---
 drivers/hid/hid-asus.c | 8 ++++++++
 drivers/hid/hid-ids.h  | 1 +
 2 files changed, 9 insertions(+)

diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c
index ab8bd40a77ed5..951bb17ae8b2c 100644
--- a/drivers/hid/hid-asus.c
+++ b/drivers/hid/hid-asus.c
@@ -70,6 +70,7 @@ MODULE_DESCRIPTION("Asus HID Keyboard and TouchPad");
 #define QUIRK_T100_KEYBOARD		BIT(6)
 #define QUIRK_T100CHI			BIT(7)
 #define QUIRK_G752_KEYBOARD		BIT(8)
+#define QUIRK_T101HA_DOCK		BIT(9)
 
 #define I2C_KEYBOARD_QUIRKS			(QUIRK_FIX_NOTEBOOK_REPORT | \
 						 QUIRK_NO_INIT_REPORTS | \
@@ -699,6 +700,11 @@ static int asus_probe(struct hid_device *hdev, const struct hid_device_id *id)
 		return ret;
 	}
 
+	/* use hid-multitouch for T101HA touchpad */
+	if (id->driver_data & QUIRK_T101HA_DOCK &&
+	    hdev->collection->usage == HID_GD_MOUSE)
+		return -ENODEV;
+
 	ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT);
 	if (ret) {
 		hid_err(hdev, "Asus hw start failed: %d\n", ret);
@@ -830,6 +836,8 @@ static const struct hid_device_id asus_devices[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK,
 		USB_DEVICE_ID_ASUSTEK_T100TAF_KEYBOARD),
 	  QUIRK_T100_KEYBOARD | QUIRK_NO_CONSUMER_USAGES },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK,
+		USB_DEVICE_ID_ASUSTEK_T101HA_KEYBOARD), QUIRK_T101HA_DOCK },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_ASUS_AK1D) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_TURBOX, USB_DEVICE_ID_ASUS_MD_5110) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_JESS, USB_DEVICE_ID_ASUS_MD_5112) },
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 4206428c0ba28..f1eee2778b703 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -184,6 +184,7 @@
 #define USB_DEVICE_ID_ASUSTEK_T100TA_KEYBOARD	0x17e0
 #define USB_DEVICE_ID_ASUSTEK_T100TAF_KEYBOARD	0x1807
 #define USB_DEVICE_ID_ASUSTEK_T100CHI_KEYBOARD	0x8502
+#define USB_DEVICE_ID_ASUSTEK_T101HA_KEYBOARD	0x183d
 #define USB_DEVICE_ID_ASUSTEK_T304_KEYBOARD	0x184a
 #define USB_DEVICE_ID_ASUSTEK_I2C_KEYBOARD	0x8585
 #define USB_DEVICE_ID_ASUSTEK_I2C_TOUCHPAD	0x0101
-- 
GitLab


From 8c32d47b151b675291ca4278bfce0ec3eb46fad8 Mon Sep 17 00:00:00 2001
From: Yangtao Li <tiny.windzz@gmail.com>
Date: Fri, 30 Nov 2018 21:49:51 -0500
Subject: [PATCH 1889/1890] HID: debug: Change to use DEFINE_SHOW_ATTRIBUTE
 macro

Use DEFINE_SHOW_ATTRIBUTE macro to simplify the code.

Signed-off-by: Yangtao Li <tiny.windzz@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-debug.c | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c
index b48100236df89..c530476edba62 100644
--- a/drivers/hid/hid-debug.c
+++ b/drivers/hid/hid-debug.c
@@ -1072,11 +1072,6 @@ static int hid_debug_rdesc_show(struct seq_file *f, void *p)
 	return 0;
 }
 
-static int hid_debug_rdesc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, hid_debug_rdesc_show, inode->i_private);
-}
-
 static int hid_debug_events_open(struct inode *inode, struct file *file)
 {
 	int err = 0;
@@ -1211,12 +1206,7 @@ static int hid_debug_events_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static const struct file_operations hid_debug_rdesc_fops = {
-	.open           = hid_debug_rdesc_open,
-	.read           = seq_read,
-	.llseek         = seq_lseek,
-	.release        = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(hid_debug_rdesc);
 
 static const struct file_operations hid_debug_events_fops = {
 	.owner =        THIS_MODULE,
-- 
GitLab


From 74fbc7d371d9a438b8e39d926d9d5012d2f762c4 Mon Sep 17 00:00:00 2001
From: Song Hongyan <hongyan.song@intel.com>
Date: Tue, 11 Dec 2018 09:58:23 +0800
Subject: [PATCH 1890/1890] HID: intel-ish-hid: add MSI interrupt support

Now ish hid ipc only support sideband interrupt but on some platforms
they use MSI interrupt.
In order to make the interrupt type coverage all the scenario add
single MSI interrupt support, it can match all interrupt types.

Signed-off-by: Song Hongyan <hongyan.song@intel.com>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/intel-ish-hid/ipc/pci-ish.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/hid/intel-ish-hid/ipc/pci-ish.c b/drivers/hid/intel-ish-hid/ipc/pci-ish.c
index 8793cc49f8554..a6e1ee744f4d4 100644
--- a/drivers/hid/intel-ish-hid/ipc/pci-ish.c
+++ b/drivers/hid/intel-ish-hid/ipc/pci-ish.c
@@ -117,6 +117,7 @@ static int ish_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	int ret;
 	struct ish_hw *hw;
+	unsigned long irq_flag = 0;
 	struct ishtp_device *ishtp;
 	struct device *dev = &pdev->dev;
 
@@ -156,8 +157,12 @@ static int ish_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	pdev->dev_flags |= PCI_DEV_FLAGS_NO_D3;
 
 	/* request and enable interrupt */
+	ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES);
+	if (!pdev->msi_enabled && !pdev->msix_enabled)
+		irq_flag = IRQF_SHARED;
+
 	ret = devm_request_irq(dev, pdev->irq, ish_irq_handler,
-			       IRQF_SHARED, KBUILD_MODNAME, ishtp);
+			       irq_flag, KBUILD_MODNAME, ishtp);
 	if (ret) {
 		dev_err(dev, "ISH: request IRQ %d failed\n", pdev->irq);
 		return ret;
-- 
GitLab